Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 3589764..078615c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config VIRTIO
 	tristate
 	---help---
@@ -42,6 +43,17 @@
 
 	  If unsure, say Y.
 
+config VIRTIO_PMEM
+	tristate "Support for virtio pmem driver"
+	depends on VIRTIO
+	depends on LIBNVDIMM
+	help
+	  This driver provides access to virtio-pmem devices, storage devices
+	  that are mapped into the physical address space - similar to
+	  NVDIMMs - with a virtio-based flushing interface.
+
+	  If unsure, say Y.
+
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver"
 	depends on VIRTIO
@@ -62,12 +74,12 @@
 
 	 If unsure, say M.
 
- config VIRTIO_MMIO
+config VIRTIO_MMIO
 	tristate "Platform bus driver for memory mapped virtio devices"
 	depends on HAS_IOMEM && HAS_DMA
- 	select VIRTIO
- 	---help---
- 	 This drivers provides support for memory mapped virtio
+	select VIRTIO
+	---help---
+	 This driver provides support for memory mapped virtio
 	 platform device driver.
 
  	 If unsure, say N.
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 59e36ef..a977e32 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/virtio.h>
 #include <linux/spinlock.h>
 #include <linux/virtio_config.h>
@@ -161,6 +162,7 @@
 
 void virtio_add_status(struct virtio_device *dev, unsigned int status)
 {
+	might_sleep();
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
 }
 EXPORT_SYMBOL_GPL(virtio_add_status);
@@ -170,6 +172,7 @@
 	int ret = dev->config->finalize_features(dev);
 	unsigned status;
 
+	might_sleep();
 	if (ret)
 		return ret;
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f62..e05679c 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Virtio balloon implementation, inspired by Dor Laor and Marcelo
  * Tosatti's implementations.
  *
  *  Copyright 2008 Rusty Russell IBM Corporation
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <linux/virtio.h>
@@ -31,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/pseudo_fs.h>
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -41,13 +29,38 @@
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+					     __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+	(1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
 #ifdef CONFIG_BALLOON_COMPACTION
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+	VIRTIO_BALLOON_VQ_INFLATE,
+	VIRTIO_BALLOON_VQ_DEFLATE,
+	VIRTIO_BALLOON_VQ_STATS,
+	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_MAX
+};
+
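+/*
+ * Bit numbers used in config_read_bitmap; a set bit means the
+ * corresponding config field must be re-read from the device.
+ */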
+enum virtio_balloon_config_read {
+	VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0,
+};
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+	/* Balloon's own wq for cpu-intensive work items */
+	struct workqueue_struct *balloon_wq;
+	/* The free page reporting work item submitted to the balloon wq */
+	struct work_struct report_free_page_work;
 
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
@@ -56,6 +69,24 @@
 	/* Prevent updating balloon when it is being canceled. */
 	spinlock_t stop_update_lock;
 	bool stop_update;
+	/* Bitmap to indicate if reading the related config fields is needed */
+	unsigned long config_read_bitmap;
+
+	/* The list of allocated free pages, waiting to be given back to mm */
+	struct list_head free_page_list;
+	spinlock_t free_page_list_lock;
+	/* The number of free page blocks on the above list */
+	unsigned long num_free_page_blocks;
+	/*
+	 * The cmd id received from host.
+	 * Read it via virtio_balloon_cmd_id_received to get the latest value
+	 * sent from host.
+	 */
+	u32 cmd_id_received_cache;
+	/* The cmd id that is actively in use */
+	__virtio32 cmd_id_active;
+	/* Buffer to store the stop sign */
+	__virtio32 cmd_id_stop;
 
 	/* Waiting for host to ack the pages we released. */
 	wait_queue_head_t acked;
@@ -320,17 +351,6 @@
 	virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-	struct virtio_balloon *vb = vdev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vb->stop_update_lock, flags);
-	if (!vb->stop_update)
-		queue_work(system_freezable_wq, &vb->update_balloon_size_work);
-	spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	s64 target;
@@ -347,6 +367,54 @@
 	return target - vb->num_pages;
 }
 
+/* Gives back @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+					     unsigned long num_to_return)
+{
+	struct page *page;
+	unsigned long num_returned;
+
+	spin_lock_irq(&vb->free_page_list_lock);
+	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+		page = balloon_page_pop(&vb->free_page_list);
+		if (!page)
+			break;
+		free_pages((unsigned long)page_address(page),
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+	vb->num_free_page_blocks -= num_returned;
+	spin_unlock_irq(&vb->free_page_list_lock);
+
+	return num_returned;
+}
+
+static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
+{
+	if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		return;
+
+	/* No need to queue the work if the bit was already set. */
+	if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
+			     &vb->config_read_bitmap))
+		return;
+
+	queue_work(vb->balloon_wq, &vb->report_free_page_work);
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vb->stop_update_lock, flags);
+	if (!vb->stop_update) {
+		queue_work(system_freezable_wq,
+			   &vb->update_balloon_size_work);
+		virtio_balloon_queue_free_page_work(vb);
+	}
+	spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+}
+
 static void update_balloon_size(struct virtio_balloon *vb)
 {
 	u32 actual = vb->num_pages;
@@ -377,9 +445,12 @@
 			  update_balloon_size_work);
 	diff = towards_target(vb);
 
+	if (!diff)
+		return;
+
 	if (diff > 0)
 		diff -= fill_balloon(vb, diff);
-	else if (diff < 0)
+	else
 		diff += leak_balloon(vb, -diff);
 	update_balloon_size(vb);
 
@@ -389,26 +460,44 @@
 
 static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-	static const char * const names[] = { "inflate", "deflate", "stats" };
-	int err, nvqs;
+	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+	const char *names[VIRTIO_BALLOON_VQ_MAX];
+	int err;
 
 	/*
-	 * We expect two virtqueues: inflate and deflate, and
-	 * optionally stat.
+	 * Inflateq and deflateq are used unconditionally. The names[]
+	 * entry will be NULL if the related feature is not enabled, so
+	 * find_vqs will not allocate the corresponding virtqueue.
 	 */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+	}
+
+	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+					 vqs, callbacks, names, NULL, NULL);
 	if (err)
 		return err;
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
+	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
 		struct scatterlist sg;
 		unsigned int num_stats;
-		vb->stats_vq = vqs[2];
+		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
 
 		/*
 		 * Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +515,173 @@
 		}
 		virtqueue_kick(vb->stats_vq);
 	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
 	return 0;
 }
 
+static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb)
+{
+	if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
+			       &vb->config_read_bitmap))
+		virtio_cread(vb->vdev, struct virtio_balloon_config,
+			     free_page_report_cmd_id,
+			     &vb->cmd_id_received_cache);
+
+	return vb->cmd_id_received_cache;
+}
+
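+/* Send the currently received cmd id to the host to start a reporting round. */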
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	vb->cmd_id_active = virtio32_to_cpu(vb->vdev,
+					virtio_balloon_cmd_id_received(vb));
+	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
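+/* Send the stop sign to the host to end the current reporting round. */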
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
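+/*
+ * Allocate one block of free pages and post it to the host on the free
+ * page vq; returns -EINTR when no more free page blocks can be allocated.
+ */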
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct page *page;
+	struct scatterlist sg;
+	int err, unused;
+	void *p;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	/*
+	 * When the allocation returns NULL, it indicates that we have got all
+	 * the possible free pages, so return -EINTR to stop.
+	 */
+	if (!page)
+		return -EINTR;
+
+	p = page_address(page);
+	sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+	/* There is always 1 entry reserved for the cmd id to use. */
+	if (vq->num_free > 1) {
+		err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
+		if (unlikely(err)) {
+			free_pages((unsigned long)p,
+				   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+			return err;
+		}
+		virtqueue_kick(vq);
+		spin_lock_irq(&vb->free_page_list_lock);
+		balloon_page_push(&vb->free_page_list, page);
+		vb->num_free_page_blocks++;
+		spin_unlock_irq(&vb->free_page_list_lock);
+	} else {
+		/*
+		 * The vq has no available entry to add this page block, so
+		 * just free it.
+		 */
+		free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+
+	return 0;
+}
+
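+/* Report free page blocks until the host changes or stops the cmd id. */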
+static int send_free_pages(struct virtio_balloon *vb)
+{
+	int err;
+	u32 cmd_id_active;
+
+	while (1) {
+		/*
+		 * If a stop id or a new cmd id was just received from host,
+		 * stop the reporting.
+		 */
+		cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+		if (unlikely(cmd_id_active !=
+			     virtio_balloon_cmd_id_received(vb)))
+			break;
+
+		/*
+		 * The free page blocks are allocated and sent to host one by
+		 * one.
+		 */
+		err = get_free_page_and_send(vb);
+		if (err == -EINTR)
+			break;
+		else if (unlikely(err))
+			return err;
+	}
+
+	return 0;
+}
+
+static void virtio_balloon_report_free_page(struct virtio_balloon *vb)
+{
+	int err;
+	struct device *dev = &vb->vdev->dev;
+
+	/* Start by sending the received cmd id to host with an outbuf. */
+	err = send_cmd_id_start(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a start id, err = %d\n", err);
+
+	err = send_free_pages(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a free page, err = %d\n", err);
+
+	/* End by sending a stop id to host with an outbuf. */
+	err = send_cmd_id_stop(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a stop id, err = %d\n", err);
+}
+
+static void report_free_page_func(struct work_struct *work)
+{
+	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+						 report_free_page_work);
+	u32 cmd_id_received;
+
+	cmd_id_received = virtio_balloon_cmd_id_received(vb);
+	if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+		/* Pass ULONG_MAX to give back all the free pages */
+		return_free_pages_to_mm(vb, ULONG_MAX);
+	} else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+		   cmd_id_received !=
+		   virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) {
+		virtio_balloon_report_free_page(vb);
+	}
+}
+
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -493,25 +746,58 @@
 	return MIGRATEPAGE_SUCCESS;
 }
 
-static struct dentry *balloon_mount(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *data)
+static int balloon_init_fs_context(struct fs_context *fc)
 {
-	static const struct dentry_operations ops = {
-		.d_dname = simple_dname,
-	};
-
-	return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
-				BALLOON_KVM_MAGIC);
+	return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
 }
 
 static struct file_system_type balloon_fs = {
 	.name           = "balloon-kvm",
-	.mount          = balloon_mount,
+	.init_fs_context = balloon_init_fs_context,
 	.kill_sb        = kill_anon_super,
 };
 
 #endif /* CONFIG_BALLOON_COMPACTION */
 
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+				       unsigned long pages_to_free)
+{
+	unsigned long blocks_to_free, blocks_freed;
+
+	pages_to_free = round_up(pages_to_free,
+				 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+	blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
+
+	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
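+/* Deflate up to @pages_to_free guest pages and return the number freed. */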
+static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
+					unsigned long pages_to_free)
+{
+	return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
+		VIRTIO_BALLOON_PAGES_PER_PAGE;
+}
+
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+					  unsigned long pages_to_free)
+{
+	unsigned long pages_freed = 0;
+
+	/*
+	 * One invocation of leak_balloon can deflate at most
+	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
+	 * multiple times to deflate pages till reaching pages_to_free.
+	 */
+	while (vb->num_pages && pages_freed < pages_to_free)
+		pages_freed += leak_balloon_pages(vb,
+						  pages_to_free - pages_freed);
+
+	update_balloon_size(vb);
+
+	return pages_freed;
+}
+
 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
 						  struct shrink_control *sc)
 {
@@ -519,20 +805,17 @@
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
 
-	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+	pages_to_free = sc->nr_to_scan;
 
-	/*
-	 * One invocation of leak_balloon can deflate at most
-	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
-	 * multiple times to deflate pages till reaching pages_to_free.
-	 */
-	while (vb->num_pages && pages_to_free) {
-		pages_to_free -= pages_freed;
-		pages_freed += leak_balloon(vb, pages_to_free);
-	}
-	update_balloon_size(vb);
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		pages_freed = shrink_free_pages(vb, pages_to_free);
 
-	return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	if (pages_freed >= pages_to_free)
+		return pages_freed;
+
+	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+	return pages_freed;
 }
 
 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +823,12 @@
 {
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
+	unsigned long count;
 
-	return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count += vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+	return count;
 }
 
 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -561,6 +848,7 @@
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	__u32 poison_val;
 	int err;
 
 	if (!vdev->config->get) {
@@ -604,6 +892,35 @@
 	}
 	vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
 #endif
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		/*
+		 * There is always one entry reserved for cmd id, so the ring
+		 * size needs to be at least two to report free page hints.
+		 */
+		if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+			err = -ENOSPC;
+			goto out_del_vqs;
+		}
+		vb->balloon_wq = alloc_workqueue("balloon-wq",
+					WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+		if (!vb->balloon_wq) {
+			err = -ENOMEM;
+			goto out_del_vqs;
+		}
+		INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+		vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
+		vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		spin_lock_init(&vb->free_page_list_lock);
+		INIT_LIST_HEAD(&vb->free_page_list);
+		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+				      poison_val, &poison_val);
+		}
+	}
 	/*
 	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
 	 * shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +928,7 @@
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
 		err = virtio_balloon_register_shrinker(vb);
 		if (err)
-			goto out_del_vqs;
+			goto out_del_balloon_wq;
 	}
 	virtio_device_ready(vdev);
 
@@ -619,6 +936,9 @@
 		virtballoon_changed(vdev);
 	return 0;
 
+out_del_balloon_wq:
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		destroy_workqueue(vb->balloon_wq);
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
 out_free_vb:
@@ -652,6 +972,11 @@
 	cancel_work_sync(&vb->update_balloon_size_work);
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		cancel_work_sync(&vb->report_free_page_work);
+		destroy_workqueue(vb->balloon_wq);
+	}
+
 	remove_common(vb);
 #ifdef CONFIG_BALLOON_COMPACTION
 	if (vb->vb_dev_info.inode)
@@ -695,6 +1020,9 @@
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+	if (!page_poisoning_enabled())
+		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
 	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
 	return 0;
 }
@@ -703,6 +1031,8 @@
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+	VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 3a0468f..5ae5296 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 4cd9ea5..e09edb5 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Virtio memory mapped device driver
  *
@@ -48,12 +49,7 @@
  *		virtio_mmio.device=0x100@0x100b0000:48 \
  *				virtio_mmio.device=1K@0x1001e000:74
  *
- *
- *
  * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
  */
 
 #define pr_fmt(fmt) "virtio-mmio: " fmt
@@ -467,8 +463,13 @@
 		       struct irq_affinity *desc)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-	unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
-	int i, err;
+	int irq = platform_get_irq(vm_dev->pdev, 0);
+	int i, err, queue_idx = 0;
+
+	if (irq < 0) {
+		dev_err(&vdev->dev, "Cannot get IRQ resource\n");
+		return irq;
+	}
 
 	err = request_irq(irq, vm_interrupt, IRQF_SHARED,
 			dev_name(&vdev->dev), vm_dev);
@@ -476,7 +477,12 @@
 		return err;
 
 	for (i = 0; i < nvqs; ++i) {
-		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+		if (!names[i]) {
+			vqs[i] = NULL;
+			continue;
+		}
+
+		vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			vm_del_vqs(vdev);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 465a6f5..f2862f6 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Virtio PCI driver - common functionality for all device versions
  *
@@ -11,10 +12,6 @@
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *  Rusty Russell <rusty@rustcorp.com.au>
  *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
  */
 
 #include "virtio_pci_common.h"
@@ -255,9 +252,11 @@
 	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 		free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
 
-	for (i = 0; i < vp_dev->msix_vectors; i++)
-		if (vp_dev->msix_affinity_masks[i])
-			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+	if (vp_dev->msix_affinity_masks) {
+		for (i = 0; i < vp_dev->msix_vectors; i++)
+			if (vp_dev->msix_affinity_masks[i])
+				free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+	}
 
 	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
@@ -285,7 +284,7 @@
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	u16 msix_vec;
-	int i, err, nvectors, allocated_vectors;
+	int i, err, nvectors, allocated_vectors, queue_idx = 0;
 
 	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
 	if (!vp_dev->vqs)
@@ -321,7 +320,7 @@
 			msix_vec = allocated_vectors++;
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
-		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 				     ctx ? ctx[i] : false,
 				     msix_vec);
 		if (IS_ERR(vqs[i])) {
@@ -356,7 +355,7 @@
 		const char * const names[], const bool *ctx)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	int i, err;
+	int i, err, queue_idx = 0;
 
 	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
 	if (!vp_dev->vqs)
@@ -374,7 +373,7 @@
 			vqs[i] = NULL;
 			continue;
 		}
-		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 				     ctx ? ctx[i] : false,
 				     VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 0227100..b2f0eb4 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 #ifndef _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H
 #define _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H
 /*
@@ -13,10 +14,6 @@
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *  Rusty Russell <rusty@rustcorp.com.au>
  *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
  */
 
 #include <linux/module.h>
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index de062fb..d62e983 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Virtio PCI driver - legacy device support
  *
@@ -11,10 +12,6 @@
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *  Rusty Russell <rusty@rustcorp.com.au>
  *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
  */
 
 #include "virtio_pci_common.h"
@@ -52,7 +49,8 @@
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	void __iomem *ioaddr = vp_dev->ioaddr +
-				VIRTIO_PCI_CONFIG(vp_dev) + offset;
+			VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
+			offset;
 	u8 *ptr = buf;
 	int i;
 
@@ -67,7 +65,8 @@
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	void __iomem *ioaddr = vp_dev->ioaddr +
-				VIRTIO_PCI_CONFIG(vp_dev) + offset;
+			VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
+			offset;
 	const u8 *ptr = buf;
 	int i;
 
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 07571da..7abcc50 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Virtio PCI driver - modern (virtio 1.0) device support
  *
@@ -11,10 +12,6 @@
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *  Rusty Russell <rusty@rustcorp.com.au>
  *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
  */
 
 #include <linux/delay.h>
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 814b395..867c7eb 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Virtio ring implementation.
  *
  *  Copyright 2007 Rusty Russell IBM Corporation
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
@@ -44,6 +31,26 @@
 	} while (0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
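+/*
+ * Debug-only timing helpers: warn if more than 100ms passes between
+ * adding a buffer and the next kick or get.
+ */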
+#define LAST_ADD_TIME_UPDATE(_vq)				\
+	do {							\
+		ktime_t now = ktime_get();			\
+								\
+		/* No kick or get, with .1 second between?  Warn. */ \
+		if ((_vq)->last_add_time_valid)			\
+			WARN_ON(ktime_to_ms(ktime_sub(now,	\
+				(_vq)->last_add_time)) > 100);	\
+		(_vq)->last_add_time = now;			\
+		(_vq)->last_add_time_valid = true;		\
+	} while (0)
+#define LAST_ADD_TIME_CHECK(_vq)				\
+	do {							\
+		if ((_vq)->last_add_time_valid) {		\
+			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
+				      (_vq)->last_add_time)) > 100); \
+		}						\
+	} while (0)
+#define LAST_ADD_TIME_INVALID(_vq)				\
+	((_vq)->last_add_time_valid = false)
 #else
 #define BAD_RING(_vq, fmt, args...)				\
 	do {							\
@@ -53,18 +60,38 @@
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
+#define LAST_ADD_TIME_UPDATE(vq)
+#define LAST_ADD_TIME_CHECK(vq)
+#define LAST_ADD_TIME_INVALID(vq)
 #endif
 
-struct vring_desc_state {
+struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
 };
 
+struct vring_desc_state_packed {
+	void *data;			/* Data for callback. */
+	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
+	u16 num;			/* Descriptor list length. */
+	u16 next;			/* The next desc state in a list. */
+	u16 last;			/* The last desc state in a list. */
+};
+
+struct vring_desc_extra_packed {
+	dma_addr_t addr;		/* Buffer DMA addr. */
+	u32 len;			/* Buffer length. */
+	u16 flags;			/* Descriptor flags. */
+};
+
 struct vring_virtqueue {
 	struct virtqueue vq;
 
-	/* Actual memory layout for this queue */
-	struct vring vring;
+	/* Is this a packed ring? */
+	bool packed_ring;
+
+	/* Is DMA API used? */
+	bool use_dma_api;
 
 	/* Can we use weak barriers? */
 	bool weak_barriers;
@@ -86,19 +113,75 @@
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
-	/* Last written value to avail->flags */
-	u16 avail_flags_shadow;
+	union {
+		/* Available for split ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring vring;
 
-	/* Last written value to avail->idx in guest byte order */
-	u16 avail_idx_shadow;
+			/* Last written value to avail->flags */
+			u16 avail_flags_shadow;
+
+			/*
+			 * Last written value to avail->idx in
+			 * guest byte order.
+			 */
+			u16 avail_idx_shadow;
+
+			/* Per-descriptor state. */
+			struct vring_desc_state_split *desc_state;
+
+			/* DMA address and size information */
+			dma_addr_t queue_dma_addr;
+			size_t queue_size_in_bytes;
+		} split;
+
+		/* Available for packed ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct {
+				unsigned int num;
+				struct vring_packed_desc *desc;
+				struct vring_packed_desc_event *driver;
+				struct vring_packed_desc_event *device;
+			} vring;
+
+			/* Driver ring wrap counter. */
+			bool avail_wrap_counter;
+
+			/* Device ring wrap counter. */
+			bool used_wrap_counter;
+
+			/* Avail used flags. */
+			u16 avail_used_flags;
+
+			/* Index of the next avail descriptor. */
+			u16 next_avail_idx;
+
+			/*
+			 * Last written value to driver->flags in
+			 * guest byte order.
+			 */
+			u16 event_flags_shadow;
+
+			/* Per-descriptor state. */
+			struct vring_desc_state_packed *desc_state;
+			struct vring_desc_extra_packed *desc_extra;
+
+			/* DMA address and size information */
+			dma_addr_t ring_dma_addr;
+			dma_addr_t driver_event_dma_addr;
+			dma_addr_t device_event_dma_addr;
+			size_t ring_size_in_bytes;
+			size_t event_size_in_bytes;
+		} packed;
+	};
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
 	/* DMA, allocation, and size information */
 	bool we_own_ring;
-	size_t queue_size_in_bytes;
-	dma_addr_t queue_dma_addr;
 
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
@@ -108,13 +191,27 @@
 	bool last_add_time_valid;
 	ktime_t last_add_time;
 #endif
-
-	/* Per-descriptor state. */
-	struct vring_desc_state desc_state[];
 };
 
+
+/*
+ * Helpers.
+ */
+
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+					  unsigned int total_sg)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/*
+	 * If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold
+	 */
+	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
+}
+
 /*
  * Modern virtio devices have feature bits to specify whether they need a
  * quirk and bypass the IOMMU. If not there, just use the DMA API.
@@ -161,6 +258,59 @@
 	return false;
 }
 
+size_t virtio_max_dma_size(struct virtio_device *vdev)
+{
+	size_t max_segment_size = SIZE_MAX;
+
+	if (vring_use_dma_api(vdev))
+		max_segment_size = dma_max_mapping_size(&vdev->dev);
+
+	return max_segment_size;
+}
+EXPORT_SYMBOL_GPL(virtio_max_dma_size);
+
+static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
+			      dma_addr_t *dma_handle, gfp_t flag)
+{
+	if (vring_use_dma_api(vdev)) {
+		return dma_alloc_coherent(vdev->dev.parent, size,
+					  dma_handle, flag);
+	} else {
+		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
+
+		if (queue) {
+			phys_addr_t phys_addr = virt_to_phys(queue);
+			*dma_handle = (dma_addr_t)phys_addr;
+
+			/*
+			 * Sanity check: make sure we didn't truncate
+			 * the address.  The only arches I can find that
+			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
+			 * are certain non-highmem MIPS and x86
+			 * configurations, but these configurations
+			 * should never allocate physical pages above 32
+			 * bits, so this is fine.  Just in case, throw a
+			 * warning and abort if we end up with an
+			 * unrepresentable address.
+			 */
+			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
+				free_pages_exact(queue, PAGE_ALIGN(size));
+				return NULL;
+			}
+		}
+		return queue;
+	}
+}
+
+static void vring_free_queue(struct virtio_device *vdev, size_t size,
+			     void *queue, dma_addr_t dma_handle)
+{
+	if (vring_use_dma_api(vdev))
+		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
+	else
+		free_pages_exact(queue, PAGE_ALIGN(size));
+}
+
 /*
  * The DMA ops on various arches are rather gnarly right now, and
  * making all of the arch DMA ops work on the vring device itself
@@ -176,7 +326,7 @@
 				   struct scatterlist *sg,
 				   enum dma_data_direction direction)
 {
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return (dma_addr_t)sg_phys(sg);
 
 	/*
@@ -193,19 +343,33 @@
 				   void *cpu_addr, size_t size,
 				   enum dma_data_direction direction)
 {
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return (dma_addr_t)virt_to_phys(cpu_addr);
 
 	return dma_map_single(vring_dma_dev(vq),
 			      cpu_addr, size, direction);
 }
 
-static void vring_unmap_one(const struct vring_virtqueue *vq,
-			    struct vring_desc *desc)
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	if (!vq->use_dma_api)
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
+
+/*
+ * Split ring specific functions - *_split().
+ */
+
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+				  struct vring_desc *desc)
 {
 	u16 flags;
 
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return;
 
 	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
@@ -225,17 +389,9 @@
 	}
 }
 
-static int vring_mapping_error(const struct vring_virtqueue *vq,
-			       dma_addr_t addr)
-{
-	if (!vring_use_dma_api(vq->vq.vdev))
-		return 0;
-
-	return dma_mapping_error(vring_dma_dev(vq), addr);
-}
-
-static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
-					 unsigned int total_sg, gfp_t gfp)
+static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
+					       unsigned int total_sg,
+					       gfp_t gfp)
 {
 	struct vring_desc *desc;
 	unsigned int i;
@@ -256,14 +412,14 @@
 	return desc;
 }
 
-static inline int virtqueue_add(struct virtqueue *_vq,
-				struct scatterlist *sgs[],
-				unsigned int total_sg,
-				unsigned int out_sgs,
-				unsigned int in_sgs,
-				void *data,
-				void *ctx,
-				gfp_t gfp)
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
@@ -282,30 +438,17 @@
 		return -EIO;
 	}
 
-#ifdef DEBUG
-	{
-		ktime_t now = ktime_get();
-
-		/* No kick or get, with .1 second between?  Warn. */
-		if (vq->last_add_time_valid)
-			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
-					    > 100);
-		vq->last_add_time = now;
-		vq->last_add_time_valid = true;
-	}
-#endif
+	LAST_ADD_TIME_UPDATE(vq);
 
 	BUG_ON(total_sg == 0);
 
 	head = vq->free_head;
 
-	/* If the host supports indirect descriptor tables, and we have multiple
-	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
-		desc = alloc_indirect(_vq, total_sg, gfp);
+	if (virtqueue_use_indirect(_vq, total_sg))
+		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
-		WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
+		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 	}
 
 	if (desc) {
@@ -316,7 +459,7 @@
 		descs_used = 1;
 	} else {
 		indirect = false;
-		desc = vq->vring.desc;
+		desc = vq->split.vring.desc;
 		i = head;
 		descs_used = total_sg;
 	}
@@ -372,10 +515,13 @@
 		if (vring_mapping_error(vq, addr))
 			goto unmap_release;
 
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+		vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
+				VRING_DESC_F_INDIRECT);
+		vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
+				addr);
 
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+		vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
+				total_sg * sizeof(struct vring_desc));
 	}
 
 	/* We're using some buffers from the free list. */
@@ -383,27 +529,29 @@
 
 	/* Update free pointer */
 	if (indirect)
-		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
+		vq->free_head = virtio16_to_cpu(_vq->vdev,
+					vq->split.vring.desc[head].next);
 	else
 		vq->free_head = i;
 
 	/* Store token and indirect buffer state. */
-	vq->desc_state[head].data = data;
+	vq->split.desc_state[head].data = data;
 	if (indirect)
-		vq->desc_state[head].indir_desc = desc;
+		vq->split.desc_state[head].indir_desc = desc;
 	else
-		vq->desc_state[head].indir_desc = ctx;
+		vq->split.desc_state[head].indir_desc = ctx;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
-	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
+	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
+	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb(vq->weak_barriers);
-	vq->avail_idx_shadow++;
-	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
+	vq->split.avail_idx_shadow++;
+	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_idx_shadow);
 	vq->num_added++;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -418,28 +566,1153 @@
 
 unmap_release:
 	err_idx = i;
-	i = head;
+
+	if (indirect)
+		i = 0;
+	else
+		i = head;
 
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_one(vq, &desc[i]);
-		i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
+		vring_unmap_one_split(vq, &desc[i]);
+		i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 	}
 
 	if (indirect)
 		kfree(desc);
 
 	END_USE(vq);
+	return -ENOMEM;
+}
+
+static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 new, old;
+	bool needs_kick;
+
+	START_USE(vq);
+	/* We need to expose available array entries before checking avail
+	 * event. */
+	virtio_mb(vq->weak_barriers);
+
+	old = vq->split.avail_idx_shadow - vq->num_added;
+	new = vq->split.avail_idx_shadow;
+	vq->num_added = 0;
+
+	LAST_ADD_TIME_CHECK(vq);
+	LAST_ADD_TIME_INVALID(vq);
+
+	if (vq->event) {
+		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
+					vring_avail_event(&vq->split.vring)),
+					      new, old);
+	} else {
+		needs_kick = !(vq->split.vring.used->flags &
+					cpu_to_virtio16(_vq->vdev,
+						VRING_USED_F_NO_NOTIFY));
+	}
+	END_USE(vq);
+	return needs_kick;
+}
+
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
+			     void **ctx)
+{
+	unsigned int i, j;
+	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+
+	/* Clear data ptr. */
+	vq->split.desc_state[head].data = NULL;
+
+	/* Put back on free list: unmap first-level descriptors and find end */
+	i = head;
+
+	while (vq->split.vring.desc[i].flags & nextflag) {
+		vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
+		i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
+		vq->vq.num_free++;
+	}
+
+	vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
+	vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
+						vq->free_head);
+	vq->free_head = head;
+
+	/* Plus final descriptor */
+	vq->vq.num_free++;
+
+	if (vq->indirect) {
+		struct vring_desc *indir_desc =
+				vq->split.desc_state[head].indir_desc;
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		if (!indir_desc)
+			return;
+
+		len = virtio32_to_cpu(vq->vq.vdev,
+				vq->split.vring.desc[head].len);
+
+		BUG_ON(!(vq->split.vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one_split(vq, &indir_desc[j]);
+
+		kfree(indir_desc);
+		vq->split.desc_state[head].indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = vq->split.desc_state[head].indir_desc;
+	}
+}
+
+static inline bool more_used_split(const struct vring_virtqueue *vq)
+{
+	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
+			vq->split.vring.used->idx);
+}
+
+static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
+					 unsigned int *len,
+					 void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	void *ret;
+	unsigned int i;
+	u16 last_used;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_split(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used array entries after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+	i = virtio32_to_cpu(_vq->vdev,
+			vq->split.vring.used->ring[last_used].id);
+	*len = virtio32_to_cpu(_vq->vdev,
+			vq->split.vring.used->ring[last_used].len);
+
+	if (unlikely(i >= vq->split.vring.num)) {
+		BAD_RING(vq, "id %u out of range\n", i);
+		return NULL;
+	}
+	if (unlikely(!vq->split.desc_state[i].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", i);
+		return NULL;
+	}
+
+	/* detach_buf_split clears data, so grab it now. */
+	ret = vq->split.desc_state[i].data;
+	detach_buf_split(vq, i, ctx);
+	vq->last_used_idx++;
+	/* If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call. */
+	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
+		virtio_store_mb(vq->weak_barriers,
+				&vring_used_event(&vq->split.vring),
+				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
+
+	LAST_ADD_TIME_INVALID(vq);
+
+	END_USE(vq);
+	return ret;
+}
+
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->split.vring.avail->flags =
+				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
+	}
+}
+
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used_idx;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->split.vring.avail->flags =
+				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
+	}
+	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
+			last_used_idx = vq->last_used_idx);
+	END_USE(vq);
+	return last_used_idx;
+}
+
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
+			vq->split.vring.used->idx);
+}
+
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 bufs;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always update the event index to keep code simple. */
+	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->split.vring.avail->flags =
+				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
+	}
+	/* TODO: tune this threshold */
+	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
+
+	virtio_store_mb(vq->weak_barriers,
+			&vring_used_event(&vq->split.vring),
+			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
+
+	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
+					- vq->last_used_idx) > bufs)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->split.vring.num; i++) {
+		if (!vq->split.desc_state[i].data)
+			continue;
+		/* detach_buf_split clears data, so grab it now. */
+		buf = vq->split.desc_state[i].data;
+		detach_buf_split(vq, i, NULL);
+		vq->split.avail_idx_shadow--;
+		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+				vq->split.avail_idx_shadow);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->split.vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
+static struct virtqueue *vring_create_virtqueue_split(
+	unsigned int index,
+	unsigned int num,
+	unsigned int vring_align,
+	struct virtio_device *vdev,
+	bool weak_barriers,
+	bool may_reduce_num,
+	bool context,
+	bool (*notify)(struct virtqueue *),
+	void (*callback)(struct virtqueue *),
+	const char *name)
+{
+	struct virtqueue *vq;
+	void *queue = NULL;
+	dma_addr_t dma_addr;
+	size_t queue_size_in_bytes;
+	struct vring vring;
+
+	/* We assume num is a power of 2. */
+	if (num & (num - 1)) {
+		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
+		return NULL;
+	}
+
+	/* TODO: allocate each queue chunk individually */
+	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+					  &dma_addr,
+					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+		if (queue)
+			break;
+		if (!may_reduce_num)
+			return NULL;
+	}
+
+	if (!num)
+		return NULL;
+
+	if (!queue) {
+		/* Try to get a single page. You are my only hope! */
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
+	}
+	if (!queue)
+		return NULL;
+
+	queue_size_in_bytes = vring_size(num, vring_align);
+	vring_init(&vring, num, queue, vring_align);
+
+	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
+				   notify, callback, name);
+	if (!vq) {
+		vring_free_queue(vdev, queue_size_in_bytes, queue,
+				 dma_addr);
+		return NULL;
+	}
+
+	to_vvq(vq)->split.queue_dma_addr = dma_addr;
+	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
+	to_vvq(vq)->we_own_ring = true;
+
+	return vq;
+}
+
+
+/*
+ * Packed ring specific functions - *_packed().
+ */
+
+static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
+				     struct vring_desc_extra_packed *state)
+{
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	flags = state->flags;
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 state->addr, state->len,
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       state->addr, state->len,
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
+				   struct vring_packed_desc *desc)
+{
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	flags = le16_to_cpu(desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 le64_to_cpu(desc->addr),
+				 le32_to_cpu(desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       le64_to_cpu(desc->addr),
+			       le32_to_cpu(desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
+						       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+
+	/*
+	 * We require lowmem mappings for the descriptors because
+	 * otherwise virt_to_phys will give us bogus addresses in the
+	 * virtqueue.
+	 */
+	gfp &= ~__GFP_HIGHMEM;
+
+	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
+
+	return desc;
+}
+
+static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, err_idx;
+	u16 head, id;
+	dma_addr_t addr;
+
+	head = vq->packed.next_avail_idx;
+	desc = alloc_indirect_packed(total_sg, gfp);
+
+	if (unlikely(vq->vq.num_free < 1)) {
+		pr_debug("Can't add buf len 1 - avail = 0\n");
+		kfree(desc);
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	i = 0;
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			desc[i].flags = cpu_to_le16(n < out_sgs ?
+						0 : VRING_DESC_F_WRITE);
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			i++;
+		}
+	}
+
+	/* Now that the indirect table is filled in, map it. */
+	addr = vring_map_single(vq, desc,
+			total_sg * sizeof(struct vring_packed_desc),
+			DMA_TO_DEVICE);
+	if (vring_mapping_error(vq, addr))
+		goto unmap_release;
+
+	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
+	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
+				sizeof(struct vring_packed_desc));
+	vq->packed.vring.desc[head].id = cpu_to_le16(id);
+
+	if (vq->use_dma_api) {
+		vq->packed.desc_extra[id].addr = addr;
+		vq->packed.desc_extra[id].len = total_sg *
+				sizeof(struct vring_packed_desc);
+		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
+						  vq->packed.avail_used_flags;
+	}
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
+						vq->packed.avail_used_flags);
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= 1;
+
+	/* Update free pointer */
+	n = head + 1;
+	if (n >= vq->packed.vring.num) {
+		n = 0;
+		vq->packed.avail_wrap_counter ^= 1;
+		vq->packed.avail_used_flags ^=
+				1 << VRING_PACKED_DESC_F_AVAIL |
+				1 << VRING_PACKED_DESC_F_USED;
+	}
+	vq->packed.next_avail_idx = n;
+	vq->free_head = vq->packed.desc_state[id].next;
+
+	/* Store token and indirect buffer state. */
+	vq->packed.desc_state[id].num = 1;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = desc;
+	vq->packed.desc_state[id].last = id;
+
+	vq->num_added += 1;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	err_idx = i;
+
+	for (i = 0; i < err_idx; i++)
+		vring_unmap_desc_packed(vq, &desc[i]);
+
+	kfree(desc);
+
+	END_USE(vq);
+	return -ENOMEM;
+}
+
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used, err_idx;
+	__le16 uninitialized_var(head_flags), flags;
+	u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	LAST_ADD_TIME_UPDATE(vq);
+
+	BUG_ON(total_sg == 0);
+
+	if (virtqueue_use_indirect(_vq, total_sg))
+		return virtqueue_add_indirect_packed(vq, sgs, total_sg,
+				out_sgs, in_sgs, data, gfp);
+
+	head = vq->packed.next_avail_idx;
+	avail_used_flags = vq->packed.avail_used_flags;
+
+	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
+
+	desc = vq->packed.vring.desc;
+	i = head;
+	descs_used = total_sg;
+
+	if (unlikely(vq->vq.num_free < descs_used)) {
+		pr_debug("Can't add buf len %i - avail = %i\n",
+			 descs_used, vq->vq.num_free);
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	curr = id;
+	c = 0;
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			flags = cpu_to_le16(vq->packed.avail_used_flags |
+				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
+				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
+			if (i == head)
+				head_flags = flags;
+			else
+				desc[i].flags = flags;
+
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			desc[i].id = cpu_to_le16(id);
+
+			if (unlikely(vq->use_dma_api)) {
+				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].len = sg->length;
+				vq->packed.desc_extra[curr].flags =
+					le16_to_cpu(flags);
+			}
+			prev = curr;
+			curr = vq->packed.desc_state[curr].next;
+
+			if ((unlikely(++i >= vq->packed.vring.num))) {
+				i = 0;
+				vq->packed.avail_used_flags ^=
+					1 << VRING_PACKED_DESC_F_AVAIL |
+					1 << VRING_PACKED_DESC_F_USED;
+			}
+		}
+	}
+
+	if (i < head)
+		vq->packed.avail_wrap_counter ^= 1;
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
+	/* Update free pointer */
+	vq->packed.next_avail_idx = i;
+	vq->free_head = curr;
+
+	/* Store token. */
+	vq->packed.desc_state[id].num = descs_used;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = ctx;
+	vq->packed.desc_state[id].last = prev;
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = head_flags;
+	vq->num_added += descs_used;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	vq->packed.avail_used_flags = avail_used_flags;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_desc_packed(vq, &desc[i]);
+		i++;
+		if (i >= vq->packed.vring.num)
+			i = 0;
+	}
+
+	END_USE(vq);
 	return -EIO;
 }
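+
+/*
+ * Editor's illustrative sketch, not part of the upstream change: a packed
+ * ring descriptor is available when its AVAIL bit equals the driver's
+ * wrap counter and its USED bit is the inverse, which is the state the
+ * avail_used_flags bookkeeping above maintains. The helper is hypothetical.
+ */
+static inline u16 example_avail_used_flags(bool avail_wrap_counter)
+{
+	return (avail_wrap_counter ? 1 : 0) << VRING_PACKED_DESC_F_AVAIL |
+	       (avail_wrap_counter ? 0 : 1) << VRING_PACKED_DESC_F_USED;
+}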
 
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
+	bool needs_kick;
+	union {
+		struct {
+			__le16 off_wrap;
+			__le16 flags;
+		};
+		u32 u32;
+	} snapshot;
+
+	START_USE(vq);
+
+	/*
+	 * We need to expose the new flags value before checking notification
+	 * suppressions.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	old = vq->packed.next_avail_idx - vq->num_added;
+	new = vq->packed.next_avail_idx;
+	vq->num_added = 0;
+
+	snapshot.u32 = *(u32 *)vq->packed.vring.device;
+	flags = le16_to_cpu(snapshot.flags);
+
+	LAST_ADD_TIME_CHECK(vq);
+	LAST_ADD_TIME_INVALID(vq);
+
+	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
+		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
+		goto out;
+	}
+
+	off_wrap = le16_to_cpu(snapshot.off_wrap);
+
+	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
+	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
+	if (wrap_counter != vq->packed.avail_wrap_counter)
+		event_idx -= vq->packed.vring.num;
+
+	needs_kick = vring_need_event(event_idx, new, old);
+out:
+	END_USE(vq);
+	return needs_kick;
+}
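+
+/*
+ * Editor's note (illustrative, not part of the upstream change):
+ * vring_need_event(event_idx, new_idx, old) from
+ * include/uapi/linux/virtio_ring.h evaluates
+ *	(__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old)
+ * i.e. a notification is needed only when the event index the device
+ * requested falls inside the batch made available since the last kick.
+ * Example: old = 10, new_idx = 14, event_idx = 12 gives 1 < 4 (kick);
+ * event_idx = 20 gives 65529 < 4 in u16 arithmetic (no kick).
+ */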
+
+static void detach_buf_packed(struct vring_virtqueue *vq,
+			      unsigned int id, void **ctx)
+{
+	struct vring_desc_state_packed *state = NULL;
+	struct vring_packed_desc *desc;
+	unsigned int i, curr;
+
+	state = &vq->packed.desc_state[id];
+
+	/* Clear data ptr. */
+	state->data = NULL;
+
+	vq->packed.desc_state[state->last].next = vq->free_head;
+	vq->free_head = id;
+	vq->vq.num_free += state->num;
+
+	if (unlikely(vq->use_dma_api)) {
+		curr = id;
+		for (i = 0; i < state->num; i++) {
+			vring_unmap_state_packed(vq,
+				&vq->packed.desc_extra[curr]);
+			curr = vq->packed.desc_state[curr].next;
+		}
+	}
+
+	if (vq->indirect) {
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		desc = state->indir_desc;
+		if (!desc)
+			return;
+
+		if (vq->use_dma_api) {
+			len = vq->packed.desc_extra[id].len;
+			for (i = 0; i < len / sizeof(struct vring_packed_desc);
+					i++)
+				vring_unmap_desc_packed(vq, &desc[i]);
+		}
+		kfree(desc);
+		state->indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = state->indir_desc;
+	}
+}
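+
+/*
+ * Editor's note (illustrative, not part of the upstream change):
+ * detach_buf_packed() above splices the whole chain id .. state->last back
+ * onto the free list in one step: the desc_state[].next links written at
+ * add time are still intact, so only the tail's next pointer and free_head
+ * need updating.
+ */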
+
+static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
+				       u16 idx, bool used_wrap_counter)
+{
+	bool avail, used;
+	u16 flags;
+
+	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
+	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
+	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
+
+	return avail == used && used == used_wrap_counter;
+}
+
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+	return is_used_desc_packed(vq, vq->last_used_idx,
+			vq->packed.used_wrap_counter);
+}
+
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
+					  unsigned int *len,
+					  void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used, id;
+	void *ret;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_packed(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used elements after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = vq->last_used_idx;
+	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
+	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
+
+	if (unlikely(id >= vq->packed.vring.num)) {
+		BAD_RING(vq, "id %u out of range\n", id);
+		return NULL;
+	}
+	if (unlikely(!vq->packed.desc_state[id].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", id);
+		return NULL;
+	}
+
+	/* detach_buf_packed clears data, so grab it now. */
+	ret = vq->packed.desc_state[id].data;
+	detach_buf_packed(vq, id, ctx);
+
+	vq->last_used_idx += vq->packed.desc_state[id].num;
+	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
+		vq->last_used_idx -= vq->packed.vring.num;
+		vq->packed.used_wrap_counter ^= 1;
+	}
+
+	/*
+	 * If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call.
+	 */
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
+		virtio_store_mb(vq->weak_barriers,
+				&vq->packed.vring.driver->off_wrap,
+				cpu_to_le16(vq->last_used_idx |
+					(vq->packed.used_wrap_counter <<
+					 VRING_PACKED_EVENT_F_WRAP_CTR)));
+
+	LAST_ADD_TIME_INVALID(vq);
+
+	END_USE(vq);
+	return ret;
+}
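+
+/*
+ * Editor's illustrative sketch, not part of the upstream change: a driver
+ * completion callback typically drains used buffers through the generic
+ * virtqueue_get_buf() wrapper further below, which routes here for packed
+ * rings. The function name and the token handling are hypothetical.
+ */
+static void __maybe_unused example_drain_used(struct virtqueue *vq)
+{
+	unsigned int len;
+	void *token;
+
+	while ((token = virtqueue_get_buf(vq, &len)) != NULL) {
+		/* @len is the number of bytes the device wrote. */
+		/* Process @token, the cookie given to virtqueue_add_*(). */
+	}
+}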
+
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+		vq->packed.vring.driver->flags =
+			cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+}
+
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	START_USE(vq);
+
+	/*
+	 * We optimistically turn back on interrupts, then check if there was
+	 * more to do.
+	 */
+
+	if (vq->event) {
+		vq->packed.vring.driver->off_wrap =
+			cpu_to_le16(vq->last_used_idx |
+				(vq->packed.used_wrap_counter <<
+				 VRING_PACKED_EVENT_F_WRAP_CTR));
+		/*
+		 * We need to update event offset and event wrap
+		 * counter first before updating event flags.
+		 */
+		virtio_wmb(vq->weak_barriers);
+	}
+
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = vq->event ?
+				VRING_PACKED_EVENT_FLAG_DESC :
+				VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags =
+				cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	END_USE(vq);
+	return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
+			VRING_PACKED_EVENT_F_WRAP_CTR);
+}
+
+static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	bool wrap_counter;
+	u16 used_idx;
+
+	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
+	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
+
+	return is_used_desc_packed(vq, used_idx, wrap_counter);
+}
+
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 used_idx, wrap_counter;
+	u16 bufs;
+
+	START_USE(vq);
+
+	/*
+	 * We optimistically turn back on interrupts, then check if there was
+	 * more to do.
+	 */
+
+	if (vq->event) {
+		/* TODO: tune this threshold */
+		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
+		wrap_counter = vq->packed.used_wrap_counter;
+
+		used_idx = vq->last_used_idx + bufs;
+		if (used_idx >= vq->packed.vring.num) {
+			used_idx -= vq->packed.vring.num;
+			wrap_counter ^= 1;
+		}
+
+		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
+			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
+
+		/*
+		 * We need to update event offset and event wrap
+		 * counter first before updating event flags.
+		 */
+		virtio_wmb(vq->weak_barriers);
+	}
+
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = vq->event ?
+				VRING_PACKED_EVENT_FLAG_DESC :
+				VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags =
+				cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	/*
+	 * We need to update event suppression structure first
+	 * before re-checking for more used buffers.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	if (is_used_desc_packed(vq,
+				vq->last_used_idx,
+				vq->packed.used_wrap_counter)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
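+
+/*
+ * Editor's illustrative sketch, not part of the upstream change: the
+ * suppression primitives above back the usual driver pattern of disabling
+ * callbacks, draining the ring, then re-enabling and re-checking for a
+ * race. The function name is hypothetical.
+ */
+static void __maybe_unused example_process_ring(struct virtqueue *vq)
+{
+	unsigned int opaque, len;
+	void *token;
+
+	virtqueue_disable_cb(vq);
+
+	while ((token = virtqueue_get_buf(vq, &len)) != NULL) {
+		/* Handle @token. */
+	}
+
+	opaque = virtqueue_enable_cb_prepare(vq);
+	if (virtqueue_poll(vq, opaque)) {
+		/* Buffers raced in while re-enabling; keep processing. */
+		virtqueue_disable_cb(vq);
+	}
+}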
+
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->packed.vring.num; i++) {
+		if (!vq->packed.desc_state[i].data)
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->packed.desc_state[i].data;
+		detach_buf_packed(vq, i, NULL);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
+static struct virtqueue *vring_create_virtqueue_packed(
+	unsigned int index,
+	unsigned int num,
+	unsigned int vring_align,
+	struct virtio_device *vdev,
+	bool weak_barriers,
+	bool may_reduce_num,
+	bool context,
+	bool (*notify)(struct virtqueue *),
+	void (*callback)(struct virtqueue *),
+	const char *name)
+{
+	struct vring_virtqueue *vq;
+	struct vring_packed_desc *ring;
+	struct vring_packed_desc_event *driver, *device;
+	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
+	size_t ring_size_in_bytes, event_size_in_bytes;
+	unsigned int i;
+
+	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
+
+	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
+				 &ring_dma_addr,
+				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!ring)
+		goto err_ring;
+
+	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
+
+	driver = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &driver_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!driver)
+		goto err_driver;
+
+	device = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &device_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!device)
+		goto err_device;
+
+	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
+	if (!vq)
+		goto err_vq;
+
+	vq->vq.callback = callback;
+	vq->vq.vdev = vdev;
+	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
+	vq->we_own_ring = true;
+	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
+	vq->broken = false;
+	vq->last_used_idx = 0;
+	vq->num_added = 0;
+	vq->packed_ring = true;
+	vq->use_dma_api = vring_use_dma_api(vdev);
+	list_add_tail(&vq->vq.list, &vdev->vqs);
+#ifdef DEBUG
+	vq->in_use = false;
+	vq->last_add_time_valid = false;
+#endif
+
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+		!context;
+	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+
+	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
+		vq->weak_barriers = false;
+
+	vq->packed.ring_dma_addr = ring_dma_addr;
+	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
+	vq->packed.device_event_dma_addr = device_event_dma_addr;
+
+	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
+	vq->packed.event_size_in_bytes = event_size_in_bytes;
+
+	vq->packed.vring.num = num;
+	vq->packed.vring.desc = ring;
+	vq->packed.vring.driver = driver;
+	vq->packed.vring.device = device;
+
+	vq->packed.next_avail_idx = 0;
+	vq->packed.avail_wrap_counter = 1;
+	vq->packed.used_wrap_counter = 1;
+	vq->packed.event_flags_shadow = 0;
+	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
+
+	vq->packed.desc_state = kmalloc_array(num,
+			sizeof(struct vring_desc_state_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_state)
+		goto err_desc_state;
+
+	memset(vq->packed.desc_state, 0,
+		num * sizeof(struct vring_desc_state_packed));
+
+	/* Put everything in free lists. */
+	vq->free_head = 0;
+	for (i = 0; i < num-1; i++)
+		vq->packed.desc_state[i].next = i + 1;
+
+	vq->packed.desc_extra = kmalloc_array(num,
+			sizeof(struct vring_desc_extra_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_extra)
+		goto err_desc_extra;
+
+	memset(vq->packed.desc_extra, 0,
+		num * sizeof(struct vring_desc_extra_packed));
+
+	/* No callback?  Tell other side not to bother us. */
+	if (!callback) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+		vq->packed.vring.driver->flags =
+			cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	return &vq->vq;
+
+err_desc_extra:
+	kfree(vq->packed.desc_state);
+err_desc_state:
+	kfree(vq);
+err_vq:
+	vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
+err_device:
+	vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
+err_driver:
+	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
+err_ring:
+	return NULL;
+}
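+
+/*
+ * Editor's note (illustrative, not part of the upstream change): the packed
+ * layout allocated above is num * sizeof(struct vring_packed_desc) bytes of
+ * descriptors (16 bytes each) plus two 4-byte vring_packed_desc_event
+ * structures, e.g. a 256-entry queue gets a 4096-byte descriptor ring and
+ * separate DMA addresses for the driver and device event areas.
+ */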
+
+/*
+ * Generic functions and exported symbols.
+ */
+
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				void *ctx,
+				gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp) :
+				 virtqueue_add_split(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp);
+}
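+
+/*
+ * Editor's illustrative sketch, not part of the upstream change: a typical
+ * caller builds a scatterlist array and goes through virtqueue_add_sgs()
+ * (documented below), which funnels into virtqueue_add() above. The
+ * request/response layout here is hypothetical.
+ */
+static int __maybe_unused example_queue_request(struct virtqueue *vq,
+				void *out_buf, unsigned int out_len,
+				void *in_buf, unsigned int in_len,
+				void *token)
+{
+	struct scatterlist out, in, *sgs[2];
+
+	sg_init_one(&out, out_buf, out_len);	/* device-readable */
+	sg_init_one(&in, in_buf, in_len);	/* device-writable */
+	sgs[0] = &out;
+	sgs[1] = &in;
+
+	/* One readable sg list followed by one writable sg list. */
+	return virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
+}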
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  * @sgs: array of terminated scatterlists.
- * @out_num: the number of scatterlists readable by other side
- * @in_num: the number of scatterlists which are writable (after readable ones)
+ * @out_sgs: the number of scatterlists readable by other side
+ * @in_sgs: the number of scatterlists which are writable (after readable ones)
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -460,6 +1733,7 @@
 	/* Count them first. */
 	for (i = 0; i < out_sgs + in_sgs; i++) {
 		struct scatterlist *sg;
+
 		for (sg = sgs[i]; sg; sg = sg_next(sg))
 			total_sg++;
 	}
@@ -538,7 +1812,7 @@
 
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
- * @vq: the struct virtqueue
+ * @_vq: the struct virtqueue
  *
  * Instead of virtqueue_kick(), you can do:
  *	if (virtqueue_kick_prepare(vq))
@@ -550,40 +1824,15 @@
 bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 new, old;
-	bool needs_kick;
 
-	START_USE(vq);
-	/* We need to expose available array entries before checking avail
-	 * event. */
-	virtio_mb(vq->weak_barriers);
-
-	old = vq->avail_idx_shadow - vq->num_added;
-	new = vq->avail_idx_shadow;
-	vq->num_added = 0;
-
-#ifdef DEBUG
-	if (vq->last_add_time_valid) {
-		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
-					      vq->last_add_time)) > 100);
-	}
-	vq->last_add_time_valid = false;
-#endif
-
-	if (vq->event) {
-		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
-					      new, old);
-	} else {
-		needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
-	}
-	END_USE(vq);
-	return needs_kick;
+	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
+				 virtqueue_kick_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
 
 /**
  * virtqueue_notify - second half of split virtqueue_kick call.
- * @vq: the struct virtqueue
+ * @_vq: the struct virtqueue
  *
  * This does not need to be serialized.
  *
@@ -625,64 +1874,11 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
-		       void **ctx)
-{
-	unsigned int i, j;
-	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
-
-	/* Clear data ptr. */
-	vq->desc_state[head].data = NULL;
-
-	/* Put back on free list: unmap first-level descriptors and find end */
-	i = head;
-
-	while (vq->vring.desc[i].flags & nextflag) {
-		vring_unmap_one(vq, &vq->vring.desc[i]);
-		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
-		vq->vq.num_free++;
-	}
-
-	vring_unmap_one(vq, &vq->vring.desc[i]);
-	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
-	vq->free_head = head;
-
-	/* Plus final descriptor */
-	vq->vq.num_free++;
-
-	if (vq->indirect) {
-		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-		u32 len;
-
-		/* Free the indirect table, if any, now that it's unmapped. */
-		if (!indir_desc)
-			return;
-
-		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
-
-		BUG_ON(!(vq->vring.desc[head].flags &
-			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
-		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
-
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one(vq, &indir_desc[j]);
-
-		kfree(indir_desc);
-		vq->desc_state[head].indir_desc = NULL;
-	} else if (ctx) {
-		*ctx = vq->desc_state[head].indir_desc;
-	}
-}
-
-static inline bool more_used(const struct vring_virtqueue *vq)
-{
-	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
-}
-
 /**
  * virtqueue_get_buf - get the next used buffer
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  * @len: the length written into the buffer
+ * @ctx: extra context for the token
  *
  * If the device wrote data into the buffer, @len will be set to the
  * amount written.  This means you don't need to clear the buffer
@@ -699,57 +1895,9 @@
 			    void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	void *ret;
-	unsigned int i;
-	u16 last_used;
 
-	START_USE(vq);
-
-	if (unlikely(vq->broken)) {
-		END_USE(vq);
-		return NULL;
-	}
-
-	if (!more_used(vq)) {
-		pr_debug("No more buffers in queue\n");
-		END_USE(vq);
-		return NULL;
-	}
-
-	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb(vq->weak_barriers);
-
-	last_used = (vq->last_used_idx & (vq->vring.num - 1));
-	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
-	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
-
-	if (unlikely(i >= vq->vring.num)) {
-		BAD_RING(vq, "id %u out of range\n", i);
-		return NULL;
-	}
-	if (unlikely(!vq->desc_state[i].data)) {
-		BAD_RING(vq, "id %u is not a head!\n", i);
-		return NULL;
-	}
-
-	/* detach_buf clears data, so grab it now. */
-	ret = vq->desc_state[i].data;
-	detach_buf(vq, i, ctx);
-	vq->last_used_idx++;
-	/* If we expect an interrupt for the next entry, tell host
-	 * by writing event index and flush out the write before
-	 * the read in the next get_buf call. */
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
-		virtio_store_mb(vq->weak_barriers,
-				&vring_used_event(&vq->vring),
-				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
-
-#ifdef DEBUG
-	vq->last_add_time_valid = false;
-#endif
-
-	END_USE(vq);
-	return ret;
+	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
+				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
@@ -760,7 +1908,7 @@
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 /**
  * virtqueue_disable_cb - disable callbacks
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  *
  * Note that this is not necessarily synchronous, hence unreliable and only
  * useful as an optimization.
@@ -771,18 +1919,16 @@
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-
+	if (vq->packed_ring)
+		virtqueue_disable_cb_packed(_vq);
+	else
+		virtqueue_disable_cb_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
 /**
  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  *
  * This re-enables callbacks; it returns current queue state
  * in an opaque unsigned value. This value should be later tested by
@@ -795,29 +1941,15 @@
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 last_used_idx;
 
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always do both to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
-	END_USE(vq);
-	return last_used_idx;
+	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
+				 virtqueue_enable_cb_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
 /**
  * virtqueue_poll - query pending used buffers
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
  *
  * Returns "true" if there are pending used buffers in the queue.
@@ -829,13 +1961,14 @@
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	virtio_mb(vq->weak_barriers);
-	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
+	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
+				 virtqueue_poll_split(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_poll);
 
 /**
  * virtqueue_enable_cb - restart callbacks after disable_cb.
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  *
  * This re-enables callbacks; it returns "false" if there are pending
  * buffers in the queue, to detect a possible race between the driver
@@ -847,13 +1980,14 @@
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
 	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
+
 	return !virtqueue_poll(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
 /**
  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  *
  * This re-enables callbacks but hints to the other side to delay
  * interrupts until most of the available buffers have been processed;
@@ -867,40 +2001,15 @@
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 bufs;
 
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always update the event index to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-	/* TODO: tune this threshold */
-	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
-
-	virtio_store_mb(vq->weak_barriers,
-			&vring_used_event(&vq->vring),
-			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
-
-	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
-		END_USE(vq);
-		return false;
-	}
-
-	END_USE(vq);
-	return true;
+	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
+				 virtqueue_enable_cb_delayed_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
 /**
  * virtqueue_detach_unused_buf - detach first unused buffer
- * @vq: the struct virtqueue we're talking about.
+ * @_vq: the struct virtqueue we're talking about.
  *
  * Returns NULL or the "data" token handed to virtqueue_add_*().
  * This is not valid on an active queue; it is useful only for device
@@ -909,30 +2018,17 @@
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i;
-	void *buf;
 
-	START_USE(vq);
-
-	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->desc_state[i].data)
-			continue;
-		/* detach_buf clears data, so grab it now. */
-		buf = vq->desc_state[i].data;
-		detach_buf(vq, i, NULL);
-		vq->avail_idx_shadow--;
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
-		END_USE(vq);
-		return buf;
-	}
-	/* That should have freed everything. */
-	BUG_ON(vq->vq.num_free != vq->vring.num);
-
-	END_USE(vq);
-	return NULL;
+	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
+				 virtqueue_detach_unused_buf_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
+static inline bool more_used(const struct vring_virtqueue *vq)
+{
+	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
+}
+
 irqreturn_t vring_interrupt(int irq, void *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -953,6 +2049,7 @@
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
+/* Only available for split ring */
 struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
@@ -965,27 +2062,26 @@
 	unsigned int i;
 	struct vring_virtqueue *vq;
 
-	vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
-		     GFP_KERNEL);
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
+	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
-	vq->vring = vring;
+	vq->packed_ring = false;
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->vq.num_free = vring.num;
 	vq->vq.index = index;
 	vq->we_own_ring = false;
-	vq->queue_dma_addr = 0;
-	vq->queue_size_in_bytes = 0;
 	vq->notify = notify;
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
-	vq->avail_flags_shadow = 0;
-	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
+	vq->use_dma_api = vring_use_dma_api(vdev);
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
@@ -996,65 +2092,42 @@
 		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
+		vq->weak_barriers = false;
+
+	vq->split.queue_dma_addr = 0;
+	vq->split.queue_size_in_bytes = 0;
+
+	vq->split.vring = vring;
+	vq->split.avail_flags_shadow = 0;
+	vq->split.avail_idx_shadow = 0;
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
+					vq->split.avail_flags_shadow);
+	}
+
+	vq->split.desc_state = kmalloc_array(vring.num,
+			sizeof(struct vring_desc_state_split), GFP_KERNEL);
+	if (!vq->split.desc_state) {
+		kfree(vq);
+		return NULL;
 	}
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	for (i = 0; i < vring.num-1; i++)
-		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-	memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
+		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
+	memset(vq->split.desc_state, 0, vring.num *
+			sizeof(struct vring_desc_state_split));
 
 	return &vq->vq;
 }
 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
 
-static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
-			      dma_addr_t *dma_handle, gfp_t flag)
-{
-	if (vring_use_dma_api(vdev)) {
-		return dma_alloc_coherent(vdev->dev.parent, size,
-					  dma_handle, flag);
-	} else {
-		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
-		if (queue) {
-			phys_addr_t phys_addr = virt_to_phys(queue);
-			*dma_handle = (dma_addr_t)phys_addr;
-
-			/*
-			 * Sanity check: make sure we dind't truncate
-			 * the address.  The only arches I can find that
-			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
-			 * are certain non-highmem MIPS and x86
-			 * configurations, but these configurations
-			 * should never allocate physical pages above 32
-			 * bits, so this is fine.  Just in case, throw a
-			 * warning and abort if we end up with an
-			 * unrepresentable address.
-			 */
-			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
-				free_pages_exact(queue, PAGE_ALIGN(size));
-				return NULL;
-			}
-		}
-		return queue;
-	}
-}
-
-static void vring_free_queue(struct virtio_device *vdev, size_t size,
-			     void *queue, dma_addr_t dma_handle)
-{
-	if (vring_use_dma_api(vdev)) {
-		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
-	} else {
-		free_pages_exact(queue, PAGE_ALIGN(size));
-	}
-}
-
 struct virtqueue *vring_create_virtqueue(
 	unsigned int index,
 	unsigned int num,
@@ -1067,57 +2140,19 @@
 	void (*callback)(struct virtqueue *),
 	const char *name)
 {
-	struct virtqueue *vq;
-	void *queue = NULL;
-	dma_addr_t dma_addr;
-	size_t queue_size_in_bytes;
-	struct vring vring;
 
-	/* We assume num is a power of 2. */
-	if (num & (num - 1)) {
-		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
-		return NULL;
-	}
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return vring_create_virtqueue_packed(index, num, vring_align,
+				vdev, weak_barriers, may_reduce_num,
+				context, notify, callback, name);
 
-	/* TODO: allocate each queue chunk individually */
-	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
-					  &dma_addr,
-					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
-		if (queue)
-			break;
-	}
-
-	if (!num)
-		return NULL;
-
-	if (!queue) {
-		/* Try to get a single page. You are my only hope! */
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
-					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
-	}
-	if (!queue)
-		return NULL;
-
-	queue_size_in_bytes = vring_size(num, vring_align);
-	vring_init(&vring, num, queue, vring_align);
-
-	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-				   notify, callback, name);
-	if (!vq) {
-		vring_free_queue(vdev, queue_size_in_bytes, queue,
-				 dma_addr);
-		return NULL;
-	}
-
-	to_vvq(vq)->queue_dma_addr = dma_addr;
-	to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
-	to_vvq(vq)->we_own_ring = true;
-
-	return vq;
+	return vring_create_virtqueue_split(index, num, vring_align,
+			vdev, weak_barriers, may_reduce_num,
+			context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
 
+/* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
 				      unsigned int vring_align,
@@ -1130,6 +2165,10 @@
 				      const char *name)
 {
 	struct vring vring;
+
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
 	vring_init(&vring, num, pages, vring_align);
 	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				     notify, callback, name);
@@ -1141,8 +2180,32 @@
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	if (vq->we_own_ring) {
-		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
-				 vq->vring.desc, vq->queue_dma_addr);
+		if (vq->packed_ring) {
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.ring_size_in_bytes,
+					 vq->packed.vring.desc,
+					 vq->packed.ring_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.driver,
+					 vq->packed.driver_event_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.device,
+					 vq->packed.device_event_dma_addr);
+
+			kfree(vq->packed.desc_state);
+			kfree(vq->packed.desc_extra);
+		} else {
+			vring_free_queue(vq->vq.vdev,
+					 vq->split.queue_size_in_bytes,
+					 vq->split.vring.desc,
+					 vq->split.queue_dma_addr);
+
+			kfree(vq->split.desc_state);
+		}
 	}
 	list_del(&_vq->list);
 	kfree(vq);
@@ -1164,6 +2227,10 @@
 			break;
 		case VIRTIO_F_IOMMU_PLATFORM:
 			break;
+		case VIRTIO_F_RING_PACKED:
+			break;
+		case VIRTIO_F_ORDER_PLATFORM:
+			break;
 		default:
 			/* We don't understand this bit. */
 			__virtio_clear_bit(vdev, i);
@@ -1174,7 +2241,7 @@
 
 /**
  * virtqueue_get_vring_size - return the size of the virtqueue's vring
- * @vq: the struct virtqueue containing the vring of interest.
+ * @_vq: the struct virtqueue containing the vring of interest.
  *
  * Returns the size of the vring.  This is mainly used for boasting to
  * userspace.  Unlike other operations, this need not be serialized.
@@ -1184,7 +2251,7 @@
 
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->vring.num;
+	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
@@ -1217,7 +2284,10 @@
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr;
+	if (vq->packed_ring)
+		return vq->packed.ring_dma_addr;
+
+	return vq->split.queue_dma_addr;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
 
@@ -1227,8 +2297,11 @@
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr +
-		((char *)vq->vring.avail - (char *)vq->vring.desc);
+	if (vq->packed_ring)
+		return vq->packed.driver_event_dma_addr;
+
+	return vq->split.queue_dma_addr +
+		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
 
@@ -1238,14 +2311,18 @@
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr +
-		((char *)vq->vring.used - (char *)vq->vring.desc);
+	if (vq->packed_ring)
+		return vq->packed.device_event_dma_addr;
+
+	return vq->split.queue_dma_addr +
+		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
 
+/* Only available for split ring */
 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 {
-	return &to_vvq(vq)->vring;
+	return &to_vvq(vq)->split.vring;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);