Blame - drivers/net/xen-netback/netback.c - hafnium/third_party/linux.git

blob: 0020b2e8c279d40059f470a68fb2f0d8d1599cfb [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/*
				2	* Back-end of the driver for virtual network devices. This portion of the
				3	* driver exports a 'unified' network-device interface that can be accessed
				4	* by any operating system that implements a compatible front end. A
				5	* reference front-end implementation can be found in:
				6	* drivers/net/xen-netfront.c
				7	*
				8	* Copyright (c) 2002-2005, K A Fraser
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public License version 2
				12	* as published by the Free Software Foundation; or, when distributed
				13	* separately from the Linux kernel or incorporated into other
				14	* software packages, subject to the following license:
				15	*
				16	* Permission is hereby granted, free of charge, to any person obtaining a copy
				17	* of this source file (the "Software"), to deal in the Software without
				18	* restriction, including without limitation the rights to use, copy, modify,
				19	* merge, publish, distribute, sublicense, and/or sell copies of the Software,
				20	* and to permit persons to whom the Software is furnished to do so, subject to
				21	* the following conditions:
				22	*
				23	* The above copyright notice and this permission notice shall be included in
				24	* all copies or substantial portions of the Software.
				25	*
				26	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				27	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				28	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				29	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				30	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				31	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				32	* IN THE SOFTWARE.
				33	*/
				34
				35	#include "common.h"
				36
				37	#include <linux/kthread.h>
				38	#include <linux/if_vlan.h>
				39	#include <linux/udp.h>
				40	#include <linux/highmem.h>
				41
				42	#include <net/tcp.h>
				43
				44	#include <xen/xen.h>
				45	#include <xen/events.h>
				46	#include <xen/interface/memory.h>
				47	#include <xen/page.h>
				48
				49	#include <asm/xen/hypercall.h>
				50
				51	/* Provide an option to disable split event channels at load time as
				52	* event channels are limited resource. Split event channels are
				53	* enabled by default.
				54	*/
				55	bool separate_tx_rx_irq = true;
				56	module_param(separate_tx_rx_irq, bool, 0644);
				57
				58	/* The time that packets can stay on the guest Rx internal queue
				59	* before they are dropped.
				60	*/
				61	unsigned int rx_drain_timeout_msecs = 10000;
				62	module_param(rx_drain_timeout_msecs, uint, 0444);
				63
				64	/* The length of time before the frontend is considered unresponsive
				65	* because it isn't providing Rx slots.
				66	*/
				67	unsigned int rx_stall_timeout_msecs = 60000;
				68	module_param(rx_stall_timeout_msecs, uint, 0444);
				69
				70	#define MAX_QUEUES_DEFAULT 8
				71	unsigned int xenvif_max_queues;
				72	module_param_named(max_queues, xenvif_max_queues, uint, 0644);
				73	MODULE_PARM_DESC(max_queues,
				74	"Maximum number of queues per virtual interface");
				75
				76	/*
				77	* This is the maximum slots a skb can have. If a guest sends a skb
				78	* which exceeds this limit it is considered malicious.
				79	*/
				80	#define FATAL_SKB_SLOTS_DEFAULT 20
				81	static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
				82	module_param(fatal_skb_slots, uint, 0444);
				83
				84	/* The amount to copy out of the first guest Tx slot into the skb's
				85	* linear area. If the first slot has more data, it will be mapped
				86	* and put into the first frag.
				87	*
				88	* This is sized to avoid pulling headers from the frags for most
				89	* TCP/IP packets.
				90	*/
				91	#define XEN_NETBACK_TX_COPY_LEN 128
				92
				93	/* This is the maximum number of flows in the hash cache. */
				94	#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
				95	unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
				96	module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
				97	MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
				98
				99	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
				100	u8 status);
				101
				102	static void make_tx_response(struct xenvif_queue *queue,
				103	struct xen_netif_tx_request *txp,
				104	unsigned int extra_count,
				105	s8 st);
				106	static void push_tx_responses(struct xenvif_queue *queue);
				107
				108	static inline int tx_work_todo(struct xenvif_queue *queue);
				109
				110	static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				111	u16 idx)
				112	{
				113	return page_to_pfn(queue->mmap_pages[idx]);
				114	}
				115
				116	static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
				117	u16 idx)
				118	{
				119	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
				120	}
				121
				122	#define callback_param(vif, pending_idx) \
				123	(vif->pending_tx_info[pending_idx].callback_struct)
				124
				125	/* Find the containing VIF's structure from a pointer in pending_tx_info array
				126	*/
				127	static inline struct xenvif_queue ubuf_to_queue(const struct ubuf_info ubuf)
				128	{
				129	u16 pending_idx = ubuf->desc;
				130	struct pending_tx_info *temp =
				131	container_of(ubuf, struct pending_tx_info, callback_struct);
				132	return container_of(temp - pending_idx,
				133	struct xenvif_queue,
				134	pending_tx_info[0]);
				135	}
				136
				137	static u16 frag_get_pending_idx(skb_frag_t *frag)
				138	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	139	return (u16)skb_frag_off(frag);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	140	}
				141
				142	static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
				143	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	144	skb_frag_off_set(frag, pending_idx);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	145	}
				146
				147	static inline pending_ring_idx_t pending_index(unsigned i)
				148	{
				149	return i & (MAX_PENDING_REQS-1);
				150	}
				151
				152	void xenvif_kick_thread(struct xenvif_queue *queue)
				153	{
				154	wake_up(&queue->wq);
				155	}
				156
				157	void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
				158	{
				159	int more_to_do;
				160
				161	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
				162
				163	if (more_to_do)
				164	napi_schedule(&queue->napi);
				165	}
				166
				167	static void tx_add_credit(struct xenvif_queue *queue)
				168	{
				169	unsigned long max_burst, max_credit;
				170
				171	/*
				172	* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
				173	* Otherwise the interface can seize up due to insufficient credit.
				174	*/
				175	max_burst = max(131072UL, queue->credit_bytes);
				176
				177	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
				178	max_credit = queue->remaining_credit + queue->credit_bytes;
				179	if (max_credit < queue->remaining_credit)
				180	max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
				181
				182	queue->remaining_credit = min(max_credit, max_burst);
				183	queue->rate_limited = false;
				184	}
				185
				186	void xenvif_tx_credit_callback(struct timer_list *t)
				187	{
				188	struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
				189	tx_add_credit(queue);
				190	xenvif_napi_schedule_or_enable_events(queue);
				191	}
				192
				193	static void xenvif_tx_err(struct xenvif_queue *queue,
				194	struct xen_netif_tx_request *txp,
				195	unsigned int extra_count, RING_IDX end)
				196	{
				197	RING_IDX cons = queue->tx.req_cons;
				198	unsigned long flags;
				199
				200	do {
				201	spin_lock_irqsave(&queue->response_lock, flags);
				202	make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
				203	push_tx_responses(queue);
				204	spin_unlock_irqrestore(&queue->response_lock, flags);
				205	if (cons == end)
				206	break;
				207	RING_COPY_REQUEST(&queue->tx, cons++, txp);
				208	extra_count = 0; /* only the first frag can have extras */
				209	} while (1);
				210	queue->tx.req_cons = cons;
				211	}
				212
				213	static void xenvif_fatal_tx_err(struct xenvif *vif)
				214	{
				215	netdev_err(vif->dev, "fatal error; disabling device\n");
				216	vif->disabled = true;
				217	/* Disable the vif from queue 0's kthread */
				218	if (vif->num_queues)
				219	xenvif_kick_thread(&vif->queues[0]);
				220	}
				221
				222	static int xenvif_count_requests(struct xenvif_queue *queue,
				223	struct xen_netif_tx_request *first,
				224	unsigned int extra_count,
				225	struct xen_netif_tx_request *txp,
				226	int work_to_do)
				227	{
				228	RING_IDX cons = queue->tx.req_cons;
				229	int slots = 0;
				230	int drop_err = 0;
				231	int more_data;
				232
				233	if (!(first->flags & XEN_NETTXF_more_data))
				234	return 0;
				235
				236	do {
				237	struct xen_netif_tx_request dropped_tx = { 0 };
				238
				239	if (slots >= work_to_do) {
				240	netdev_err(queue->vif->dev,
				241	"Asked for %d slots but exceeds this limit\n",
				242	work_to_do);
				243	xenvif_fatal_tx_err(queue->vif);
				244	return -ENODATA;
				245	}
				246
				247	/* This guest is really using too many slots and
				248	* considered malicious.
				249	*/
				250	if (unlikely(slots >= fatal_skb_slots)) {
				251	netdev_err(queue->vif->dev,
				252	"Malicious frontend using %d slots, threshold %u\n",
				253	slots, fatal_skb_slots);
				254	xenvif_fatal_tx_err(queue->vif);
				255	return -E2BIG;
				256	}
				257
				258	/* Xen network protocol had implicit dependency on
				259	* MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
				260	* the historical MAX_SKB_FRAGS value 18 to honor the
				261	* same behavior as before. Any packet using more than
				262	* 18 slots but less than fatal_skb_slots slots is
				263	* dropped
				264	*/
				265	if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
				266	if (net_ratelimit())
				267	netdev_dbg(queue->vif->dev,
				268	"Too many slots (%d) exceeding limit (%d), dropping packet\n",
				269	slots, XEN_NETBK_LEGACY_SLOTS_MAX);
				270	drop_err = -E2BIG;
				271	}
				272
				273	if (drop_err)
				274	txp = &dropped_tx;
				275
				276	RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
				277
				278	/* If the guest submitted a frame >= 64 KiB then
				279	* first->size overflowed and following slots will
				280	* appear to be larger than the frame.
				281	*
				282	* This cannot be fatal error as there are buggy
				283	* frontends that do this.
				284	*
				285	* Consume all slots and drop the packet.
				286	*/
				287	if (!drop_err && txp->size > first->size) {
				288	if (net_ratelimit())
				289	netdev_dbg(queue->vif->dev,
				290	"Invalid tx request, slot size %u > remaining size %u\n",
				291	txp->size, first->size);
				292	drop_err = -EIO;
				293	}
				294
				295	first->size -= txp->size;
				296	slots++;
				297
				298	if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
				299	netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
				300	txp->offset, txp->size);
				301	xenvif_fatal_tx_err(queue->vif);
				302	return -EINVAL;
				303	}
				304
				305	more_data = txp->flags & XEN_NETTXF_more_data;
				306
				307	if (!drop_err)
				308	txp++;
				309
				310	} while (more_data);
				311
				312	if (drop_err) {
				313	xenvif_tx_err(queue, first, extra_count, cons + slots);
				314	return drop_err;
				315	}
				316
				317	return slots;
				318	}
				319
				320
				321	struct xenvif_tx_cb {
				322	u16 pending_idx;
				323	};
				324
				325	#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
				326
				327	static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
				328	u16 pending_idx,
				329	struct xen_netif_tx_request *txp,
				330	unsigned int extra_count,
				331	struct gnttab_map_grant_ref *mop)
				332	{
				333	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
				334	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
				335	GNTMAP_host_map \| GNTMAP_readonly,
				336	txp->gref, queue->vif->domid);
				337
				338	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
				339	sizeof(*txp));
				340	queue->pending_tx_info[pending_idx].extra_count = extra_count;
				341	}
				342
				343	static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
				344	{
				345	struct sk_buff *skb =
				346	alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
				347	GFP_ATOMIC \| __GFP_NOWARN);
				348	if (unlikely(skb == NULL))
				349	return NULL;
				350
				351	/* Packets passed to netif_rx() must have some headroom. */
				352	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
				353
				354	/* Initialize it here to avoid later surprises */
				355	skb_shinfo(skb)->destructor_arg = NULL;
				356
				357	return skb;
				358	}
				359
				360	static struct gnttab_map_grant_ref xenvif_get_requests(struct xenvif_queue queue,
				361	struct sk_buff *skb,
				362	struct xen_netif_tx_request *txp,
				363	struct gnttab_map_grant_ref *gop,
				364	unsigned int frag_overflow,
				365	struct sk_buff *nskb)
				366	{
				367	struct skb_shared_info *shinfo = skb_shinfo(skb);
				368	skb_frag_t *frags = shinfo->frags;
				369	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
				370	int start;
				371	pending_ring_idx_t index;
				372	unsigned int nr_slots;
				373
				374	nr_slots = shinfo->nr_frags;
				375
				376	/* Skip first skb fragment if it is on same page as header fragment. */
				377	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
				378
				379	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
				380	shinfo->nr_frags++, txp++, gop++) {
				381	index = pending_index(queue->pending_cons++);
				382	pending_idx = queue->pending_ring[index];
				383	xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
				384	frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
				385	}
				386
				387	if (frag_overflow) {
				388
				389	shinfo = skb_shinfo(nskb);
				390	frags = shinfo->frags;
				391
				392	for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
				393	shinfo->nr_frags++, txp++, gop++) {
				394	index = pending_index(queue->pending_cons++);
				395	pending_idx = queue->pending_ring[index];
				396	xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
				397	gop);
				398	frag_set_pending_idx(&frags[shinfo->nr_frags],
				399	pending_idx);
				400	}
				401
				402	skb_shinfo(skb)->frag_list = nskb;
				403	}
				404
				405	return gop;
				406	}
				407
				408	static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
				409	u16 pending_idx,
				410	grant_handle_t handle)
				411	{
				412	if (unlikely(queue->grant_tx_handle[pending_idx] !=
				413	NETBACK_INVALID_HANDLE)) {
				414	netdev_err(queue->vif->dev,
				415	"Trying to overwrite active handle! pending_idx: 0x%x\n",
				416	pending_idx);
				417	BUG();
				418	}
				419	queue->grant_tx_handle[pending_idx] = handle;
				420	}
				421
				422	static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
				423	u16 pending_idx)
				424	{
				425	if (unlikely(queue->grant_tx_handle[pending_idx] ==
				426	NETBACK_INVALID_HANDLE)) {
				427	netdev_err(queue->vif->dev,
				428	"Trying to unmap invalid handle! pending_idx: 0x%x\n",
				429	pending_idx);
				430	BUG();
				431	}
				432	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
				433	}
				434
				435	static int xenvif_tx_check_gop(struct xenvif_queue *queue,
				436	struct sk_buff *skb,
				437	struct gnttab_map_grant_ref **gopp_map,
				438	struct gnttab_copy **gopp_copy)
				439	{
				440	struct gnttab_map_grant_ref gop_map = gopp_map;
				441	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
				442	/* This always points to the shinfo of the skb being checked, which
				443	* could be either the first or the one on the frag_list
				444	*/
				445	struct skb_shared_info *shinfo = skb_shinfo(skb);
				446	/* If this is non-NULL, we are currently checking the frag_list skb, and
				447	* this points to the shinfo of the first one
				448	*/
				449	struct skb_shared_info *first_shinfo = NULL;
				450	int nr_frags = shinfo->nr_frags;
				451	const bool sharedslot = nr_frags &&
				452	frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
				453	int i, err;
				454
				455	/* Check status of header. */
				456	err = (*gopp_copy)->status;
				457	if (unlikely(err)) {
				458	if (net_ratelimit())
				459	netdev_dbg(queue->vif->dev,
				460	"Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				461	(*gopp_copy)->status,
				462	pending_idx,
				463	(*gopp_copy)->source.u.ref);
				464	/* The first frag might still have this slot mapped */
				465	if (!sharedslot)
				466	xenvif_idx_release(queue, pending_idx,
				467	XEN_NETIF_RSP_ERROR);
				468	}
				469	(*gopp_copy)++;
				470
				471	check_frags:
				472	for (i = 0; i < nr_frags; i++, gop_map++) {
				473	int j, newerr;
				474
				475	pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
				476
				477	/* Check error status: if okay then remember grant handle. */
				478	newerr = gop_map->status;
				479
				480	if (likely(!newerr)) {
				481	xenvif_grant_handle_set(queue,
				482	pending_idx,
				483	gop_map->handle);
				484	/* Had a previous error? Invalidate this fragment. */
				485	if (unlikely(err)) {
				486	xenvif_idx_unmap(queue, pending_idx);
				487	/* If the mapping of the first frag was OK, but
				488	* the header's copy failed, and they are
				489	* sharing a slot, send an error
				490	*/
				491	if (i == 0 && sharedslot)
				492	xenvif_idx_release(queue, pending_idx,
				493	XEN_NETIF_RSP_ERROR);
				494	else
				495	xenvif_idx_release(queue, pending_idx,
				496	XEN_NETIF_RSP_OKAY);
				497	}
				498	continue;
				499	}
				500
				501	/* Error on this fragment: respond to client with an error. */
				502	if (net_ratelimit())
				503	netdev_dbg(queue->vif->dev,
				504	"Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				505	i,
				506	gop_map->status,
				507	pending_idx,
				508	gop_map->ref);
				509
				510	xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
				511
				512	/* Not the first error? Preceding frags already invalidated. */
				513	if (err)
				514	continue;
				515
				516	/* First error: if the header haven't shared a slot with the
				517	* first frag, release it as well.
				518	*/
				519	if (!sharedslot)
				520	xenvif_idx_release(queue,
				521	XENVIF_TX_CB(skb)->pending_idx,
				522	XEN_NETIF_RSP_OKAY);
				523
				524	/* Invalidate preceding fragments of this skb. */
				525	for (j = 0; j < i; j++) {
				526	pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
				527	xenvif_idx_unmap(queue, pending_idx);
				528	xenvif_idx_release(queue, pending_idx,
				529	XEN_NETIF_RSP_OKAY);
				530	}
				531
				532	/* And if we found the error while checking the frag_list, unmap
				533	* the first skb's frags
				534	*/
				535	if (first_shinfo) {
				536	for (j = 0; j < first_shinfo->nr_frags; j++) {
				537	pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				538	xenvif_idx_unmap(queue, pending_idx);
				539	xenvif_idx_release(queue, pending_idx,
				540	XEN_NETIF_RSP_OKAY);
				541	}
				542	}
				543
				544	/* Remember the error: invalidate all subsequent fragments. */
				545	err = newerr;
				546	}
				547
				548	if (skb_has_frag_list(skb) && !first_shinfo) {
				549	first_shinfo = skb_shinfo(skb);
				550	shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
				551	nr_frags = shinfo->nr_frags;
				552
				553	goto check_frags;
				554	}
				555
				556	*gopp_map = gop_map;
				557	return err;
				558	}
				559
				560	static void xenvif_fill_frags(struct xenvif_queue queue, struct sk_buff skb)
				561	{
				562	struct skb_shared_info *shinfo = skb_shinfo(skb);
				563	int nr_frags = shinfo->nr_frags;
				564	int i;
				565	u16 prev_pending_idx = INVALID_PENDING_IDX;
				566
				567	for (i = 0; i < nr_frags; i++) {
				568	skb_frag_t *frag = shinfo->frags + i;
				569	struct xen_netif_tx_request *txp;
				570	struct page *page;
				571	u16 pending_idx;
				572
				573	pending_idx = frag_get_pending_idx(frag);
				574
				575	/* If this is not the first frag, chain it to the previous*/
				576	if (prev_pending_idx == INVALID_PENDING_IDX)
				577	skb_shinfo(skb)->destructor_arg =
				578	&callback_param(queue, pending_idx);
				579	else
				580	callback_param(queue, prev_pending_idx).ctx =
				581	&callback_param(queue, pending_idx);
				582
				583	callback_param(queue, pending_idx).ctx = NULL;
				584	prev_pending_idx = pending_idx;
				585
				586	txp = &queue->pending_tx_info[pending_idx].req;
				587	page = virt_to_page(idx_to_kaddr(queue, pending_idx));
				588	__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
				589	skb->len += txp->size;
				590	skb->data_len += txp->size;
				591	skb->truesize += txp->size;
				592
				593	/* Take an extra reference to offset network stack's put_page */
				594	get_page(queue->mmap_pages[pending_idx]);
				595	}
				596	}
				597
				598	static int xenvif_get_extras(struct xenvif_queue *queue,
				599	struct xen_netif_extra_info *extras,
				600	unsigned int *extra_count,
				601	int work_to_do)
				602	{
				603	struct xen_netif_extra_info extra;
				604	RING_IDX cons = queue->tx.req_cons;
				605
				606	do {
				607	if (unlikely(work_to_do-- <= 0)) {
				608	netdev_err(queue->vif->dev, "Missing extra info\n");
				609	xenvif_fatal_tx_err(queue->vif);
				610	return -EBADR;
				611	}
				612
				613	RING_COPY_REQUEST(&queue->tx, cons, &extra);
				614
				615	queue->tx.req_cons = ++cons;
				616	(*extra_count)++;
				617
				618	if (unlikely(!extra.type \|\|
				619	extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
				620	netdev_err(queue->vif->dev,
				621	"Invalid extra type: %d\n", extra.type);
				622	xenvif_fatal_tx_err(queue->vif);
				623	return -EINVAL;
				624	}
				625
				626	memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
				627	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
				628
				629	return work_to_do;
				630	}
				631
				632	static int xenvif_set_skb_gso(struct xenvif *vif,
				633	struct sk_buff *skb,
				634	struct xen_netif_extra_info *gso)
				635	{
				636	if (!gso->u.gso.size) {
				637	netdev_err(vif->dev, "GSO size must not be zero.\n");
				638	xenvif_fatal_tx_err(vif);
				639	return -EINVAL;
				640	}
				641
				642	switch (gso->u.gso.type) {
				643	case XEN_NETIF_GSO_TYPE_TCPV4:
				644	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
				645	break;
				646	case XEN_NETIF_GSO_TYPE_TCPV6:
				647	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
				648	break;
				649	default:
				650	netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
				651	xenvif_fatal_tx_err(vif);
				652	return -EINVAL;
				653	}
				654
				655	skb_shinfo(skb)->gso_size = gso->u.gso.size;
				656	/* gso_segs will be calculated later */
				657
				658	return 0;
				659	}
				660
				661	static int checksum_setup(struct xenvif_queue queue, struct sk_buff skb)
				662	{
				663	bool recalculate_partial_csum = false;
				664
				665	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
				666	* peers can fail to set NETRXF_csum_blank when sending a GSO
				667	* frame. In this case force the SKB to CHECKSUM_PARTIAL and
				668	* recalculate the partial checksum.
				669	*/
				670	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
				671	queue->stats.rx_gso_checksum_fixup++;
				672	skb->ip_summed = CHECKSUM_PARTIAL;
				673	recalculate_partial_csum = true;
				674	}
				675
				676	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
				677	if (skb->ip_summed != CHECKSUM_PARTIAL)
				678	return 0;
				679
				680	return skb_checksum_setup(skb, recalculate_partial_csum);
				681	}
				682
				683	static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
				684	{
				685	u64 now = get_jiffies_64();
				686	u64 next_credit = queue->credit_window_start +
				687	msecs_to_jiffies(queue->credit_usec / 1000);
				688
				689	/* Timer could already be pending in rare cases. */
				690	if (timer_pending(&queue->credit_timeout)) {
				691	queue->rate_limited = true;
				692	return true;
				693	}
				694
				695	/* Passed the point where we can replenish credit? */
				696	if (time_after_eq64(now, next_credit)) {
				697	queue->credit_window_start = now;
				698	tx_add_credit(queue);
				699	}
				700
				701	/* Still too big to send right now? Set a callback. */
				702	if (size > queue->remaining_credit) {
				703	mod_timer(&queue->credit_timeout,
				704	next_credit);
				705	queue->credit_window_start = next_credit;
				706	queue->rate_limited = true;
				707
				708	return true;
				709	}
				710
				711	return false;
				712	}
				713
				714	/* No locking is required in xenvif_mcast_add/del() as they are
				715	* only ever invoked from NAPI poll. An RCU list is used because
				716	* xenvif_mcast_match() is called asynchronously, during start_xmit.
				717	*/
				718
				719	static int xenvif_mcast_add(struct xenvif vif, const u8 addr)
				720	{
				721	struct xenvif_mcast_addr *mcast;
				722
				723	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
				724	if (net_ratelimit())
				725	netdev_err(vif->dev,
				726	"Too many multicast addresses\n");
				727	return -ENOSPC;
				728	}
				729
				730	mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
				731	if (!mcast)
				732	return -ENOMEM;
				733
				734	ether_addr_copy(mcast->addr, addr);
				735	list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
				736	vif->fe_mcast_count++;
				737
				738	return 0;
				739	}
				740
				741	static void xenvif_mcast_del(struct xenvif vif, const u8 addr)
				742	{
				743	struct xenvif_mcast_addr *mcast;
				744
				745	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
				746	if (ether_addr_equal(addr, mcast->addr)) {
				747	--vif->fe_mcast_count;
				748	list_del_rcu(&mcast->entry);
				749	kfree_rcu(mcast, rcu);
				750	break;
				751	}
				752	}
				753	}
				754
				755	bool xenvif_mcast_match(struct xenvif vif, const u8 addr)
				756	{
				757	struct xenvif_mcast_addr *mcast;
				758
				759	rcu_read_lock();
				760	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
				761	if (ether_addr_equal(addr, mcast->addr)) {
				762	rcu_read_unlock();
				763	return true;
				764	}
				765	}
				766	rcu_read_unlock();
				767
				768	return false;
				769	}
				770
				771	void xenvif_mcast_addr_list_free(struct xenvif *vif)
				772	{
				773	/* No need for locking or RCU here. NAPI poll and TX queue
				774	* are stopped.
				775	*/
				776	while (!list_empty(&vif->fe_mcast_addr)) {
				777	struct xenvif_mcast_addr *mcast;
				778
				779	mcast = list_first_entry(&vif->fe_mcast_addr,
				780	struct xenvif_mcast_addr,
				781	entry);
				782	--vif->fe_mcast_count;
				783	list_del(&mcast->entry);
				784	kfree(mcast);
				785	}
				786	}
				787
				788	static void xenvif_tx_build_gops(struct xenvif_queue *queue,
				789	int budget,
				790	unsigned *copy_ops,
				791	unsigned *map_ops)
				792	{
				793	struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
				794	struct sk_buff skb, nskb;
				795	int ret;
				796	unsigned int frag_overflow;
				797
				798	while (skb_queue_len(&queue->tx_queue) < budget) {
				799	struct xen_netif_tx_request txreq;
				800	struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
				801	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
				802	unsigned int extra_count;
				803	u16 pending_idx;
				804	RING_IDX idx;
				805	int work_to_do;
				806	unsigned int data_len;
				807	pending_ring_idx_t index;
				808
				809	if (queue->tx.sring->req_prod - queue->tx.req_cons >
				810	XEN_NETIF_TX_RING_SIZE) {
				811	netdev_err(queue->vif->dev,
				812	"Impossible number of requests. "
				813	"req_prod %d, req_cons %d, size %ld\n",
				814	queue->tx.sring->req_prod, queue->tx.req_cons,
				815	XEN_NETIF_TX_RING_SIZE);
				816	xenvif_fatal_tx_err(queue->vif);
				817	break;
				818	}
				819
				820	work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
				821	if (!work_to_do)
				822	break;
				823
				824	idx = queue->tx.req_cons;
				825	rmb(); /* Ensure that we see the request before we copy it. */
				826	RING_COPY_REQUEST(&queue->tx, idx, &txreq);
				827
				828	/* Credit-based scheduling. */
				829	if (txreq.size > queue->remaining_credit &&
				830	tx_credit_exceeded(queue, txreq.size))
				831	break;
				832
				833	queue->remaining_credit -= txreq.size;
				834
				835	work_to_do--;
				836	queue->tx.req_cons = ++idx;
				837
				838	memset(extras, 0, sizeof(extras));
				839	extra_count = 0;
				840	if (txreq.flags & XEN_NETTXF_extra_info) {
				841	work_to_do = xenvif_get_extras(queue, extras,
				842	&extra_count,
				843	work_to_do);
				844	idx = queue->tx.req_cons;
				845	if (unlikely(work_to_do < 0))
				846	break;
				847	}
				848
				849	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
				850	struct xen_netif_extra_info *extra;
				851
				852	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
				853	ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);
				854
				855	make_tx_response(queue, &txreq, extra_count,
				856	(ret == 0) ?
				857	XEN_NETIF_RSP_OKAY :
				858	XEN_NETIF_RSP_ERROR);
				859	push_tx_responses(queue);
				860	continue;
				861	}
				862
				863	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
				864	struct xen_netif_extra_info *extra;
				865
				866	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
				867	xenvif_mcast_del(queue->vif, extra->u.mcast.addr);
				868
				869	make_tx_response(queue, &txreq, extra_count,
				870	XEN_NETIF_RSP_OKAY);
				871	push_tx_responses(queue);
				872	continue;
				873	}
				874
				875	ret = xenvif_count_requests(queue, &txreq, extra_count,
				876	txfrags, work_to_do);
				877	if (unlikely(ret < 0))
				878	break;
				879
				880	idx += ret;
				881
				882	if (unlikely(txreq.size < ETH_HLEN)) {
				883	netdev_dbg(queue->vif->dev,
				884	"Bad packet size: %d\n", txreq.size);
				885	xenvif_tx_err(queue, &txreq, extra_count, idx);
				886	break;
				887	}
				888
				889	/* No crossing a page as the payload mustn't fragment. */
				890	if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
				891	netdev_err(queue->vif->dev,
				892	"txreq.offset: %u, size: %u, end: %lu\n",
				893	txreq.offset, txreq.size,
				894	(unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
				895	xenvif_fatal_tx_err(queue->vif);
				896	break;
				897	}
				898
				899	index = pending_index(queue->pending_cons);
				900	pending_idx = queue->pending_ring[index];
				901
				902	data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
				903	ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
				904	XEN_NETBACK_TX_COPY_LEN : txreq.size;
				905
				906	skb = xenvif_alloc_skb(data_len);
				907	if (unlikely(skb == NULL)) {
				908	netdev_dbg(queue->vif->dev,
				909	"Can't allocate a skb in start_xmit.\n");
				910	xenvif_tx_err(queue, &txreq, extra_count, idx);
				911	break;
				912	}
				913
				914	skb_shinfo(skb)->nr_frags = ret;
				915	if (data_len < txreq.size)
				916	skb_shinfo(skb)->nr_frags++;
				917	/* At this point shinfo->nr_frags is in fact the number of
				918	* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
				919	*/
				920	frag_overflow = 0;
				921	nskb = NULL;
				922	if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
				923	frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
				924	BUG_ON(frag_overflow > MAX_SKB_FRAGS);
				925	skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
				926	nskb = xenvif_alloc_skb(0);
				927	if (unlikely(nskb == NULL)) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	928	skb_shinfo(skb)->nr_frags = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	929	kfree_skb(skb);
				930	xenvif_tx_err(queue, &txreq, extra_count, idx);
				931	if (net_ratelimit())
				932	netdev_err(queue->vif->dev,
				933	"Can't allocate the frag_list skb.\n");
				934	break;
				935	}
				936	}
				937
				938	if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
				939	struct xen_netif_extra_info *gso;
				940	gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
				941
				942	if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				943	/* Failure in xenvif_set_skb_gso is fatal. */
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	944	skb_shinfo(skb)->nr_frags = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	945	kfree_skb(skb);
				946	kfree_skb(nskb);
				947	break;
				948	}
				949	}
				950
				951	if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
				952	struct xen_netif_extra_info *extra;
				953	enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
				954
				955	extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
				956
				957	switch (extra->u.hash.type) {
				958	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
				959	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
				960	type = PKT_HASH_TYPE_L3;
				961	break;
				962
				963	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
				964	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
				965	type = PKT_HASH_TYPE_L4;
				966	break;
				967
				968	default:
				969	break;
				970	}
				971
				972	if (type != PKT_HASH_TYPE_NONE)
				973	skb_set_hash(skb,
				974	(u32 )extra->u.hash.value,
				975	type);
				976	}
				977
				978	XENVIF_TX_CB(skb)->pending_idx = pending_idx;
				979
				980	__skb_put(skb, data_len);
				981	queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
				982	queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
				983	queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
				984
				985	queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
				986	virt_to_gfn(skb->data);
				987	queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
				988	queue->tx_copy_ops[*copy_ops].dest.offset =
				989	offset_in_page(skb->data) & ~XEN_PAGE_MASK;
				990
				991	queue->tx_copy_ops[*copy_ops].len = data_len;
				992	queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
				993
				994	(*copy_ops)++;
				995
				996	if (data_len < txreq.size) {
				997	frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
				998	pending_idx);
				999	xenvif_tx_create_map_op(queue, pending_idx, &txreq,
				1000	extra_count, gop);
				1001	gop++;
				1002	} else {
				1003	frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
				1004	INVALID_PENDING_IDX);
				1005	memcpy(&queue->pending_tx_info[pending_idx].req,
				1006	&txreq, sizeof(txreq));
				1007	queue->pending_tx_info[pending_idx].extra_count =
				1008	extra_count;
				1009	}
				1010
				1011	queue->pending_cons++;
				1012
				1013	gop = xenvif_get_requests(queue, skb, txfrags, gop,
				1014	frag_overflow, nskb);
				1015
				1016	__skb_queue_tail(&queue->tx_queue, skb);
				1017
				1018	queue->tx.req_cons = idx;
				1019
				1020	if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) \|\|
				1021	(*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
				1022	break;
				1023	}
				1024
				1025	(*map_ops) = gop - queue->tx_map_ops;
				1026	return;
				1027	}
				1028
				1029	/* Consolidate skb with a frag_list into a brand new one with local pages on
				1030	* frags. Returns 0 or -ENOMEM if can't allocate new pages.
				1031	*/
				1032	static int xenvif_handle_frag_list(struct xenvif_queue queue, struct sk_buff skb)
				1033	{
				1034	unsigned int offset = skb_headlen(skb);
				1035	skb_frag_t frags[MAX_SKB_FRAGS];
				1036	int i, f;
				1037	struct ubuf_info *uarg;
				1038	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
				1039
				1040	queue->stats.tx_zerocopy_sent += 2;
				1041	queue->stats.tx_frag_overflow++;
				1042
				1043	xenvif_fill_frags(queue, nskb);
				1044	/* Subtract frags size, we will correct it later */
				1045	skb->truesize -= skb->data_len;
				1046	skb->len += nskb->len;
				1047	skb->data_len += nskb->len;
				1048
				1049	/* create a brand new frags array and coalesce there */
				1050	for (i = 0; offset < skb->len; i++) {
				1051	struct page *page;
				1052	unsigned int len;
				1053
				1054	BUG_ON(i >= MAX_SKB_FRAGS);
				1055	page = alloc_page(GFP_ATOMIC);
				1056	if (!page) {
				1057	int j;
				1058	skb->truesize += skb->data_len;
				1059	for (j = 0; j < i; j++)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1060	put_page(skb_frag_page(&frags[j]));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1061	return -ENOMEM;
				1062	}
				1063
				1064	if (offset + PAGE_SIZE < skb->len)
				1065	len = PAGE_SIZE;
				1066	else
				1067	len = skb->len - offset;
				1068	if (skb_copy_bits(skb, offset, page_address(page), len))
				1069	BUG();
				1070
				1071	offset += len;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1072	__skb_frag_set_page(&frags[i], page);
				1073	skb_frag_off_set(&frags[i], 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1074	skb_frag_size_set(&frags[i], len);
				1075	}
				1076
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1077	/* Release all the original (foreign) frags. */
				1078	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
				1079	skb_frag_unref(skb, f);
				1080	uarg = skb_shinfo(skb)->destructor_arg;
				1081	/* increase inflight counter to offset decrement in callback */
				1082	atomic_inc(&queue->inflight_packets);
				1083	uarg->callback(uarg, true);
				1084	skb_shinfo(skb)->destructor_arg = NULL;
				1085
				1086	/* Fill the skb with the new (local) frags. */
				1087	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
				1088	skb_shinfo(skb)->nr_frags = i;
				1089	skb->truesize += i * PAGE_SIZE;
				1090
				1091	return 0;
				1092	}
				1093
				1094	static int xenvif_tx_submit(struct xenvif_queue *queue)
				1095	{
				1096	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
				1097	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
				1098	struct sk_buff *skb;
				1099	int work_done = 0;
				1100
				1101	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
				1102	struct xen_netif_tx_request *txp;
				1103	u16 pending_idx;
				1104	unsigned data_len;
				1105
				1106	pending_idx = XENVIF_TX_CB(skb)->pending_idx;
				1107	txp = &queue->pending_tx_info[pending_idx].req;
				1108
				1109	/* Check the remap error code. */
				1110	if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
				1111	/* If there was an error, xenvif_tx_check_gop is
				1112	* expected to release all the frags which were mapped,
				1113	* so kfree_skb shouldn't do it again
				1114	*/
				1115	skb_shinfo(skb)->nr_frags = 0;
				1116	if (skb_has_frag_list(skb)) {
				1117	struct sk_buff *nskb =
				1118	skb_shinfo(skb)->frag_list;
				1119	skb_shinfo(nskb)->nr_frags = 0;
				1120	}
				1121	kfree_skb(skb);
				1122	continue;
				1123	}
				1124
				1125	data_len = skb->len;
				1126	callback_param(queue, pending_idx).ctx = NULL;
				1127	if (data_len < txp->size) {
				1128	/* Append the packet payload as a fragment. */
				1129	txp->offset += data_len;
				1130	txp->size -= data_len;
				1131	} else {
				1132	/* Schedule a response immediately. */
				1133	xenvif_idx_release(queue, pending_idx,
				1134	XEN_NETIF_RSP_OKAY);
				1135	}
				1136
				1137	if (txp->flags & XEN_NETTXF_csum_blank)
				1138	skb->ip_summed = CHECKSUM_PARTIAL;
				1139	else if (txp->flags & XEN_NETTXF_data_validated)
				1140	skb->ip_summed = CHECKSUM_UNNECESSARY;
				1141
				1142	xenvif_fill_frags(queue, skb);
				1143
				1144	if (unlikely(skb_has_frag_list(skb))) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1145	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
				1146	xenvif_skb_zerocopy_prepare(queue, nskb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1147	if (xenvif_handle_frag_list(queue, skb)) {
				1148	if (net_ratelimit())
				1149	netdev_err(queue->vif->dev,
				1150	"Not enough memory to consolidate frag_list!\n");
				1151	xenvif_skb_zerocopy_prepare(queue, skb);
				1152	kfree_skb(skb);
				1153	continue;
				1154	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1155	/* Copied all the bits from the frag list -- free it. */
				1156	skb_frag_list_init(skb);
				1157	kfree_skb(nskb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1158	}
				1159
				1160	skb->dev = queue->vif->dev;
				1161	skb->protocol = eth_type_trans(skb, skb->dev);
				1162	skb_reset_network_header(skb);
				1163
				1164	if (checksum_setup(queue, skb)) {
				1165	netdev_dbg(queue->vif->dev,
				1166	"Can't setup checksum in net_tx_action\n");
				1167	/* We have to set this flag to trigger the callback */
				1168	if (skb_shinfo(skb)->destructor_arg)
				1169	xenvif_skb_zerocopy_prepare(queue, skb);
				1170	kfree_skb(skb);
				1171	continue;
				1172	}
				1173
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1174	skb_probe_transport_header(skb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1175
				1176	/* If the packet is GSO then we will have just set up the
				1177	* transport header offset in checksum_setup so it's now
				1178	* straightforward to calculate gso_segs.
				1179	*/
				1180	if (skb_is_gso(skb)) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1181	int mss, hdrlen;
				1182
				1183	/* GSO implies having the L4 header. */
				1184	WARN_ON_ONCE(!skb_transport_header_was_set(skb));
				1185	if (unlikely(!skb_transport_header_was_set(skb))) {
				1186	kfree_skb(skb);
				1187	continue;
				1188	}
				1189
				1190	mss = skb_shinfo(skb)->gso_size;
				1191	hdrlen = skb_transport_header(skb) -
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1192	skb_mac_header(skb) +
				1193	tcp_hdrlen(skb);
				1194
				1195	skb_shinfo(skb)->gso_segs =
				1196	DIV_ROUND_UP(skb->len - hdrlen, mss);
				1197	}
				1198
				1199	queue->stats.rx_bytes += skb->len;
				1200	queue->stats.rx_packets++;
				1201
				1202	work_done++;
				1203
				1204	/* Set this flag right before netif_receive_skb, otherwise
				1205	* someone might think this packet already left netback, and
				1206	* do a skb_copy_ubufs while we are still in control of the
				1207	* skb. E.g. the __pskb_pull_tail earlier can do such thing.
				1208	*/
				1209	if (skb_shinfo(skb)->destructor_arg) {
				1210	xenvif_skb_zerocopy_prepare(queue, skb);
				1211	queue->stats.tx_zerocopy_sent++;
				1212	}
				1213
				1214	netif_receive_skb(skb);
				1215	}
				1216
				1217	return work_done;
				1218	}
				1219
				1220	void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
				1221	{
				1222	unsigned long flags;
				1223	pending_ring_idx_t index;
				1224	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
				1225
				1226	/* This is the only place where we grab this lock, to protect callbacks
				1227	* from each other.
				1228	*/
				1229	spin_lock_irqsave(&queue->callback_lock, flags);
				1230	do {
				1231	u16 pending_idx = ubuf->desc;
				1232	ubuf = (struct ubuf_info *) ubuf->ctx;
				1233	BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
				1234	MAX_PENDING_REQS);
				1235	index = pending_index(queue->dealloc_prod);
				1236	queue->dealloc_ring[index] = pending_idx;
				1237	/* Sync with xenvif_tx_dealloc_action:
				1238	* insert idx then incr producer.
				1239	*/
				1240	smp_wmb();
				1241	queue->dealloc_prod++;
				1242	} while (ubuf);
				1243	spin_unlock_irqrestore(&queue->callback_lock, flags);
				1244
				1245	if (likely(zerocopy_success))
				1246	queue->stats.tx_zerocopy_success++;
				1247	else
				1248	queue->stats.tx_zerocopy_fail++;
				1249	xenvif_skb_zerocopy_complete(queue);
				1250	}
				1251
				1252	static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
				1253	{
				1254	struct gnttab_unmap_grant_ref *gop;
				1255	pending_ring_idx_t dc, dp;
				1256	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
				1257	unsigned int i = 0;
				1258
				1259	dc = queue->dealloc_cons;
				1260	gop = queue->tx_unmap_ops;
				1261
				1262	/* Free up any grants we have finished using */
				1263	do {
				1264	dp = queue->dealloc_prod;
				1265
				1266	/* Ensure we see all indices enqueued by all
				1267	* xenvif_zerocopy_callback().
				1268	*/
				1269	smp_rmb();
				1270
				1271	while (dc != dp) {
				1272	BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
				1273	pending_idx =
				1274	queue->dealloc_ring[pending_index(dc++)];
				1275
				1276	pending_idx_release[gop - queue->tx_unmap_ops] =
				1277	pending_idx;
				1278	queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
				1279	queue->mmap_pages[pending_idx];
				1280	gnttab_set_unmap_op(gop,
				1281	idx_to_kaddr(queue, pending_idx),
				1282	GNTMAP_host_map,
				1283	queue->grant_tx_handle[pending_idx]);
				1284	xenvif_grant_handle_reset(queue, pending_idx);
				1285	++gop;
				1286	}
				1287
				1288	} while (dp != queue->dealloc_prod);
				1289
				1290	queue->dealloc_cons = dc;
				1291
				1292	if (gop - queue->tx_unmap_ops > 0) {
				1293	int ret;
				1294	ret = gnttab_unmap_refs(queue->tx_unmap_ops,
				1295	NULL,
				1296	queue->pages_to_unmap,
				1297	gop - queue->tx_unmap_ops);
				1298	if (ret) {
				1299	netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
				1300	gop - queue->tx_unmap_ops, ret);
				1301	for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				1302	if (gop[i].status != GNTST_okay)
				1303	netdev_err(queue->vif->dev,
				1304	" host_addr: 0x%llx handle: 0x%x status: %d\n",
				1305	gop[i].host_addr,
				1306	gop[i].handle,
				1307	gop[i].status);
				1308	}
				1309	BUG();
				1310	}
				1311	}
				1312
				1313	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
				1314	xenvif_idx_release(queue, pending_idx_release[i],
				1315	XEN_NETIF_RSP_OKAY);
				1316	}
				1317
				1318
				1319	/* Called after netfront has transmitted */
				1320	int xenvif_tx_action(struct xenvif_queue *queue, int budget)
				1321	{
				1322	unsigned nr_mops, nr_cops = 0;
				1323	int work_done, ret;
				1324
				1325	if (unlikely(!tx_work_todo(queue)))
				1326	return 0;
				1327
				1328	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
				1329
				1330	if (nr_cops == 0)
				1331	return 0;
				1332
				1333	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
				1334	if (nr_mops != 0) {
				1335	ret = gnttab_map_refs(queue->tx_map_ops,
				1336	NULL,
				1337	queue->pages_to_map,
				1338	nr_mops);
				1339	BUG_ON(ret);
				1340	}
				1341
				1342	work_done = xenvif_tx_submit(queue);
				1343
				1344	return work_done;
				1345	}
				1346
				1347	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
				1348	u8 status)
				1349	{
				1350	struct pending_tx_info *pending_tx_info;
				1351	pending_ring_idx_t index;
				1352	unsigned long flags;
				1353
				1354	pending_tx_info = &queue->pending_tx_info[pending_idx];
				1355
				1356	spin_lock_irqsave(&queue->response_lock, flags);
				1357
				1358	make_tx_response(queue, &pending_tx_info->req,
				1359	pending_tx_info->extra_count, status);
				1360
				1361	/* Release the pending index before pusing the Tx response so
				1362	* its available before a new Tx request is pushed by the
				1363	* frontend.
				1364	*/
				1365	index = pending_index(queue->pending_prod++);
				1366	queue->pending_ring[index] = pending_idx;
				1367
				1368	push_tx_responses(queue);
				1369
				1370	spin_unlock_irqrestore(&queue->response_lock, flags);
				1371	}
				1372
				1373
				1374	static void make_tx_response(struct xenvif_queue *queue,
				1375	struct xen_netif_tx_request *txp,
				1376	unsigned int extra_count,
				1377	s8 st)
				1378	{
				1379	RING_IDX i = queue->tx.rsp_prod_pvt;
				1380	struct xen_netif_tx_response *resp;
				1381
				1382	resp = RING_GET_RESPONSE(&queue->tx, i);
				1383	resp->id = txp->id;
				1384	resp->status = st;
				1385
				1386	while (extra_count-- != 0)
				1387	RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
				1388
				1389	queue->tx.rsp_prod_pvt = ++i;
				1390	}
				1391
				1392	static void push_tx_responses(struct xenvif_queue *queue)
				1393	{
				1394	int notify;
				1395
				1396	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
				1397	if (notify)
				1398	notify_remote_via_irq(queue->tx_irq);
				1399	}
				1400
				1401	void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
				1402	{
				1403	int ret;
				1404	struct gnttab_unmap_grant_ref tx_unmap_op;
				1405
				1406	gnttab_set_unmap_op(&tx_unmap_op,
				1407	idx_to_kaddr(queue, pending_idx),
				1408	GNTMAP_host_map,
				1409	queue->grant_tx_handle[pending_idx]);
				1410	xenvif_grant_handle_reset(queue, pending_idx);
				1411
				1412	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				1413	&queue->mmap_pages[pending_idx], 1);
				1414	if (ret) {
				1415	netdev_err(queue->vif->dev,
				1416	"Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
				1417	ret,
				1418	pending_idx,
				1419	tx_unmap_op.host_addr,
				1420	tx_unmap_op.handle,
				1421	tx_unmap_op.status);
				1422	BUG();
				1423	}
				1424	}
				1425
				1426	static inline int tx_work_todo(struct xenvif_queue *queue)
				1427	{
				1428	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
				1429	return 1;
				1430
				1431	return 0;
				1432	}
				1433
				1434	static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
				1435	{
				1436	return queue->dealloc_cons != queue->dealloc_prod;
				1437	}
				1438
				1439	void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
				1440	{
				1441	if (queue->tx.sring)
				1442	xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
				1443	queue->tx.sring);
				1444	if (queue->rx.sring)
				1445	xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
				1446	queue->rx.sring);
				1447	}
				1448
				1449	int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
				1450	grant_ref_t tx_ring_ref,
				1451	grant_ref_t rx_ring_ref)
				1452	{
				1453	void *addr;
				1454	struct xen_netif_tx_sring *txs;
				1455	struct xen_netif_rx_sring *rxs;
				1456
				1457	int err = -ENOMEM;
				1458
				1459	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				1460	&tx_ring_ref, 1, &addr);
				1461	if (err)
				1462	goto err;
				1463
				1464	txs = (struct xen_netif_tx_sring *)addr;
				1465	BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
				1466
				1467	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				1468	&rx_ring_ref, 1, &addr);
				1469	if (err)
				1470	goto err;
				1471
				1472	rxs = (struct xen_netif_rx_sring *)addr;
				1473	BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
				1474
				1475	return 0;
				1476
				1477	err:
				1478	xenvif_unmap_frontend_data_rings(queue);
				1479	return err;
				1480	}
				1481
				1482	static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
				1483	{
				1484	/* Dealloc thread must remain running until all inflight
				1485	* packets complete.
				1486	*/
				1487	return kthread_should_stop() &&
				1488	!atomic_read(&queue->inflight_packets);
				1489	}
				1490
				1491	int xenvif_dealloc_kthread(void *data)
				1492	{
				1493	struct xenvif_queue *queue = data;
				1494
				1495	for (;;) {
				1496	wait_event_interruptible(queue->dealloc_wq,
				1497	tx_dealloc_work_todo(queue) \|\|
				1498	xenvif_dealloc_kthread_should_stop(queue));
				1499	if (xenvif_dealloc_kthread_should_stop(queue))
				1500	break;
				1501
				1502	xenvif_tx_dealloc_action(queue);
				1503	cond_resched();
				1504	}
				1505
				1506	/* Unmap anything remaining*/
				1507	if (tx_dealloc_work_todo(queue))
				1508	xenvif_tx_dealloc_action(queue);
				1509
				1510	return 0;
				1511	}
				1512
				1513	static void make_ctrl_response(struct xenvif *vif,
				1514	const struct xen_netif_ctrl_request *req,
				1515	u32 status, u32 data)
				1516	{
				1517	RING_IDX idx = vif->ctrl.rsp_prod_pvt;
				1518	struct xen_netif_ctrl_response rsp = {
				1519	.id = req->id,
				1520	.type = req->type,
				1521	.status = status,
				1522	.data = data,
				1523	};
				1524
				1525	*RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
				1526	vif->ctrl.rsp_prod_pvt = ++idx;
				1527	}
				1528
				1529	static void push_ctrl_response(struct xenvif *vif)
				1530	{
				1531	int notify;
				1532
				1533	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
				1534	if (notify)
				1535	notify_remote_via_irq(vif->ctrl_irq);
				1536	}
				1537
				1538	static void process_ctrl_request(struct xenvif *vif,
				1539	const struct xen_netif_ctrl_request *req)
				1540	{
				1541	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
				1542	u32 data = 0;
				1543
				1544	switch (req->type) {
				1545	case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
				1546	status = xenvif_set_hash_alg(vif, req->data[0]);
				1547	break;
				1548
				1549	case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
				1550	status = xenvif_get_hash_flags(vif, &data);
				1551	break;
				1552
				1553	case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
				1554	status = xenvif_set_hash_flags(vif, req->data[0]);
				1555	break;
				1556
				1557	case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
				1558	status = xenvif_set_hash_key(vif, req->data[0],
				1559	req->data[1]);
				1560	break;
				1561
				1562	case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
				1563	status = XEN_NETIF_CTRL_STATUS_SUCCESS;
				1564	data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
				1565	break;
				1566
				1567	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
				1568	status = xenvif_set_hash_mapping_size(vif,
				1569	req->data[0]);
				1570	break;
				1571
				1572	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
				1573	status = xenvif_set_hash_mapping(vif, req->data[0],
				1574	req->data[1],
				1575	req->data[2]);
				1576	break;
				1577
				1578	default:
				1579	break;
				1580	}
				1581
				1582	make_ctrl_response(vif, req, status, data);
				1583	push_ctrl_response(vif);
				1584	}
				1585
				1586	static void xenvif_ctrl_action(struct xenvif *vif)
				1587	{
				1588	for (;;) {
				1589	RING_IDX req_prod, req_cons;
				1590
				1591	req_prod = vif->ctrl.sring->req_prod;
				1592	req_cons = vif->ctrl.req_cons;
				1593
				1594	/* Make sure we can see requests before we process them. */
				1595	rmb();
				1596
				1597	if (req_cons == req_prod)
				1598	break;
				1599
				1600	while (req_cons != req_prod) {
				1601	struct xen_netif_ctrl_request req;
				1602
				1603	RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
				1604	req_cons++;
				1605
				1606	process_ctrl_request(vif, &req);
				1607	}
				1608
				1609	vif->ctrl.req_cons = req_cons;
				1610	vif->ctrl.sring->req_event = req_cons + 1;
				1611	}
				1612	}
				1613
				1614	static bool xenvif_ctrl_work_todo(struct xenvif *vif)
				1615	{
				1616	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
				1617	return true;
				1618
				1619	return false;
				1620	}
				1621
				1622	irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
				1623	{
				1624	struct xenvif *vif = data;
				1625
				1626	while (xenvif_ctrl_work_todo(vif))
				1627	xenvif_ctrl_action(vif);
				1628
				1629	return IRQ_HANDLED;
				1630	}
				1631
				1632	static int __init netback_init(void)
				1633	{
				1634	int rc = 0;
				1635
				1636	if (!xen_domain())
				1637	return -ENODEV;
				1638
				1639	/* Allow as many queues as there are CPUs but max. 8 if user has not
				1640	* specified a value.
				1641	*/
				1642	if (xenvif_max_queues == 0)
				1643	xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
				1644	num_online_cpus());
				1645
				1646	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
				1647	pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
				1648	fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
				1649	fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
				1650	}
				1651
				1652	rc = xenvif_xenbus_init();
				1653	if (rc)
				1654	goto failed_init;
				1655
				1656	#ifdef CONFIG_DEBUG_FS
				1657	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1658	#endif /* CONFIG_DEBUG_FS */
				1659
				1660	return 0;
				1661
				1662	failed_init:
				1663	return rc;
				1664	}
				1665
				1666	module_init(netback_init);
				1667
				1668	static void __exit netback_fini(void)
				1669	{
				1670	#ifdef CONFIG_DEBUG_FS
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1671	debugfs_remove_recursive(xen_netback_dbg_root);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1672	#endif /* CONFIG_DEBUG_FS */
				1673	xenvif_xenbus_fini();
				1674	}
				1675	module_exit(netback_fini);
				1676
				1677	MODULE_LICENSE("Dual BSD/GPL");
				1678	MODULE_ALIAS("xen-backend:vif");