/*
2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
7 *
8 * Copyright (c) 2002-2005, K A Fraser
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include "common.h"
36
37#include <linux/kthread.h>
38#include <linux/if_vlan.h>
39#include <linux/udp.h>
40#include <linux/highmem.h>
41
42#include <net/tcp.h>
43
44#include <xen/xen.h>
45#include <xen/events.h>
46#include <xen/interface/memory.h>
47#include <xen/page.h>
48
49#include <asm/xen/hypercall.h>
50
/* Provide an option to disable split event channels at load time, as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
55bool separate_tx_rx_irq = true;
56module_param(separate_tx_rx_irq, bool, 0644);
57
58/* The time that packets can stay on the guest Rx internal queue
59 * before they are dropped.
60 */
61unsigned int rx_drain_timeout_msecs = 10000;
62module_param(rx_drain_timeout_msecs, uint, 0444);
63
64/* The length of time before the frontend is considered unresponsive
65 * because it isn't providing Rx slots.
66 */
67unsigned int rx_stall_timeout_msecs = 60000;
68module_param(rx_stall_timeout_msecs, uint, 0444);
69
70#define MAX_QUEUES_DEFAULT 8
71unsigned int xenvif_max_queues;
72module_param_named(max_queues, xenvif_max_queues, uint, 0644);
73MODULE_PARM_DESC(max_queues,
74 "Maximum number of queues per virtual interface");
75
/*
 * This is the maximum number of slots an skb can have. If a guest sends
 * an skb which exceeds this limit, it is considered malicious.
 */
80#define FATAL_SKB_SLOTS_DEFAULT 20
81static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
82module_param(fatal_skb_slots, uint, 0444);
83
84/* The amount to copy out of the first guest Tx slot into the skb's
85 * linear area. If the first slot has more data, it will be mapped
86 * and put into the first frag.
87 *
88 * This is sized to avoid pulling headers from the frags for most
89 * TCP/IP packets.
90 */
91#define XEN_NETBACK_TX_COPY_LEN 128
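/* Illustrative note (figures assumed, not from the source): with the default
 * of 128 bytes, a 1000-byte first slot has its first 128 bytes grant-copied
 * into the skb's linear area by xenvif_tx_build_gops() and the remaining
 * 872 bytes grant-mapped into the first frag.
 */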
92
93/* This is the maximum number of flows in the hash cache. */
94#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
95unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
96module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
97MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
98
99static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
100 u8 status);
101
102static void make_tx_response(struct xenvif_queue *queue,
103 struct xen_netif_tx_request *txp,
104 unsigned int extra_count,
105 s8 st);
106static void push_tx_responses(struct xenvif_queue *queue);
107
108static inline int tx_work_todo(struct xenvif_queue *queue);
109
110static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
111 u16 idx)
112{
113 return page_to_pfn(queue->mmap_pages[idx]);
114}
115
116static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
117 u16 idx)
118{
119 return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
120}
121
122#define callback_param(vif, pending_idx) \
123 (vif->pending_tx_info[pending_idx].callback_struct)
124
/* Find the containing queue's structure from a pointer into its
 * pending_tx_info array.
 */
127static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
128{
129 u16 pending_idx = ubuf->desc;
130 struct pending_tx_info *temp =
131 container_of(ubuf, struct pending_tx_info, callback_struct);
132 return container_of(temp - pending_idx,
133 struct xenvif_queue,
134 pending_tx_info[0]);
135}
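/* Sketch of the pointer arithmetic in ubuf_to_queue() above, derived from the
 * code: 'temp' points at queue->pending_tx_info[pending_idx], so
 * 'temp - pending_idx' points at queue->pending_tx_info[0], and container_of()
 * on that first element recovers the enclosing struct xenvif_queue.
 */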
136
137static u16 frag_get_pending_idx(skb_frag_t *frag)
138{
	return (u16)skb_frag_off(frag);
}
141
142static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
143{
	skb_frag_off_set(frag, pending_idx);
}
146
147static inline pending_ring_idx_t pending_index(unsigned i)
148{
149 return i & (MAX_PENDING_REQS-1);
150}
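/* Editorial note: the masking above assumes MAX_PENDING_REQS is a power of
 * two, so "i & (MAX_PENDING_REQS - 1)" behaves like "i % MAX_PENDING_REQS";
 * e.g. if MAX_PENDING_REQS were 256, pending_index(257) == 1.
 */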
151
152void xenvif_kick_thread(struct xenvif_queue *queue)
153{
154 wake_up(&queue->wq);
155}
156
157void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
158{
159 int more_to_do;
160
161 RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
162
163 if (more_to_do)
164 napi_schedule(&queue->napi);
	else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
				     &queue->eoi_pending) &
		 (NETBK_TX_EOI | NETBK_COMMON_EOI))
		xen_irq_lateeoi(queue->tx_irq, 0);
}
170
171static void tx_add_credit(struct xenvif_queue *queue)
172{
173 unsigned long max_burst, max_credit;
174
175 /*
176 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
177 * Otherwise the interface can seize up due to insufficient credit.
178 */
179 max_burst = max(131072UL, queue->credit_bytes);
180
181 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
182 max_credit = queue->remaining_credit + queue->credit_bytes;
183 if (max_credit < queue->remaining_credit)
184 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
185
186 queue->remaining_credit = min(max_credit, max_burst);
187 queue->rate_limited = false;
188}
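/* Worked example (illustrative figures, not from the source): with
 * credit_bytes = 100000 and remaining_credit = 50000, max_burst =
 * max(131072, 100000) = 131072 and max_credit = 150000, so
 * remaining_credit becomes min(150000, 131072) = 131072.
 */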
189
190void xenvif_tx_credit_callback(struct timer_list *t)
191{
192 struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
193 tx_add_credit(queue);
194 xenvif_napi_schedule_or_enable_events(queue);
195}
196
197static void xenvif_tx_err(struct xenvif_queue *queue,
198 struct xen_netif_tx_request *txp,
199 unsigned int extra_count, RING_IDX end)
200{
201 RING_IDX cons = queue->tx.req_cons;
202 unsigned long flags;
203
204 do {
205 spin_lock_irqsave(&queue->response_lock, flags);
206 make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
207 push_tx_responses(queue);
208 spin_unlock_irqrestore(&queue->response_lock, flags);
209 if (cons == end)
210 break;
211 RING_COPY_REQUEST(&queue->tx, cons++, txp);
212 extra_count = 0; /* only the first frag can have extras */
213 } while (1);
214 queue->tx.req_cons = cons;
215}
216
217static void xenvif_fatal_tx_err(struct xenvif *vif)
218{
219 netdev_err(vif->dev, "fatal error; disabling device\n");
220 vif->disabled = true;
221 /* Disable the vif from queue 0's kthread */
222 if (vif->num_queues)
223 xenvif_kick_thread(&vif->queues[0]);
224}
225
226static int xenvif_count_requests(struct xenvif_queue *queue,
227 struct xen_netif_tx_request *first,
228 unsigned int extra_count,
229 struct xen_netif_tx_request *txp,
230 int work_to_do)
231{
232 RING_IDX cons = queue->tx.req_cons;
233 int slots = 0;
234 int drop_err = 0;
235 int more_data;
236
237 if (!(first->flags & XEN_NETTXF_more_data))
238 return 0;
239
240 do {
241 struct xen_netif_tx_request dropped_tx = { 0 };
242
243 if (slots >= work_to_do) {
244 netdev_err(queue->vif->dev,
245 "Asked for %d slots but exceeds this limit\n",
246 work_to_do);
247 xenvif_fatal_tx_err(queue->vif);
248 return -ENODATA;
249 }
250
		/* This guest is really using too many slots and is
		 * considered malicious.
		 */
254 if (unlikely(slots >= fatal_skb_slots)) {
255 netdev_err(queue->vif->dev,
256 "Malicious frontend using %d slots, threshold %u\n",
257 slots, fatal_skb_slots);
258 xenvif_fatal_tx_err(queue->vif);
259 return -E2BIG;
260 }
261
		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value of 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but fewer than fatal_skb_slots slots is
		 * dropped.
		 */
269 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
270 if (net_ratelimit())
271 netdev_dbg(queue->vif->dev,
272 "Too many slots (%d) exceeding limit (%d), dropping packet\n",
273 slots, XEN_NETBK_LEGACY_SLOTS_MAX);
274 drop_err = -E2BIG;
275 }
276
277 if (drop_err)
278 txp = &dropped_tx;
279
280 RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
281
282 /* If the guest submitted a frame >= 64 KiB then
283 * first->size overflowed and following slots will
284 * appear to be larger than the frame.
285 *
		 * This cannot be a fatal error as there are buggy
287 * frontends that do this.
288 *
289 * Consume all slots and drop the packet.
290 */
291 if (!drop_err && txp->size > first->size) {
292 if (net_ratelimit())
293 netdev_dbg(queue->vif->dev,
294 "Invalid tx request, slot size %u > remaining size %u\n",
295 txp->size, first->size);
296 drop_err = -EIO;
297 }
298
299 first->size -= txp->size;
300 slots++;
301
302 if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
303 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
304 txp->offset, txp->size);
305 xenvif_fatal_tx_err(queue->vif);
306 return -EINVAL;
307 }
308
309 more_data = txp->flags & XEN_NETTXF_more_data;
310
311 if (!drop_err)
312 txp++;
313
314 } while (more_data);
315
316 if (drop_err) {
317 xenvif_tx_err(queue, first, extra_count, cons + slots);
318 return drop_err;
319 }
320
321 return slots;
322}
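/* Editorial example: a packet occupying one header slot followed by three
 * more-data slots makes xenvif_count_requests() return 3 (the first slot is
 * not counted), which the caller then uses as the initial nr_frags.
 */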
323
324
325struct xenvif_tx_cb {
326 u16 pending_idx;
327};
328
329#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
330
331static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
332 u16 pending_idx,
333 struct xen_netif_tx_request *txp,
334 unsigned int extra_count,
335 struct gnttab_map_grant_ref *mop)
336{
337 queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
338 gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
339 GNTMAP_host_map | GNTMAP_readonly,
340 txp->gref, queue->vif->domid);
341
342 memcpy(&queue->pending_tx_info[pending_idx].req, txp,
343 sizeof(*txp));
344 queue->pending_tx_info[pending_idx].extra_count = extra_count;
345}
346
347static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
348{
349 struct sk_buff *skb =
350 alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
351 GFP_ATOMIC | __GFP_NOWARN);
352 if (unlikely(skb == NULL))
353 return NULL;
354
355 /* Packets passed to netif_rx() must have some headroom. */
356 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
357
358 /* Initialize it here to avoid later surprises */
359 skb_shinfo(skb)->destructor_arg = NULL;
360
361 return skb;
362}
363
364static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
365 struct sk_buff *skb,
366 struct xen_netif_tx_request *txp,
367 struct gnttab_map_grant_ref *gop,
368 unsigned int frag_overflow,
369 struct sk_buff *nskb)
370{
371 struct skb_shared_info *shinfo = skb_shinfo(skb);
372 skb_frag_t *frags = shinfo->frags;
373 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
374 int start;
375 pending_ring_idx_t index;
376 unsigned int nr_slots;
377
378 nr_slots = shinfo->nr_frags;
379
	/* Skip the first skb fragment if it is on the same page as the header fragment. */
381 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
382
383 for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
384 shinfo->nr_frags++, txp++, gop++) {
385 index = pending_index(queue->pending_cons++);
386 pending_idx = queue->pending_ring[index];
387 xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
388 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
389 }
390
391 if (frag_overflow) {
392
393 shinfo = skb_shinfo(nskb);
394 frags = shinfo->frags;
395
396 for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
397 shinfo->nr_frags++, txp++, gop++) {
398 index = pending_index(queue->pending_cons++);
399 pending_idx = queue->pending_ring[index];
400 xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
401 gop);
402 frag_set_pending_idx(&frags[shinfo->nr_frags],
403 pending_idx);
404 }
405
406 skb_shinfo(skb)->frag_list = nskb;
407 }
408
409 return gop;
410}
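/* Editorial note: when a packet needs more frag slots than MAX_SKB_FRAGS
 * (17 on a typical 4 KiB-page build -- an assumption, not stated here), the
 * overflow slots are placed on 'nskb', which is attached as the skb's
 * frag_list above and later flattened by xenvif_handle_frag_list().
 */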
411
412static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
413 u16 pending_idx,
414 grant_handle_t handle)
415{
416 if (unlikely(queue->grant_tx_handle[pending_idx] !=
417 NETBACK_INVALID_HANDLE)) {
418 netdev_err(queue->vif->dev,
419 "Trying to overwrite active handle! pending_idx: 0x%x\n",
420 pending_idx);
421 BUG();
422 }
423 queue->grant_tx_handle[pending_idx] = handle;
424}
425
426static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
427 u16 pending_idx)
428{
429 if (unlikely(queue->grant_tx_handle[pending_idx] ==
430 NETBACK_INVALID_HANDLE)) {
431 netdev_err(queue->vif->dev,
432 "Trying to unmap invalid handle! pending_idx: 0x%x\n",
433 pending_idx);
434 BUG();
435 }
436 queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
437}
438
439static int xenvif_tx_check_gop(struct xenvif_queue *queue,
440 struct sk_buff *skb,
441 struct gnttab_map_grant_ref **gopp_map,
442 struct gnttab_copy **gopp_copy)
443{
444 struct gnttab_map_grant_ref *gop_map = *gopp_map;
445 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
446 /* This always points to the shinfo of the skb being checked, which
447 * could be either the first or the one on the frag_list
448 */
449 struct skb_shared_info *shinfo = skb_shinfo(skb);
450 /* If this is non-NULL, we are currently checking the frag_list skb, and
451 * this points to the shinfo of the first one
452 */
453 struct skb_shared_info *first_shinfo = NULL;
454 int nr_frags = shinfo->nr_frags;
455 const bool sharedslot = nr_frags &&
456 frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
457 int i, err;
458
459 /* Check status of header. */
460 err = (*gopp_copy)->status;
461 if (unlikely(err)) {
462 if (net_ratelimit())
463 netdev_dbg(queue->vif->dev,
464 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
465 (*gopp_copy)->status,
466 pending_idx,
467 (*gopp_copy)->source.u.ref);
468 /* The first frag might still have this slot mapped */
469 if (!sharedslot)
470 xenvif_idx_release(queue, pending_idx,
471 XEN_NETIF_RSP_ERROR);
472 }
473 (*gopp_copy)++;
474
475check_frags:
476 for (i = 0; i < nr_frags; i++, gop_map++) {
477 int j, newerr;
478
479 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
480
481 /* Check error status: if okay then remember grant handle. */
482 newerr = gop_map->status;
483
484 if (likely(!newerr)) {
485 xenvif_grant_handle_set(queue,
486 pending_idx,
487 gop_map->handle);
488 /* Had a previous error? Invalidate this fragment. */
489 if (unlikely(err)) {
490 xenvif_idx_unmap(queue, pending_idx);
491 /* If the mapping of the first frag was OK, but
492 * the header's copy failed, and they are
493 * sharing a slot, send an error
494 */
495 if (i == 0 && sharedslot)
496 xenvif_idx_release(queue, pending_idx,
497 XEN_NETIF_RSP_ERROR);
498 else
499 xenvif_idx_release(queue, pending_idx,
500 XEN_NETIF_RSP_OKAY);
501 }
502 continue;
503 }
504
505 /* Error on this fragment: respond to client with an error. */
506 if (net_ratelimit())
507 netdev_dbg(queue->vif->dev,
508 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
509 i,
510 gop_map->status,
511 pending_idx,
512 gop_map->ref);
513
514 xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
515
516 /* Not the first error? Preceding frags already invalidated. */
517 if (err)
518 continue;
519
		/* First error: if the header hasn't shared a slot with the
		 * first frag, release it as well.
		 */
523 if (!sharedslot)
524 xenvif_idx_release(queue,
525 XENVIF_TX_CB(skb)->pending_idx,
526 XEN_NETIF_RSP_OKAY);
527
528 /* Invalidate preceding fragments of this skb. */
529 for (j = 0; j < i; j++) {
530 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
531 xenvif_idx_unmap(queue, pending_idx);
532 xenvif_idx_release(queue, pending_idx,
533 XEN_NETIF_RSP_OKAY);
534 }
535
536 /* And if we found the error while checking the frag_list, unmap
537 * the first skb's frags
538 */
539 if (first_shinfo) {
540 for (j = 0; j < first_shinfo->nr_frags; j++) {
541 pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
542 xenvif_idx_unmap(queue, pending_idx);
543 xenvif_idx_release(queue, pending_idx,
544 XEN_NETIF_RSP_OKAY);
545 }
546 }
547
548 /* Remember the error: invalidate all subsequent fragments. */
549 err = newerr;
550 }
551
552 if (skb_has_frag_list(skb) && !first_shinfo) {
553 first_shinfo = skb_shinfo(skb);
554 shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
555 nr_frags = shinfo->nr_frags;
556
557 goto check_frags;
558 }
559
560 *gopp_map = gop_map;
561 return err;
562}
563
564static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
565{
566 struct skb_shared_info *shinfo = skb_shinfo(skb);
567 int nr_frags = shinfo->nr_frags;
568 int i;
569 u16 prev_pending_idx = INVALID_PENDING_IDX;
570
571 for (i = 0; i < nr_frags; i++) {
572 skb_frag_t *frag = shinfo->frags + i;
573 struct xen_netif_tx_request *txp;
574 struct page *page;
575 u16 pending_idx;
576
577 pending_idx = frag_get_pending_idx(frag);
578
		/* If this is not the first frag, chain it to the previous one. */
580 if (prev_pending_idx == INVALID_PENDING_IDX)
581 skb_shinfo(skb)->destructor_arg =
582 &callback_param(queue, pending_idx);
583 else
584 callback_param(queue, prev_pending_idx).ctx =
585 &callback_param(queue, pending_idx);
586
587 callback_param(queue, pending_idx).ctx = NULL;
588 prev_pending_idx = pending_idx;
589
590 txp = &queue->pending_tx_info[pending_idx].req;
591 page = virt_to_page(idx_to_kaddr(queue, pending_idx));
592 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
593 skb->len += txp->size;
594 skb->data_len += txp->size;
595 skb->truesize += txp->size;
596
		/* Take an extra reference to offset the network stack's put_page */
598 get_page(queue->mmap_pages[pending_idx]);
599 }
600}
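/* Editorial note: the callback_param() structures filled above are chained
 * through their ->ctx pointers, starting at shinfo->destructor_arg and ending
 * with NULL, so xenvif_zerocopy_callback() can walk every pending slot of the
 * skb from a single callback invocation.
 */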
601
602static int xenvif_get_extras(struct xenvif_queue *queue,
603 struct xen_netif_extra_info *extras,
604 unsigned int *extra_count,
605 int work_to_do)
606{
607 struct xen_netif_extra_info extra;
608 RING_IDX cons = queue->tx.req_cons;
609
610 do {
611 if (unlikely(work_to_do-- <= 0)) {
612 netdev_err(queue->vif->dev, "Missing extra info\n");
613 xenvif_fatal_tx_err(queue->vif);
614 return -EBADR;
615 }
616
617 RING_COPY_REQUEST(&queue->tx, cons, &extra);
618
619 queue->tx.req_cons = ++cons;
620 (*extra_count)++;
621
622 if (unlikely(!extra.type ||
623 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
624 netdev_err(queue->vif->dev,
625 "Invalid extra type: %d\n", extra.type);
626 xenvif_fatal_tx_err(queue->vif);
627 return -EINVAL;
628 }
629
630 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
631 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
632
633 return work_to_do;
634}
635
636static int xenvif_set_skb_gso(struct xenvif *vif,
637 struct sk_buff *skb,
638 struct xen_netif_extra_info *gso)
639{
640 if (!gso->u.gso.size) {
641 netdev_err(vif->dev, "GSO size must not be zero.\n");
642 xenvif_fatal_tx_err(vif);
643 return -EINVAL;
644 }
645
646 switch (gso->u.gso.type) {
647 case XEN_NETIF_GSO_TYPE_TCPV4:
648 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
649 break;
650 case XEN_NETIF_GSO_TYPE_TCPV6:
651 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
652 break;
653 default:
654 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
655 xenvif_fatal_tx_err(vif);
656 return -EINVAL;
657 }
658
659 skb_shinfo(skb)->gso_size = gso->u.gso.size;
660 /* gso_segs will be calculated later */
661
662 return 0;
663}
664
665static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
666{
667 bool recalculate_partial_csum = false;
668
669 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
670 * peers can fail to set NETRXF_csum_blank when sending a GSO
671 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
672 * recalculate the partial checksum.
673 */
674 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
675 queue->stats.rx_gso_checksum_fixup++;
676 skb->ip_summed = CHECKSUM_PARTIAL;
677 recalculate_partial_csum = true;
678 }
679
680 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
681 if (skb->ip_summed != CHECKSUM_PARTIAL)
682 return 0;
683
684 return skb_checksum_setup(skb, recalculate_partial_csum);
685}
686
687static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
688{
689 u64 now = get_jiffies_64();
690 u64 next_credit = queue->credit_window_start +
691 msecs_to_jiffies(queue->credit_usec / 1000);
692
693 /* Timer could already be pending in rare cases. */
694 if (timer_pending(&queue->credit_timeout)) {
695 queue->rate_limited = true;
696 return true;
697 }
698
699 /* Passed the point where we can replenish credit? */
700 if (time_after_eq64(now, next_credit)) {
701 queue->credit_window_start = now;
702 tx_add_credit(queue);
703 }
704
705 /* Still too big to send right now? Set a callback. */
706 if (size > queue->remaining_credit) {
707 mod_timer(&queue->credit_timeout,
708 next_credit);
709 queue->credit_window_start = next_credit;
710 queue->rate_limited = true;
711
712 return true;
713 }
714
715 return false;
716}
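/* Illustrative timeline (assumed figures): with credit_usec = 10000 the
 * window is 10 ms. If a 20000-byte request arrives while remaining_credit is
 * 15000 and the window has not yet elapsed, the credit timer is armed for
 * next_credit and the packet waits until xenvif_tx_credit_callback() refills
 * the credit and re-schedules NAPI.
 */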
717
718/* No locking is required in xenvif_mcast_add/del() as they are
719 * only ever invoked from NAPI poll. An RCU list is used because
720 * xenvif_mcast_match() is called asynchronously, during start_xmit.
721 */
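/* Editorial sketch of the rule above: xenvif_mcast_add()/del() only run from
 * NAPI poll, so list mutations are serialized; xenvif_mcast_match() runs in
 * start_xmit and only reads the list under rcu_read_lock(), with kfree_rcu()
 * deferring frees past any concurrent readers.
 */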
722
723static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr)
724{
725 struct xenvif_mcast_addr *mcast;
726
727 if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
728 if (net_ratelimit())
729 netdev_err(vif->dev,
730 "Too many multicast addresses\n");
731 return -ENOSPC;
732 }
733
734 mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
735 if (!mcast)
736 return -ENOMEM;
737
738 ether_addr_copy(mcast->addr, addr);
739 list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
740 vif->fe_mcast_count++;
741
742 return 0;
743}
744
745static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr)
746{
747 struct xenvif_mcast_addr *mcast;
748
749 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
750 if (ether_addr_equal(addr, mcast->addr)) {
751 --vif->fe_mcast_count;
752 list_del_rcu(&mcast->entry);
753 kfree_rcu(mcast, rcu);
754 break;
755 }
756 }
757}
758
759bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr)
760{
761 struct xenvif_mcast_addr *mcast;
762
763 rcu_read_lock();
764 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
765 if (ether_addr_equal(addr, mcast->addr)) {
766 rcu_read_unlock();
767 return true;
768 }
769 }
770 rcu_read_unlock();
771
772 return false;
773}
774
775void xenvif_mcast_addr_list_free(struct xenvif *vif)
776{
777 /* No need for locking or RCU here. NAPI poll and TX queue
778 * are stopped.
779 */
780 while (!list_empty(&vif->fe_mcast_addr)) {
781 struct xenvif_mcast_addr *mcast;
782
783 mcast = list_first_entry(&vif->fe_mcast_addr,
784 struct xenvif_mcast_addr,
785 entry);
786 --vif->fe_mcast_count;
787 list_del(&mcast->entry);
788 kfree(mcast);
789 }
790}
791
792static void xenvif_tx_build_gops(struct xenvif_queue *queue,
793 int budget,
794 unsigned *copy_ops,
795 unsigned *map_ops)
796{
797 struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
798 struct sk_buff *skb, *nskb;
799 int ret;
800 unsigned int frag_overflow;
801
802 while (skb_queue_len(&queue->tx_queue) < budget) {
803 struct xen_netif_tx_request txreq;
804 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
805 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
806 unsigned int extra_count;
807 u16 pending_idx;
808 RING_IDX idx;
809 int work_to_do;
810 unsigned int data_len;
811 pending_ring_idx_t index;
812
813 if (queue->tx.sring->req_prod - queue->tx.req_cons >
814 XEN_NETIF_TX_RING_SIZE) {
815 netdev_err(queue->vif->dev,
816 "Impossible number of requests. "
817 "req_prod %d, req_cons %d, size %ld\n",
818 queue->tx.sring->req_prod, queue->tx.req_cons,
819 XEN_NETIF_TX_RING_SIZE);
820 xenvif_fatal_tx_err(queue->vif);
821 break;
822 }
823
824 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
825 if (!work_to_do)
826 break;
827
828 idx = queue->tx.req_cons;
829 rmb(); /* Ensure that we see the request before we copy it. */
830 RING_COPY_REQUEST(&queue->tx, idx, &txreq);
831
832 /* Credit-based scheduling. */
833 if (txreq.size > queue->remaining_credit &&
834 tx_credit_exceeded(queue, txreq.size))
835 break;
836
837 queue->remaining_credit -= txreq.size;
838
839 work_to_do--;
840 queue->tx.req_cons = ++idx;
841
842 memset(extras, 0, sizeof(extras));
843 extra_count = 0;
844 if (txreq.flags & XEN_NETTXF_extra_info) {
845 work_to_do = xenvif_get_extras(queue, extras,
846 &extra_count,
847 work_to_do);
848 idx = queue->tx.req_cons;
849 if (unlikely(work_to_do < 0))
850 break;
851 }
852
853 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
854 struct xen_netif_extra_info *extra;
855
856 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
857 ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);
858
859 make_tx_response(queue, &txreq, extra_count,
860 (ret == 0) ?
861 XEN_NETIF_RSP_OKAY :
862 XEN_NETIF_RSP_ERROR);
863 push_tx_responses(queue);
864 continue;
865 }
866
867 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
868 struct xen_netif_extra_info *extra;
869
870 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
871 xenvif_mcast_del(queue->vif, extra->u.mcast.addr);
872
873 make_tx_response(queue, &txreq, extra_count,
874 XEN_NETIF_RSP_OKAY);
875 push_tx_responses(queue);
876 continue;
877 }
878
879 ret = xenvif_count_requests(queue, &txreq, extra_count,
880 txfrags, work_to_do);
881 if (unlikely(ret < 0))
882 break;
883
884 idx += ret;
885
886 if (unlikely(txreq.size < ETH_HLEN)) {
887 netdev_dbg(queue->vif->dev,
888 "Bad packet size: %d\n", txreq.size);
889 xenvif_tx_err(queue, &txreq, extra_count, idx);
890 break;
891 }
892
		/* The payload must not cross a page boundary, as it mustn't be fragmented. */
894 if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
895 netdev_err(queue->vif->dev,
896 "txreq.offset: %u, size: %u, end: %lu\n",
897 txreq.offset, txreq.size,
898 (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
899 xenvif_fatal_tx_err(queue->vif);
900 break;
901 }
902
903 index = pending_index(queue->pending_cons);
904 pending_idx = queue->pending_ring[index];
905
906 data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
907 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
908 XEN_NETBACK_TX_COPY_LEN : txreq.size;
909
910 skb = xenvif_alloc_skb(data_len);
911 if (unlikely(skb == NULL)) {
912 netdev_dbg(queue->vif->dev,
913 "Can't allocate a skb in start_xmit.\n");
914 xenvif_tx_err(queue, &txreq, extra_count, idx);
915 break;
916 }
917
918 skb_shinfo(skb)->nr_frags = ret;
919 if (data_len < txreq.size)
920 skb_shinfo(skb)->nr_frags++;
921 /* At this point shinfo->nr_frags is in fact the number of
922 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
923 */
924 frag_overflow = 0;
925 nskb = NULL;
926 if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
927 frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
928 BUG_ON(frag_overflow > MAX_SKB_FRAGS);
929 skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
930 nskb = xenvif_alloc_skb(0);
931 if (unlikely(nskb == NULL)) {
				skb_shinfo(skb)->nr_frags = 0;
				kfree_skb(skb);
934 xenvif_tx_err(queue, &txreq, extra_count, idx);
935 if (net_ratelimit())
936 netdev_err(queue->vif->dev,
937 "Can't allocate the frag_list skb.\n");
938 break;
939 }
940 }
941
942 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
943 struct xen_netif_extra_info *gso;
944 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
945
946 if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
947 /* Failure in xenvif_set_skb_gso is fatal. */
				skb_shinfo(skb)->nr_frags = 0;
				kfree_skb(skb);
950 kfree_skb(nskb);
951 break;
952 }
953 }
954
955 if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
956 struct xen_netif_extra_info *extra;
957 enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
958
959 extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
960
961 switch (extra->u.hash.type) {
962 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
963 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
964 type = PKT_HASH_TYPE_L3;
965 break;
966
967 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
968 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
969 type = PKT_HASH_TYPE_L4;
970 break;
971
972 default:
973 break;
974 }
975
976 if (type != PKT_HASH_TYPE_NONE)
977 skb_set_hash(skb,
978 *(u32 *)extra->u.hash.value,
979 type);
980 }
981
982 XENVIF_TX_CB(skb)->pending_idx = pending_idx;
983
984 __skb_put(skb, data_len);
985 queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
986 queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
987 queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
988
989 queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
990 virt_to_gfn(skb->data);
991 queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
992 queue->tx_copy_ops[*copy_ops].dest.offset =
993 offset_in_page(skb->data) & ~XEN_PAGE_MASK;
994
995 queue->tx_copy_ops[*copy_ops].len = data_len;
996 queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
997
998 (*copy_ops)++;
999
1000 if (data_len < txreq.size) {
1001 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1002 pending_idx);
1003 xenvif_tx_create_map_op(queue, pending_idx, &txreq,
1004 extra_count, gop);
1005 gop++;
1006 } else {
1007 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1008 INVALID_PENDING_IDX);
1009 memcpy(&queue->pending_tx_info[pending_idx].req,
1010 &txreq, sizeof(txreq));
1011 queue->pending_tx_info[pending_idx].extra_count =
1012 extra_count;
1013 }
1014
1015 queue->pending_cons++;
1016
1017 gop = xenvif_get_requests(queue, skb, txfrags, gop,
1018 frag_overflow, nskb);
1019
1020 __skb_queue_tail(&queue->tx_queue, skb);
1021
1022 queue->tx.req_cons = idx;
1023
1024 if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
1025 (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
1026 break;
1027 }
1028
1029 (*map_ops) = gop - queue->tx_map_ops;
1030 return;
1031}
1032
/* Consolidate an skb with a frag_list into a brand new one with local pages
 * on its frags. Returns 0, or -ENOMEM if it can't allocate new pages.
 */
1036static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
1037{
1038 unsigned int offset = skb_headlen(skb);
1039 skb_frag_t frags[MAX_SKB_FRAGS];
1040 int i, f;
1041 struct ubuf_info *uarg;
1042 struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1043
1044 queue->stats.tx_zerocopy_sent += 2;
1045 queue->stats.tx_frag_overflow++;
1046
1047 xenvif_fill_frags(queue, nskb);
	/* Subtract the frags' size; we will correct it later. */
1049 skb->truesize -= skb->data_len;
1050 skb->len += nskb->len;
1051 skb->data_len += nskb->len;
1052
1053 /* create a brand new frags array and coalesce there */
1054 for (i = 0; offset < skb->len; i++) {
1055 struct page *page;
1056 unsigned int len;
1057
1058 BUG_ON(i >= MAX_SKB_FRAGS);
1059 page = alloc_page(GFP_ATOMIC);
1060 if (!page) {
1061 int j;
1062 skb->truesize += skb->data_len;
1063 for (j = 0; j < i; j++)
				put_page(skb_frag_page(&frags[j]));
			return -ENOMEM;
1066 }
1067
1068 if (offset + PAGE_SIZE < skb->len)
1069 len = PAGE_SIZE;
1070 else
1071 len = skb->len - offset;
1072 if (skb_copy_bits(skb, offset, page_address(page), len))
1073 BUG();
1074
1075 offset += len;
		__skb_frag_set_page(&frags[i], page);
		skb_frag_off_set(&frags[i], 0);
		skb_frag_size_set(&frags[i], len);
1079 }
1080
	/* Release all the original (foreign) frags. */
1082 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1083 skb_frag_unref(skb, f);
1084 uarg = skb_shinfo(skb)->destructor_arg;
1085 /* increase inflight counter to offset decrement in callback */
1086 atomic_inc(&queue->inflight_packets);
1087 uarg->callback(uarg, true);
1088 skb_shinfo(skb)->destructor_arg = NULL;
1089
1090 /* Fill the skb with the new (local) frags. */
1091 memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
1092 skb_shinfo(skb)->nr_frags = i;
1093 skb->truesize += i * PAGE_SIZE;
1094
1095 return 0;
1096}
1097
1098static int xenvif_tx_submit(struct xenvif_queue *queue)
1099{
1100 struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1101 struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1102 struct sk_buff *skb;
1103 int work_done = 0;
1104
1105 while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
1106 struct xen_netif_tx_request *txp;
1107 u16 pending_idx;
1108 unsigned data_len;
1109
1110 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
1111 txp = &queue->pending_tx_info[pending_idx].req;
1112
1113 /* Check the remap error code. */
1114 if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1115 /* If there was an error, xenvif_tx_check_gop is
1116 * expected to release all the frags which were mapped,
1117 * so kfree_skb shouldn't do it again
1118 */
1119 skb_shinfo(skb)->nr_frags = 0;
1120 if (skb_has_frag_list(skb)) {
1121 struct sk_buff *nskb =
1122 skb_shinfo(skb)->frag_list;
1123 skb_shinfo(nskb)->nr_frags = 0;
1124 }
1125 kfree_skb(skb);
1126 continue;
1127 }
1128
1129 data_len = skb->len;
1130 callback_param(queue, pending_idx).ctx = NULL;
1131 if (data_len < txp->size) {
1132 /* Append the packet payload as a fragment. */
1133 txp->offset += data_len;
1134 txp->size -= data_len;
1135 } else {
1136 /* Schedule a response immediately. */
1137 xenvif_idx_release(queue, pending_idx,
1138 XEN_NETIF_RSP_OKAY);
1139 }
1140
1141 if (txp->flags & XEN_NETTXF_csum_blank)
1142 skb->ip_summed = CHECKSUM_PARTIAL;
1143 else if (txp->flags & XEN_NETTXF_data_validated)
1144 skb->ip_summed = CHECKSUM_UNNECESSARY;
1145
1146 xenvif_fill_frags(queue, skb);
1147
1148 if (unlikely(skb_has_frag_list(skb))) {
			struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
			xenvif_skb_zerocopy_prepare(queue, nskb);
			if (xenvif_handle_frag_list(queue, skb)) {
1152 if (net_ratelimit())
1153 netdev_err(queue->vif->dev,
1154 "Not enough memory to consolidate frag_list!\n");
1155 xenvif_skb_zerocopy_prepare(queue, skb);
1156 kfree_skb(skb);
1157 continue;
1158 }
			/* Copied all the bits from the frag list -- free it. */
			skb_frag_list_init(skb);
			kfree_skb(nskb);
		}
1163
1164 skb->dev = queue->vif->dev;
1165 skb->protocol = eth_type_trans(skb, skb->dev);
1166 skb_reset_network_header(skb);
1167
1168 if (checksum_setup(queue, skb)) {
1169 netdev_dbg(queue->vif->dev,
1170 "Can't setup checksum in net_tx_action\n");
1171 /* We have to set this flag to trigger the callback */
1172 if (skb_shinfo(skb)->destructor_arg)
1173 xenvif_skb_zerocopy_prepare(queue, skb);
1174 kfree_skb(skb);
1175 continue;
1176 }
1177
		skb_probe_transport_header(skb);

1180 /* If the packet is GSO then we will have just set up the
1181 * transport header offset in checksum_setup so it's now
1182 * straightforward to calculate gso_segs.
1183 */
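		/* Worked example (illustrative figures): for a 65226-byte GSO
		 * skb with a 66-byte MAC+IP+TCP header and an MSS of 1448,
		 * gso_segs = DIV_ROUND_UP(65226 - 66, 1448) = 45.
		 */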
1184 if (skb_is_gso(skb)) {
			int mss, hdrlen;
1186
1187 /* GSO implies having the L4 header. */
1188 WARN_ON_ONCE(!skb_transport_header_was_set(skb));
1189 if (unlikely(!skb_transport_header_was_set(skb))) {
1190 kfree_skb(skb);
1191 continue;
1192 }
1193
1194 mss = skb_shinfo(skb)->gso_size;
			hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);
1198
1199 skb_shinfo(skb)->gso_segs =
1200 DIV_ROUND_UP(skb->len - hdrlen, mss);
1201 }
1202
1203 queue->stats.rx_bytes += skb->len;
1204 queue->stats.rx_packets++;
1205
1206 work_done++;
1207
1208 /* Set this flag right before netif_receive_skb, otherwise
1209 * someone might think this packet already left netback, and
1210 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such a thing.
1212 */
1213 if (skb_shinfo(skb)->destructor_arg) {
1214 xenvif_skb_zerocopy_prepare(queue, skb);
1215 queue->stats.tx_zerocopy_sent++;
1216 }
1217
1218 netif_receive_skb(skb);
1219 }
1220
1221 return work_done;
1222}
1223
1224void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1225{
1226 unsigned long flags;
1227 pending_ring_idx_t index;
1228 struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1229
1230 /* This is the only place where we grab this lock, to protect callbacks
1231 * from each other.
1232 */
1233 spin_lock_irqsave(&queue->callback_lock, flags);
1234 do {
1235 u16 pending_idx = ubuf->desc;
1236 ubuf = (struct ubuf_info *) ubuf->ctx;
1237 BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1238 MAX_PENDING_REQS);
1239 index = pending_index(queue->dealloc_prod);
1240 queue->dealloc_ring[index] = pending_idx;
1241 /* Sync with xenvif_tx_dealloc_action:
1242 * insert idx then incr producer.
1243 */
1244 smp_wmb();
1245 queue->dealloc_prod++;
1246 } while (ubuf);
1247 spin_unlock_irqrestore(&queue->callback_lock, flags);
1248
1249 if (likely(zerocopy_success))
1250 queue->stats.tx_zerocopy_success++;
1251 else
1252 queue->stats.tx_zerocopy_fail++;
1253 xenvif_skb_zerocopy_complete(queue);
1254}
1255
1256static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1257{
1258 struct gnttab_unmap_grant_ref *gop;
1259 pending_ring_idx_t dc, dp;
1260 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1261 unsigned int i = 0;
1262
1263 dc = queue->dealloc_cons;
1264 gop = queue->tx_unmap_ops;
1265
1266 /* Free up any grants we have finished using */
1267 do {
1268 dp = queue->dealloc_prod;
1269
1270 /* Ensure we see all indices enqueued by all
1271 * xenvif_zerocopy_callback().
1272 */
1273 smp_rmb();
1274
1275 while (dc != dp) {
1276 BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
1277 pending_idx =
1278 queue->dealloc_ring[pending_index(dc++)];
1279
1280 pending_idx_release[gop - queue->tx_unmap_ops] =
1281 pending_idx;
1282 queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
1283 queue->mmap_pages[pending_idx];
1284 gnttab_set_unmap_op(gop,
1285 idx_to_kaddr(queue, pending_idx),
1286 GNTMAP_host_map,
1287 queue->grant_tx_handle[pending_idx]);
1288 xenvif_grant_handle_reset(queue, pending_idx);
1289 ++gop;
1290 }
1291
1292 } while (dp != queue->dealloc_prod);
1293
1294 queue->dealloc_cons = dc;
1295
1296 if (gop - queue->tx_unmap_ops > 0) {
1297 int ret;
1298 ret = gnttab_unmap_refs(queue->tx_unmap_ops,
1299 NULL,
1300 queue->pages_to_unmap,
1301 gop - queue->tx_unmap_ops);
1302 if (ret) {
1303 netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
1304 gop - queue->tx_unmap_ops, ret);
1305 for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
1306 if (gop[i].status != GNTST_okay)
1307 netdev_err(queue->vif->dev,
1308 " host_addr: 0x%llx handle: 0x%x status: %d\n",
1309 gop[i].host_addr,
1310 gop[i].handle,
1311 gop[i].status);
1312 }
1313 BUG();
1314 }
1315 }
1316
1317 for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
1318 xenvif_idx_release(queue, pending_idx_release[i],
1319 XEN_NETIF_RSP_OKAY);
1320}
1321
1322
1323/* Called after netfront has transmitted */
1324int xenvif_tx_action(struct xenvif_queue *queue, int budget)
1325{
1326 unsigned nr_mops, nr_cops = 0;
1327 int work_done, ret;
1328
1329 if (unlikely(!tx_work_todo(queue)))
1330 return 0;
1331
1332 xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
1333
1334 if (nr_cops == 0)
1335 return 0;
1336
1337 gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
1338 if (nr_mops != 0) {
1339 ret = gnttab_map_refs(queue->tx_map_ops,
1340 NULL,
1341 queue->pages_to_map,
1342 nr_mops);
		if (ret) {
			unsigned int i;

			netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
				   nr_mops, ret);
			for (i = 0; i < nr_mops; ++i)
				WARN_ON_ONCE(queue->tx_map_ops[i].status ==
					     GNTST_okay);
		}
	}
1353
1354 work_done = xenvif_tx_submit(queue);
1355
1356 return work_done;
1357}
1358
1359static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1360 u8 status)
1361{
1362 struct pending_tx_info *pending_tx_info;
1363 pending_ring_idx_t index;
1364 unsigned long flags;
1365
1366 pending_tx_info = &queue->pending_tx_info[pending_idx];
1367
1368 spin_lock_irqsave(&queue->response_lock, flags);
1369
1370 make_tx_response(queue, &pending_tx_info->req,
1371 pending_tx_info->extra_count, status);
1372
	/* Release the pending index before pushing the Tx response so
	 * it's available before a new Tx request is pushed by the
	 * frontend.
	 */
1377 index = pending_index(queue->pending_prod++);
1378 queue->pending_ring[index] = pending_idx;
1379
1380 push_tx_responses(queue);
1381
1382 spin_unlock_irqrestore(&queue->response_lock, flags);
1383}
1384
1385
1386static void make_tx_response(struct xenvif_queue *queue,
1387 struct xen_netif_tx_request *txp,
1388 unsigned int extra_count,
1389 s8 st)
1390{
1391 RING_IDX i = queue->tx.rsp_prod_pvt;
1392 struct xen_netif_tx_response *resp;
1393
1394 resp = RING_GET_RESPONSE(&queue->tx, i);
1395 resp->id = txp->id;
1396 resp->status = st;
1397
1398 while (extra_count-- != 0)
1399 RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1400
1401 queue->tx.rsp_prod_pvt = ++i;
1402}
1403
1404static void push_tx_responses(struct xenvif_queue *queue)
1405{
1406 int notify;
1407
1408 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1409 if (notify)
1410 notify_remote_via_irq(queue->tx_irq);
1411}
1412
1413void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1414{
1415 int ret;
1416 struct gnttab_unmap_grant_ref tx_unmap_op;
1417
1418 gnttab_set_unmap_op(&tx_unmap_op,
1419 idx_to_kaddr(queue, pending_idx),
1420 GNTMAP_host_map,
1421 queue->grant_tx_handle[pending_idx]);
1422 xenvif_grant_handle_reset(queue, pending_idx);
1423
1424 ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
1425 &queue->mmap_pages[pending_idx], 1);
1426 if (ret) {
1427 netdev_err(queue->vif->dev,
1428 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
1429 ret,
1430 pending_idx,
1431 tx_unmap_op.host_addr,
1432 tx_unmap_op.handle,
1433 tx_unmap_op.status);
1434 BUG();
1435 }
1436}
1437
1438static inline int tx_work_todo(struct xenvif_queue *queue)
1439{
1440 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1441 return 1;
1442
1443 return 0;
1444}
1445
1446static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1447{
1448 return queue->dealloc_cons != queue->dealloc_prod;
1449}
1450
1451void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
1452{
1453 if (queue->tx.sring)
1454 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1455 queue->tx.sring);
1456 if (queue->rx.sring)
1457 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1458 queue->rx.sring);
1459}
1460
1461int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
1462 grant_ref_t tx_ring_ref,
1463 grant_ref_t rx_ring_ref)
1464{
1465 void *addr;
1466 struct xen_netif_tx_sring *txs;
1467 struct xen_netif_rx_sring *rxs;
1468
1469 int err = -ENOMEM;
1470
1471 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1472 &tx_ring_ref, 1, &addr);
1473 if (err)
1474 goto err;
1475
1476 txs = (struct xen_netif_tx_sring *)addr;
1477 BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1478
1479 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1480 &rx_ring_ref, 1, &addr);
1481 if (err)
1482 goto err;
1483
1484 rxs = (struct xen_netif_rx_sring *)addr;
1485 BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1486
1487 return 0;
1488
1489err:
1490 xenvif_unmap_frontend_data_rings(queue);
1491 return err;
1492}
1493
1494static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
1495{
1496 /* Dealloc thread must remain running until all inflight
1497 * packets complete.
1498 */
1499 return kthread_should_stop() &&
1500 !atomic_read(&queue->inflight_packets);
1501}
1502
1503int xenvif_dealloc_kthread(void *data)
1504{
1505 struct xenvif_queue *queue = data;
1506
1507 for (;;) {
1508 wait_event_interruptible(queue->dealloc_wq,
1509 tx_dealloc_work_todo(queue) ||
1510 xenvif_dealloc_kthread_should_stop(queue));
1511 if (xenvif_dealloc_kthread_should_stop(queue))
1512 break;
1513
1514 xenvif_tx_dealloc_action(queue);
1515 cond_resched();
1516 }
1517
	/* Unmap anything remaining. */
1519 if (tx_dealloc_work_todo(queue))
1520 xenvif_tx_dealloc_action(queue);
1521
1522 return 0;
1523}
1524
1525static void make_ctrl_response(struct xenvif *vif,
1526 const struct xen_netif_ctrl_request *req,
1527 u32 status, u32 data)
1528{
1529 RING_IDX idx = vif->ctrl.rsp_prod_pvt;
1530 struct xen_netif_ctrl_response rsp = {
1531 .id = req->id,
1532 .type = req->type,
1533 .status = status,
1534 .data = data,
1535 };
1536
1537 *RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
1538 vif->ctrl.rsp_prod_pvt = ++idx;
1539}
1540
1541static void push_ctrl_response(struct xenvif *vif)
1542{
1543 int notify;
1544
1545 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
1546 if (notify)
1547 notify_remote_via_irq(vif->ctrl_irq);
1548}
1549
1550static void process_ctrl_request(struct xenvif *vif,
1551 const struct xen_netif_ctrl_request *req)
1552{
1553 u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
1554 u32 data = 0;
1555
1556 switch (req->type) {
1557 case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
1558 status = xenvif_set_hash_alg(vif, req->data[0]);
1559 break;
1560
1561 case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
1562 status = xenvif_get_hash_flags(vif, &data);
1563 break;
1564
1565 case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
1566 status = xenvif_set_hash_flags(vif, req->data[0]);
1567 break;
1568
1569 case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
1570 status = xenvif_set_hash_key(vif, req->data[0],
1571 req->data[1]);
1572 break;
1573
1574 case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
1575 status = XEN_NETIF_CTRL_STATUS_SUCCESS;
1576 data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
1577 break;
1578
1579 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
1580 status = xenvif_set_hash_mapping_size(vif,
1581 req->data[0]);
1582 break;
1583
1584 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
1585 status = xenvif_set_hash_mapping(vif, req->data[0],
1586 req->data[1],
1587 req->data[2]);
1588 break;
1589
1590 default:
1591 break;
1592 }
1593
1594 make_ctrl_response(vif, req, status, data);
1595 push_ctrl_response(vif);
1596}
1597
1598static void xenvif_ctrl_action(struct xenvif *vif)
1599{
1600 for (;;) {
1601 RING_IDX req_prod, req_cons;
1602
1603 req_prod = vif->ctrl.sring->req_prod;
1604 req_cons = vif->ctrl.req_cons;
1605
1606 /* Make sure we can see requests before we process them. */
1607 rmb();
1608
1609 if (req_cons == req_prod)
1610 break;
1611
1612 while (req_cons != req_prod) {
1613 struct xen_netif_ctrl_request req;
1614
1615 RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
1616 req_cons++;
1617
1618 process_ctrl_request(vif, &req);
1619 }
1620
1621 vif->ctrl.req_cons = req_cons;
1622 vif->ctrl.sring->req_event = req_cons + 1;
1623 }
1624}
1625
1626static bool xenvif_ctrl_work_todo(struct xenvif *vif)
1627{
1628 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
1629 return true;
1630
1631 return false;
1632}
1633
1634irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
1635{
1636 struct xenvif *vif = data;
	unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;

	while (xenvif_ctrl_work_todo(vif)) {
		xenvif_ctrl_action(vif);
		eoi_flag = 0;
	}

	xen_irq_lateeoi(irq, eoi_flag);

1646 return IRQ_HANDLED;
1647}
1648
1649static int __init netback_init(void)
1650{
1651 int rc = 0;
1652
1653 if (!xen_domain())
1654 return -ENODEV;
1655
	/* Allow as many queues as there are CPUs, but at most 8 if the user
	 * has not specified a value.
	 */
1659 if (xenvif_max_queues == 0)
1660 xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
1661 num_online_cpus());
1662
1663 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1664 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1665 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1666 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1667 }
1668
1669 rc = xenvif_xenbus_init();
1670 if (rc)
1671 goto failed_init;
1672
1673#ifdef CONFIG_DEBUG_FS
1674 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
#endif /* CONFIG_DEBUG_FS */
1676
1677 return 0;
1678
1679failed_init:
1680 return rc;
1681}
1682
1683module_init(netback_init);
1684
1685static void __exit netback_fini(void)
1686{
1687#ifdef CONFIG_DEBUG_FS
	debugfs_remove_recursive(xen_netback_dbg_root);
#endif /* CONFIG_DEBUG_FS */
1690 xenvif_xenbus_fini();
1691}
1692module_exit(netback_fini);
1693
1694MODULE_LICENSE("Dual BSD/GPL");
1695MODULE_ALIAS("xen-backend:vif");