David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | /**************************************************************************** |
| 3 | * Driver for Solarflare network controllers and boards |
| 4 | * Copyright 2005-2006 Fen Systems Ltd. |
| 5 | * Copyright 2005-2013 Solarflare Communications Inc. |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | #include <linux/socket.h> |
| 9 | #include <linux/in.h> |
| 10 | #include <linux/slab.h> |
| 11 | #include <linux/ip.h> |
| 12 | #include <linux/ipv6.h> |
| 13 | #include <linux/tcp.h> |
| 14 | #include <linux/udp.h> |
| 15 | #include <linux/prefetch.h> |
| 16 | #include <linux/moduleparam.h> |
| 17 | #include <linux/iommu.h> |
| 18 | #include <net/ip.h> |
| 19 | #include <net/checksum.h> |
| 20 | #include "net_driver.h" |
| 21 | #include "efx.h" |
| 22 | #include "filter.h" |
| 23 | #include "nic.h" |
| 24 | #include "selftest.h" |
| 25 | #include "workarounds.h" |
| 26 | |
/* Number of RX descriptors we would like to add to the ring in one batch. */
#define EFX_RX_PREFERRED_BATCH 8U

/* How many RX buffers' worth of pages the recycle ring should hold.  When
 * the ring is created this figure is divided by the number of buffers per
 * page to obtain the number of page slots in the ring.
 */
#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)

/* Number of bytes of linear skb space used for copied packet headers. */
#define EFX_SKB_HEADERS  128u

/* Fill level, as a percentage of the maximum fill, below which new RX
 * descriptors are added to the ring.  Exposed as a module parameter; zero
 * selects the highest trigger level the ring allows.
 */
static unsigned int rx_refill_threshold;

/* A packet may span up to ceil(max_frame_len / buffer_size) buffers. */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
				      EFX_RX_USR_BUF_SIZE)

/*
 * Head room that must stay free in the RX descriptor ring.
 *
 * One entry is needed to prevent overflow, and one further packet's worth
 * of entries allows receives to be pipelined.
 */
#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
| 56 | |
| 57 | static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) |
| 58 | { |
| 59 | return page_address(buf->page) + buf->page_offset; |
| 60 | } |
| 61 | |
| 62 | static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) |
| 63 | { |
| 64 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
| 65 | return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); |
| 66 | #else |
| 67 | const u8 *data = eh + efx->rx_packet_hash_offset; |
| 68 | return (u32)data[0] | |
| 69 | (u32)data[1] << 8 | |
| 70 | (u32)data[2] << 16 | |
| 71 | (u32)data[3] << 24; |
| 72 | #endif |
| 73 | } |
| 74 | |
| 75 | static inline struct efx_rx_buffer * |
| 76 | efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) |
| 77 | { |
| 78 | if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) |
| 79 | return efx_rx_buffer(rx_queue, 0); |
| 80 | else |
| 81 | return rx_buf + 1; |
| 82 | } |
| 83 | |
| 84 | static inline void efx_sync_rx_buffer(struct efx_nic *efx, |
| 85 | struct efx_rx_buffer *rx_buf, |
| 86 | unsigned int len) |
| 87 | { |
| 88 | dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len, |
| 89 | DMA_FROM_DEVICE); |
| 90 | } |
| 91 | |
| 92 | void efx_rx_config_page_split(struct efx_nic *efx) |
| 93 | { |
| 94 | efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align, |
| 95 | EFX_RX_BUF_ALIGNMENT); |
| 96 | efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : |
| 97 | ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / |
| 98 | efx->rx_page_buf_step); |
| 99 | efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / |
| 100 | efx->rx_bufs_per_page; |
| 101 | efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, |
| 102 | efx->rx_bufs_per_page); |
| 103 | } |
| 104 | |
| 105 | /* Check the RX page recycle ring for a page that can be reused. */ |
| 106 | static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) |
| 107 | { |
| 108 | struct efx_nic *efx = rx_queue->efx; |
| 109 | struct page *page; |
| 110 | struct efx_rx_page_state *state; |
| 111 | unsigned index; |
| 112 | |
| 113 | index = rx_queue->page_remove & rx_queue->page_ptr_mask; |
| 114 | page = rx_queue->page_ring[index]; |
| 115 | if (page == NULL) |
| 116 | return NULL; |
| 117 | |
| 118 | rx_queue->page_ring[index] = NULL; |
| 119 | /* page_remove cannot exceed page_add. */ |
| 120 | if (rx_queue->page_remove != rx_queue->page_add) |
| 121 | ++rx_queue->page_remove; |
| 122 | |
| 123 | /* If page_count is 1 then we hold the only reference to this page. */ |
| 124 | if (page_count(page) == 1) { |
| 125 | ++rx_queue->page_recycle_count; |
| 126 | return page; |
| 127 | } else { |
| 128 | state = page_address(page); |
| 129 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, |
| 130 | PAGE_SIZE << efx->rx_buffer_order, |
| 131 | DMA_FROM_DEVICE); |
| 132 | put_page(page); |
| 133 | ++rx_queue->page_recycle_failed; |
| 134 | } |
| 135 | |
| 136 | return NULL; |
| 137 | } |
| 138 | |
| 139 | /** |
| 140 | * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers |
| 141 | * |
| 142 | * @rx_queue: Efx RX queue |
| 143 | * |
| 144 | * This allocates a batch of pages, maps them for DMA, and populates |
| 145 | * struct efx_rx_buffers for each one. Return a negative error code or |
| 146 | * 0 on success. If a single page can be used for multiple buffers, |
| 147 | * then the page will either be inserted fully, or not at all. |
| 148 | */ |
| 149 | static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) |
| 150 | { |
| 151 | struct efx_nic *efx = rx_queue->efx; |
| 152 | struct efx_rx_buffer *rx_buf; |
| 153 | struct page *page; |
| 154 | unsigned int page_offset; |
| 155 | struct efx_rx_page_state *state; |
| 156 | dma_addr_t dma_addr; |
| 157 | unsigned index, count; |
| 158 | |
| 159 | count = 0; |
| 160 | do { |
| 161 | page = efx_reuse_page(rx_queue); |
| 162 | if (page == NULL) { |
| 163 | page = alloc_pages(__GFP_COMP | |
| 164 | (atomic ? GFP_ATOMIC : GFP_KERNEL), |
| 165 | efx->rx_buffer_order); |
| 166 | if (unlikely(page == NULL)) |
| 167 | return -ENOMEM; |
| 168 | dma_addr = |
| 169 | dma_map_page(&efx->pci_dev->dev, page, 0, |
| 170 | PAGE_SIZE << efx->rx_buffer_order, |
| 171 | DMA_FROM_DEVICE); |
| 172 | if (unlikely(dma_mapping_error(&efx->pci_dev->dev, |
| 173 | dma_addr))) { |
| 174 | __free_pages(page, efx->rx_buffer_order); |
| 175 | return -EIO; |
| 176 | } |
| 177 | state = page_address(page); |
| 178 | state->dma_addr = dma_addr; |
| 179 | } else { |
| 180 | state = page_address(page); |
| 181 | dma_addr = state->dma_addr; |
| 182 | } |
| 183 | |
| 184 | dma_addr += sizeof(struct efx_rx_page_state); |
| 185 | page_offset = sizeof(struct efx_rx_page_state); |
| 186 | |
| 187 | do { |
| 188 | index = rx_queue->added_count & rx_queue->ptr_mask; |
| 189 | rx_buf = efx_rx_buffer(rx_queue, index); |
| 190 | rx_buf->dma_addr = dma_addr + efx->rx_ip_align; |
| 191 | rx_buf->page = page; |
| 192 | rx_buf->page_offset = page_offset + efx->rx_ip_align; |
| 193 | rx_buf->len = efx->rx_dma_len; |
| 194 | rx_buf->flags = 0; |
| 195 | ++rx_queue->added_count; |
| 196 | get_page(page); |
| 197 | dma_addr += efx->rx_page_buf_step; |
| 198 | page_offset += efx->rx_page_buf_step; |
| 199 | } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); |
| 200 | |
| 201 | rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; |
| 202 | } while (++count < efx->rx_pages_per_batch); |
| 203 | |
| 204 | return 0; |
| 205 | } |
| 206 | |
| 207 | /* Unmap a DMA-mapped page. This function is only called for the final RX |
| 208 | * buffer in a page. |
| 209 | */ |
| 210 | static void efx_unmap_rx_buffer(struct efx_nic *efx, |
| 211 | struct efx_rx_buffer *rx_buf) |
| 212 | { |
| 213 | struct page *page = rx_buf->page; |
| 214 | |
| 215 | if (page) { |
| 216 | struct efx_rx_page_state *state = page_address(page); |
| 217 | dma_unmap_page(&efx->pci_dev->dev, |
| 218 | state->dma_addr, |
| 219 | PAGE_SIZE << efx->rx_buffer_order, |
| 220 | DMA_FROM_DEVICE); |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, |
| 225 | struct efx_rx_buffer *rx_buf, |
| 226 | unsigned int num_bufs) |
| 227 | { |
| 228 | do { |
| 229 | if (rx_buf->page) { |
| 230 | put_page(rx_buf->page); |
| 231 | rx_buf->page = NULL; |
| 232 | } |
| 233 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); |
| 234 | } while (--num_bufs); |
| 235 | } |
| 236 | |
| 237 | /* Attempt to recycle the page if there is an RX recycle ring; the page can |
| 238 | * only be added if this is the final RX buffer, to prevent pages being used in |
| 239 | * the descriptor ring and appearing in the recycle ring simultaneously. |
| 240 | */ |
| 241 | static void efx_recycle_rx_page(struct efx_channel *channel, |
| 242 | struct efx_rx_buffer *rx_buf) |
| 243 | { |
| 244 | struct page *page = rx_buf->page; |
| 245 | struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); |
| 246 | struct efx_nic *efx = rx_queue->efx; |
| 247 | unsigned index; |
| 248 | |
| 249 | /* Only recycle the page after processing the final buffer. */ |
| 250 | if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) |
| 251 | return; |
| 252 | |
| 253 | index = rx_queue->page_add & rx_queue->page_ptr_mask; |
| 254 | if (rx_queue->page_ring[index] == NULL) { |
| 255 | unsigned read_index = rx_queue->page_remove & |
| 256 | rx_queue->page_ptr_mask; |
| 257 | |
| 258 | /* The next slot in the recycle ring is available, but |
| 259 | * increment page_remove if the read pointer currently |
| 260 | * points here. |
| 261 | */ |
| 262 | if (read_index == index) |
| 263 | ++rx_queue->page_remove; |
| 264 | rx_queue->page_ring[index] = page; |
| 265 | ++rx_queue->page_add; |
| 266 | return; |
| 267 | } |
| 268 | ++rx_queue->page_recycle_full; |
| 269 | efx_unmap_rx_buffer(efx, rx_buf); |
| 270 | put_page(rx_buf->page); |
| 271 | } |
| 272 | |
| 273 | static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, |
| 274 | struct efx_rx_buffer *rx_buf) |
| 275 | { |
| 276 | /* Release the page reference we hold for the buffer. */ |
| 277 | if (rx_buf->page) |
| 278 | put_page(rx_buf->page); |
| 279 | |
| 280 | /* If this is the last buffer in a page, unmap and free it. */ |
| 281 | if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { |
| 282 | efx_unmap_rx_buffer(rx_queue->efx, rx_buf); |
| 283 | efx_free_rx_buffers(rx_queue, rx_buf, 1); |
| 284 | } |
| 285 | rx_buf->page = NULL; |
| 286 | } |
| 287 | |
/* Offer the pages behind @n_frags consecutive, just-received buffers
 * (starting at @rx_buf) to the recycle ring.  @n_frags must be at least 1.
 */
static void efx_recycle_rx_pages(struct efx_channel *channel,
				 struct efx_rx_buffer *rx_buf,
				 unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	for (;;) {
		efx_recycle_rx_page(channel, rx_buf);
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
		if (--n_frags == 0)
			break;
	}
}
| 300 | |
/* Throw away a received packet made up of @n_frags buffers starting at
 * @rx_buf: first offer the pages for recycling, then drop the per-buffer
 * page references.
 */
static void efx_discard_rx_packet(struct efx_channel *channel,
				  struct efx_rx_buffer *rx_buf,
				  unsigned int n_frags)
{
	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	efx_free_rx_buffers(efx_channel_get_rx_queue(channel), rx_buf,
			    n_frags);
}
| 311 | |
| 312 | /** |
| 313 | * efx_fast_push_rx_descriptors - push new RX descriptors quickly |
| 314 | * @rx_queue: RX descriptor queue |
| 315 | * |
| 316 | * This will aim to fill the RX descriptor queue up to |
| 317 | * @rx_queue->@max_fill. If there is insufficient atomic |
| 318 | * memory to do so, a slow fill will be scheduled. |
| 319 | * |
| 320 | * The caller must provide serialisation (none is used here). In practise, |
| 321 | * this means this function must run from the NAPI handler, or be called |
| 322 | * when NAPI is disabled. |
| 323 | */ |
| 324 | void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) |
| 325 | { |
| 326 | struct efx_nic *efx = rx_queue->efx; |
| 327 | unsigned int fill_level, batch_size; |
| 328 | int space, rc = 0; |
| 329 | |
| 330 | if (!rx_queue->refill_enabled) |
| 331 | return; |
| 332 | |
| 333 | /* Calculate current fill level, and exit if we don't need to fill */ |
| 334 | fill_level = (rx_queue->added_count - rx_queue->removed_count); |
| 335 | EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); |
| 336 | if (fill_level >= rx_queue->fast_fill_trigger) |
| 337 | goto out; |
| 338 | |
| 339 | /* Record minimum fill level */ |
| 340 | if (unlikely(fill_level < rx_queue->min_fill)) { |
| 341 | if (fill_level) |
| 342 | rx_queue->min_fill = fill_level; |
| 343 | } |
| 344 | |
| 345 | batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; |
| 346 | space = rx_queue->max_fill - fill_level; |
| 347 | EFX_WARN_ON_ONCE_PARANOID(space < batch_size); |
| 348 | |
| 349 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
| 350 | "RX queue %d fast-filling descriptor ring from" |
| 351 | " level %d to level %d\n", |
| 352 | efx_rx_queue_index(rx_queue), fill_level, |
| 353 | rx_queue->max_fill); |
| 354 | |
| 355 | |
| 356 | do { |
| 357 | rc = efx_init_rx_buffers(rx_queue, atomic); |
| 358 | if (unlikely(rc)) { |
| 359 | /* Ensure that we don't leave the rx queue empty */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 360 | efx_schedule_slow_fill(rx_queue); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 361 | goto out; |
| 362 | } |
| 363 | } while ((space -= batch_size) >= batch_size); |
| 364 | |
| 365 | netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, |
| 366 | "RX queue %d fast-filled descriptor ring " |
| 367 | "to level %d\n", efx_rx_queue_index(rx_queue), |
| 368 | rx_queue->added_count - rx_queue->removed_count); |
| 369 | |
| 370 | out: |
| 371 | if (rx_queue->notified_count != rx_queue->added_count) |
| 372 | efx_nic_notify_rx_desc(rx_queue); |
| 373 | } |
| 374 | |
| 375 | void efx_rx_slow_fill(struct timer_list *t) |
| 376 | { |
| 377 | struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); |
| 378 | |
| 379 | /* Post an event to cause NAPI to run and refill the queue */ |
| 380 | efx_nic_generate_fill_event(rx_queue); |
| 381 | ++rx_queue->slow_fill_count; |
| 382 | } |
| 383 | |
| 384 | static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, |
| 385 | struct efx_rx_buffer *rx_buf, |
| 386 | int len) |
| 387 | { |
| 388 | struct efx_nic *efx = rx_queue->efx; |
| 389 | unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; |
| 390 | |
| 391 | if (likely(len <= max_len)) |
| 392 | return; |
| 393 | |
| 394 | /* The packet must be discarded, but this is only a fatal error |
| 395 | * if the caller indicated it was |
| 396 | */ |
| 397 | rx_buf->flags |= EFX_RX_PKT_DISCARD; |
| 398 | |
| 399 | if (net_ratelimit()) |
| 400 | netif_err(efx, rx_err, efx->net_dev, |
| 401 | "RX queue %d overlength RX event (%#x > %#x)\n", |
| 402 | efx_rx_queue_index(rx_queue), len, max_len); |
| 403 | |
| 404 | efx_rx_queue_channel(rx_queue)->n_rx_overlength++; |
| 405 | } |
| 406 | |
| 407 | /* Pass a received packet up through GRO. GRO can handle pages |
| 408 | * regardless of checksum state and skbs with a good checksum. |
| 409 | */ |
| 410 | static void |
| 411 | efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, |
| 412 | unsigned int n_frags, u8 *eh) |
| 413 | { |
| 414 | struct napi_struct *napi = &channel->napi_str; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 415 | struct efx_nic *efx = channel->efx; |
| 416 | struct sk_buff *skb; |
| 417 | |
| 418 | skb = napi_get_frags(napi); |
| 419 | if (unlikely(!skb)) { |
| 420 | struct efx_rx_queue *rx_queue; |
| 421 | |
| 422 | rx_queue = efx_channel_get_rx_queue(channel); |
| 423 | efx_free_rx_buffers(rx_queue, rx_buf, n_frags); |
| 424 | return; |
| 425 | } |
| 426 | |
| 427 | if (efx->net_dev->features & NETIF_F_RXHASH) |
| 428 | skb_set_hash(skb, efx_rx_buf_hash(efx, eh), |
| 429 | PKT_HASH_TYPE_L3); |
| 430 | skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? |
| 431 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE); |
| 432 | skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); |
| 433 | |
| 434 | for (;;) { |
| 435 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, |
| 436 | rx_buf->page, rx_buf->page_offset, |
| 437 | rx_buf->len); |
| 438 | rx_buf->page = NULL; |
| 439 | skb->len += rx_buf->len; |
| 440 | if (skb_shinfo(skb)->nr_frags == n_frags) |
| 441 | break; |
| 442 | |
| 443 | rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); |
| 444 | } |
| 445 | |
| 446 | skb->data_len = skb->len; |
| 447 | skb->truesize += n_frags * efx->rx_buffer_truesize; |
| 448 | |
| 449 | skb_record_rx_queue(skb, channel->rx_queue.core_index); |
| 450 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 451 | napi_gro_frags(napi); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 452 | } |
| 453 | |
| 454 | /* Allocate and construct an SKB around page fragments */ |
| 455 | static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, |
| 456 | struct efx_rx_buffer *rx_buf, |
| 457 | unsigned int n_frags, |
| 458 | u8 *eh, int hdr_len) |
| 459 | { |
| 460 | struct efx_nic *efx = channel->efx; |
| 461 | struct sk_buff *skb; |
| 462 | |
| 463 | /* Allocate an SKB to store the headers */ |
| 464 | skb = netdev_alloc_skb(efx->net_dev, |
| 465 | efx->rx_ip_align + efx->rx_prefix_size + |
| 466 | hdr_len); |
| 467 | if (unlikely(skb == NULL)) { |
| 468 | atomic_inc(&efx->n_rx_noskb_drops); |
| 469 | return NULL; |
| 470 | } |
| 471 | |
| 472 | EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len); |
| 473 | |
| 474 | memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size, |
| 475 | efx->rx_prefix_size + hdr_len); |
| 476 | skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size); |
| 477 | __skb_put(skb, hdr_len); |
| 478 | |
| 479 | /* Append the remaining page(s) onto the frag list */ |
| 480 | if (rx_buf->len > hdr_len) { |
| 481 | rx_buf->page_offset += hdr_len; |
| 482 | rx_buf->len -= hdr_len; |
| 483 | |
| 484 | for (;;) { |
| 485 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, |
| 486 | rx_buf->page, rx_buf->page_offset, |
| 487 | rx_buf->len); |
| 488 | rx_buf->page = NULL; |
| 489 | skb->len += rx_buf->len; |
| 490 | skb->data_len += rx_buf->len; |
| 491 | if (skb_shinfo(skb)->nr_frags == n_frags) |
| 492 | break; |
| 493 | |
| 494 | rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); |
| 495 | } |
| 496 | } else { |
| 497 | __free_pages(rx_buf->page, efx->rx_buffer_order); |
| 498 | rx_buf->page = NULL; |
| 499 | n_frags = 0; |
| 500 | } |
| 501 | |
| 502 | skb->truesize += n_frags * efx->rx_buffer_truesize; |
| 503 | |
| 504 | /* Move past the ethernet header */ |
| 505 | skb->protocol = eth_type_trans(skb, efx->net_dev); |
| 506 | |
| 507 | skb_mark_napi_id(skb, &channel->napi_str); |
| 508 | |
| 509 | return skb; |
| 510 | } |
| 511 | |
| 512 | void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, |
| 513 | unsigned int n_frags, unsigned int len, u16 flags) |
| 514 | { |
| 515 | struct efx_nic *efx = rx_queue->efx; |
| 516 | struct efx_channel *channel = efx_rx_queue_channel(rx_queue); |
| 517 | struct efx_rx_buffer *rx_buf; |
| 518 | |
| 519 | rx_queue->rx_packets++; |
| 520 | |
| 521 | rx_buf = efx_rx_buffer(rx_queue, index); |
| 522 | rx_buf->flags |= flags; |
| 523 | |
| 524 | /* Validate the number of fragments and completed length */ |
| 525 | if (n_frags == 1) { |
| 526 | if (!(flags & EFX_RX_PKT_PREFIX_LEN)) |
| 527 | efx_rx_packet__check_len(rx_queue, rx_buf, len); |
| 528 | } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || |
| 529 | unlikely(len <= (n_frags - 1) * efx->rx_dma_len) || |
| 530 | unlikely(len > n_frags * efx->rx_dma_len) || |
| 531 | unlikely(!efx->rx_scatter)) { |
| 532 | /* If this isn't an explicit discard request, either |
| 533 | * the hardware or the driver is broken. |
| 534 | */ |
| 535 | WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD)); |
| 536 | rx_buf->flags |= EFX_RX_PKT_DISCARD; |
| 537 | } |
| 538 | |
| 539 | netif_vdbg(efx, rx_status, efx->net_dev, |
| 540 | "RX queue %d received ids %x-%x len %d %s%s\n", |
| 541 | efx_rx_queue_index(rx_queue), index, |
| 542 | (index + n_frags - 1) & rx_queue->ptr_mask, len, |
| 543 | (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", |
| 544 | (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); |
| 545 | |
| 546 | /* Discard packet, if instructed to do so. Process the |
| 547 | * previous receive first. |
| 548 | */ |
| 549 | if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { |
| 550 | efx_rx_flush_packet(channel); |
| 551 | efx_discard_rx_packet(channel, rx_buf, n_frags); |
| 552 | return; |
| 553 | } |
| 554 | |
| 555 | if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN)) |
| 556 | rx_buf->len = len; |
| 557 | |
| 558 | /* Release and/or sync the DMA mapping - assumes all RX buffers |
| 559 | * consumed in-order per RX queue. |
| 560 | */ |
| 561 | efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); |
| 562 | |
| 563 | /* Prefetch nice and early so data will (hopefully) be in cache by |
| 564 | * the time we look at it. |
| 565 | */ |
| 566 | prefetch(efx_rx_buf_va(rx_buf)); |
| 567 | |
| 568 | rx_buf->page_offset += efx->rx_prefix_size; |
| 569 | rx_buf->len -= efx->rx_prefix_size; |
| 570 | |
| 571 | if (n_frags > 1) { |
| 572 | /* Release/sync DMA mapping for additional fragments. |
| 573 | * Fix length for last fragment. |
| 574 | */ |
| 575 | unsigned int tail_frags = n_frags - 1; |
| 576 | |
| 577 | for (;;) { |
| 578 | rx_buf = efx_rx_buf_next(rx_queue, rx_buf); |
| 579 | if (--tail_frags == 0) |
| 580 | break; |
| 581 | efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len); |
| 582 | } |
| 583 | rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len; |
| 584 | efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); |
| 585 | } |
| 586 | |
| 587 | /* All fragments have been DMA-synced, so recycle pages. */ |
| 588 | rx_buf = efx_rx_buffer(rx_queue, index); |
| 589 | efx_recycle_rx_pages(channel, rx_buf, n_frags); |
| 590 | |
| 591 | /* Pipeline receives so that we give time for packet headers to be |
| 592 | * prefetched into cache. |
| 593 | */ |
| 594 | efx_rx_flush_packet(channel); |
| 595 | channel->rx_pkt_n_frags = n_frags; |
| 596 | channel->rx_pkt_index = index; |
| 597 | } |
| 598 | |
| 599 | static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, |
| 600 | struct efx_rx_buffer *rx_buf, |
| 601 | unsigned int n_frags) |
| 602 | { |
| 603 | struct sk_buff *skb; |
| 604 | u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); |
| 605 | |
| 606 | skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); |
| 607 | if (unlikely(skb == NULL)) { |
| 608 | struct efx_rx_queue *rx_queue; |
| 609 | |
| 610 | rx_queue = efx_channel_get_rx_queue(channel); |
| 611 | efx_free_rx_buffers(rx_queue, rx_buf, n_frags); |
| 612 | return; |
| 613 | } |
| 614 | skb_record_rx_queue(skb, channel->rx_queue.core_index); |
| 615 | |
| 616 | /* Set the SKB flags */ |
| 617 | skb_checksum_none_assert(skb); |
| 618 | if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) { |
| 619 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
| 620 | skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); |
| 621 | } |
| 622 | |
| 623 | efx_rx_skb_attach_timestamp(channel, skb); |
| 624 | |
| 625 | if (channel->type->receive_skb) |
| 626 | if (channel->type->receive_skb(channel, skb)) |
| 627 | return; |
| 628 | |
| 629 | /* Pass the packet up */ |
| 630 | if (channel->rx_list != NULL) |
| 631 | /* Add to list, will pass up later */ |
| 632 | list_add_tail(&skb->list, channel->rx_list); |
| 633 | else |
| 634 | /* No list, so pass it up now */ |
| 635 | netif_receive_skb(skb); |
| 636 | } |
| 637 | |
| 638 | /* Handle a received packet. Second half: Touches packet payload. */ |
| 639 | void __efx_rx_packet(struct efx_channel *channel) |
| 640 | { |
| 641 | struct efx_nic *efx = channel->efx; |
| 642 | struct efx_rx_buffer *rx_buf = |
| 643 | efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); |
| 644 | u8 *eh = efx_rx_buf_va(rx_buf); |
| 645 | |
| 646 | /* Read length from the prefix if necessary. This already |
| 647 | * excludes the length of the prefix itself. |
| 648 | */ |
| 649 | if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) |
| 650 | rx_buf->len = le16_to_cpup((__le16 *) |
| 651 | (eh + efx->rx_packet_len_offset)); |
| 652 | |
| 653 | /* If we're in loopback test, then pass the packet directly to the |
| 654 | * loopback layer, and free the rx_buf here |
| 655 | */ |
| 656 | if (unlikely(efx->loopback_selftest)) { |
| 657 | struct efx_rx_queue *rx_queue; |
| 658 | |
| 659 | efx_loopback_rx_packet(efx, eh, rx_buf->len); |
| 660 | rx_queue = efx_channel_get_rx_queue(channel); |
| 661 | efx_free_rx_buffers(rx_queue, rx_buf, |
| 662 | channel->rx_pkt_n_frags); |
| 663 | goto out; |
| 664 | } |
| 665 | |
| 666 | if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) |
| 667 | rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; |
| 668 | |
| 669 | if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb) |
| 670 | efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh); |
| 671 | else |
| 672 | efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags); |
| 673 | out: |
| 674 | channel->rx_pkt_n_frags = 0; |
| 675 | } |
| 676 | |
| 677 | int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) |
| 678 | { |
| 679 | struct efx_nic *efx = rx_queue->efx; |
| 680 | unsigned int entries; |
| 681 | int rc; |
| 682 | |
| 683 | /* Create the smallest power-of-two aligned ring */ |
| 684 | entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); |
| 685 | EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); |
| 686 | rx_queue->ptr_mask = entries - 1; |
| 687 | |
| 688 | netif_dbg(efx, probe, efx->net_dev, |
| 689 | "creating RX queue %d size %#x mask %#x\n", |
| 690 | efx_rx_queue_index(rx_queue), efx->rxq_entries, |
| 691 | rx_queue->ptr_mask); |
| 692 | |
| 693 | /* Allocate RX buffers */ |
| 694 | rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), |
| 695 | GFP_KERNEL); |
| 696 | if (!rx_queue->buffer) |
| 697 | return -ENOMEM; |
| 698 | |
| 699 | rc = efx_nic_probe_rx(rx_queue); |
| 700 | if (rc) { |
| 701 | kfree(rx_queue->buffer); |
| 702 | rx_queue->buffer = NULL; |
| 703 | } |
| 704 | |
| 705 | return rc; |
| 706 | } |
| 707 | |
| 708 | static void efx_init_rx_recycle_ring(struct efx_nic *efx, |
| 709 | struct efx_rx_queue *rx_queue) |
| 710 | { |
| 711 | unsigned int bufs_in_recycle_ring, page_ring_size; |
| 712 | |
| 713 | /* Set the RX recycle ring size */ |
| 714 | #ifdef CONFIG_PPC64 |
| 715 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; |
| 716 | #else |
| 717 | if (iommu_present(&pci_bus_type)) |
| 718 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; |
| 719 | else |
| 720 | bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; |
| 721 | #endif /* CONFIG_PPC64 */ |
| 722 | |
| 723 | page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / |
| 724 | efx->rx_bufs_per_page); |
| 725 | rx_queue->page_ring = kcalloc(page_ring_size, |
| 726 | sizeof(*rx_queue->page_ring), GFP_KERNEL); |
| 727 | rx_queue->page_ptr_mask = page_ring_size - 1; |
| 728 | } |
| 729 | |
| 730 | void efx_init_rx_queue(struct efx_rx_queue *rx_queue) |
| 731 | { |
| 732 | struct efx_nic *efx = rx_queue->efx; |
| 733 | unsigned int max_fill, trigger, max_trigger; |
| 734 | |
| 735 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
| 736 | "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); |
| 737 | |
| 738 | /* Initialise ptr fields */ |
| 739 | rx_queue->added_count = 0; |
| 740 | rx_queue->notified_count = 0; |
| 741 | rx_queue->removed_count = 0; |
| 742 | rx_queue->min_fill = -1U; |
| 743 | efx_init_rx_recycle_ring(efx, rx_queue); |
| 744 | |
| 745 | rx_queue->page_remove = 0; |
| 746 | rx_queue->page_add = rx_queue->page_ptr_mask + 1; |
| 747 | rx_queue->page_recycle_count = 0; |
| 748 | rx_queue->page_recycle_failed = 0; |
| 749 | rx_queue->page_recycle_full = 0; |
| 750 | |
| 751 | /* Initialise limit fields */ |
| 752 | max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; |
| 753 | max_trigger = |
| 754 | max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; |
| 755 | if (rx_refill_threshold != 0) { |
| 756 | trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; |
| 757 | if (trigger > max_trigger) |
| 758 | trigger = max_trigger; |
| 759 | } else { |
| 760 | trigger = max_trigger; |
| 761 | } |
| 762 | |
| 763 | rx_queue->max_fill = max_fill; |
| 764 | rx_queue->fast_fill_trigger = trigger; |
| 765 | rx_queue->refill_enabled = true; |
| 766 | |
| 767 | /* Set up RX descriptor ring */ |
| 768 | efx_nic_init_rx(rx_queue); |
| 769 | } |
| 770 | |
| 771 | void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) |
| 772 | { |
| 773 | int i; |
| 774 | struct efx_nic *efx = rx_queue->efx; |
| 775 | struct efx_rx_buffer *rx_buf; |
| 776 | |
| 777 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
| 778 | "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); |
| 779 | |
| 780 | del_timer_sync(&rx_queue->slow_fill); |
| 781 | |
| 782 | /* Release RX buffers from the current read ptr to the write ptr */ |
| 783 | if (rx_queue->buffer) { |
| 784 | for (i = rx_queue->removed_count; i < rx_queue->added_count; |
| 785 | i++) { |
| 786 | unsigned index = i & rx_queue->ptr_mask; |
| 787 | rx_buf = efx_rx_buffer(rx_queue, index); |
| 788 | efx_fini_rx_buffer(rx_queue, rx_buf); |
| 789 | } |
| 790 | } |
| 791 | |
| 792 | /* Unmap and release the pages in the recycle ring. Remove the ring. */ |
| 793 | for (i = 0; i <= rx_queue->page_ptr_mask; i++) { |
| 794 | struct page *page = rx_queue->page_ring[i]; |
| 795 | struct efx_rx_page_state *state; |
| 796 | |
| 797 | if (page == NULL) |
| 798 | continue; |
| 799 | |
| 800 | state = page_address(page); |
| 801 | dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, |
| 802 | PAGE_SIZE << efx->rx_buffer_order, |
| 803 | DMA_FROM_DEVICE); |
| 804 | put_page(page); |
| 805 | } |
| 806 | kfree(rx_queue->page_ring); |
| 807 | rx_queue->page_ring = NULL; |
| 808 | } |
| 809 | |
| 810 | void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) |
| 811 | { |
| 812 | netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, |
| 813 | "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); |
| 814 | |
| 815 | efx_nic_remove_rx(rx_queue); |
| 816 | |
| 817 | kfree(rx_queue->buffer); |
| 818 | rx_queue->buffer = NULL; |
| 819 | } |
| 820 | |
| 821 | |
| 822 | module_param(rx_refill_threshold, uint, 0444); |
| 823 | MODULE_PARM_DESC(rx_refill_threshold, |
| 824 | "RX descriptor ring refill threshold (%)"); |
| 825 | |
| 826 | #ifdef CONFIG_RFS_ACCEL |
| 827 | |
| 828 | static void efx_filter_rfs_work(struct work_struct *data) |
| 829 | { |
| 830 | struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion, |
| 831 | work); |
| 832 | struct efx_nic *efx = netdev_priv(req->net_dev); |
| 833 | struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); |
| 834 | int slot_idx = req - efx->rps_slot; |
| 835 | struct efx_arfs_rule *rule; |
| 836 | u16 arfs_id = 0; |
| 837 | int rc; |
| 838 | |
| 839 | rc = efx->type->filter_insert(efx, &req->spec, true); |
| 840 | if (rc >= 0) |
| 841 | rc %= efx->type->max_rx_ip_filters; |
| 842 | if (efx->rps_hash_table) { |
| 843 | spin_lock_bh(&efx->rps_hash_lock); |
| 844 | rule = efx_rps_hash_find(efx, &req->spec); |
| 845 | /* The rule might have already gone, if someone else's request |
| 846 | * for the same spec was already worked and then expired before |
| 847 | * we got around to our work. In that case we have nothing |
| 848 | * tying us to an arfs_id, meaning that as soon as the filter |
| 849 | * is considered for expiry it will be removed. |
| 850 | */ |
| 851 | if (rule) { |
| 852 | if (rc < 0) |
| 853 | rule->filter_id = EFX_ARFS_FILTER_ID_ERROR; |
| 854 | else |
| 855 | rule->filter_id = rc; |
| 856 | arfs_id = rule->arfs_id; |
| 857 | } |
| 858 | spin_unlock_bh(&efx->rps_hash_lock); |
| 859 | } |
| 860 | if (rc >= 0) { |
| 861 | /* Remember this so we can check whether to expire the filter |
| 862 | * later. |
| 863 | */ |
| 864 | mutex_lock(&efx->rps_mutex); |
| 865 | channel->rps_flow_id[rc] = req->flow_id; |
| 866 | ++channel->rfs_filters_added; |
| 867 | mutex_unlock(&efx->rps_mutex); |
| 868 | |
| 869 | if (req->spec.ether_type == htons(ETH_P_IP)) |
| 870 | netif_info(efx, rx_status, efx->net_dev, |
| 871 | "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n", |
| 872 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", |
| 873 | req->spec.rem_host, ntohs(req->spec.rem_port), |
| 874 | req->spec.loc_host, ntohs(req->spec.loc_port), |
| 875 | req->rxq_index, req->flow_id, rc, arfs_id); |
| 876 | else |
| 877 | netif_info(efx, rx_status, efx->net_dev, |
| 878 | "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n", |
| 879 | (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", |
| 880 | req->spec.rem_host, ntohs(req->spec.rem_port), |
| 881 | req->spec.loc_host, ntohs(req->spec.loc_port), |
| 882 | req->rxq_index, req->flow_id, rc, arfs_id); |
| 883 | } |
| 884 | |
| 885 | /* Release references */ |
| 886 | clear_bit(slot_idx, &efx->rps_slot_map); |
| 887 | dev_put(req->net_dev); |
| 888 | } |
| 889 | |
| 890 | int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, |
| 891 | u16 rxq_index, u32 flow_id) |
| 892 | { |
| 893 | struct efx_nic *efx = netdev_priv(net_dev); |
| 894 | struct efx_async_filter_insertion *req; |
| 895 | struct efx_arfs_rule *rule; |
| 896 | struct flow_keys fk; |
| 897 | int slot_idx; |
| 898 | bool new; |
| 899 | int rc; |
| 900 | |
| 901 | /* find a free slot */ |
| 902 | for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++) |
| 903 | if (!test_and_set_bit(slot_idx, &efx->rps_slot_map)) |
| 904 | break; |
| 905 | if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT) |
| 906 | return -EBUSY; |
| 907 | |
| 908 | if (flow_id == RPS_FLOW_ID_INVALID) { |
| 909 | rc = -EINVAL; |
| 910 | goto out_clear; |
| 911 | } |
| 912 | |
| 913 | if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) { |
| 914 | rc = -EPROTONOSUPPORT; |
| 915 | goto out_clear; |
| 916 | } |
| 917 | |
| 918 | if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) { |
| 919 | rc = -EPROTONOSUPPORT; |
| 920 | goto out_clear; |
| 921 | } |
| 922 | if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) { |
| 923 | rc = -EPROTONOSUPPORT; |
| 924 | goto out_clear; |
| 925 | } |
| 926 | |
| 927 | req = efx->rps_slot + slot_idx; |
| 928 | efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT, |
| 929 | efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, |
| 930 | rxq_index); |
| 931 | req->spec.match_flags = |
| 932 | EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | |
| 933 | EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | |
| 934 | EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; |
| 935 | req->spec.ether_type = fk.basic.n_proto; |
| 936 | req->spec.ip_proto = fk.basic.ip_proto; |
| 937 | |
| 938 | if (fk.basic.n_proto == htons(ETH_P_IP)) { |
| 939 | req->spec.rem_host[0] = fk.addrs.v4addrs.src; |
| 940 | req->spec.loc_host[0] = fk.addrs.v4addrs.dst; |
| 941 | } else { |
| 942 | memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src, |
| 943 | sizeof(struct in6_addr)); |
| 944 | memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst, |
| 945 | sizeof(struct in6_addr)); |
| 946 | } |
| 947 | |
| 948 | req->spec.rem_port = fk.ports.src; |
| 949 | req->spec.loc_port = fk.ports.dst; |
| 950 | |
| 951 | if (efx->rps_hash_table) { |
| 952 | /* Add it to ARFS hash table */ |
| 953 | spin_lock(&efx->rps_hash_lock); |
| 954 | rule = efx_rps_hash_add(efx, &req->spec, &new); |
| 955 | if (!rule) { |
| 956 | rc = -ENOMEM; |
| 957 | goto out_unlock; |
| 958 | } |
| 959 | if (new) |
| 960 | rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER; |
| 961 | rc = rule->arfs_id; |
| 962 | /* Skip if existing or pending filter already does the right thing */ |
| 963 | if (!new && rule->rxq_index == rxq_index && |
| 964 | rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING) |
| 965 | goto out_unlock; |
| 966 | rule->rxq_index = rxq_index; |
| 967 | rule->filter_id = EFX_ARFS_FILTER_ID_PENDING; |
| 968 | spin_unlock(&efx->rps_hash_lock); |
| 969 | } else { |
| 970 | /* Without an ARFS hash table, we just use arfs_id 0 for all |
| 971 | * filters. This means if multiple flows hash to the same |
| 972 | * flow_id, all but the most recently touched will be eligible |
| 973 | * for expiry. |
| 974 | */ |
| 975 | rc = 0; |
| 976 | } |
| 977 | |
| 978 | /* Queue the request */ |
| 979 | dev_hold(req->net_dev = net_dev); |
| 980 | INIT_WORK(&req->work, efx_filter_rfs_work); |
| 981 | req->rxq_index = rxq_index; |
| 982 | req->flow_id = flow_id; |
| 983 | schedule_work(&req->work); |
| 984 | return rc; |
| 985 | out_unlock: |
| 986 | spin_unlock(&efx->rps_hash_lock); |
| 987 | out_clear: |
| 988 | clear_bit(slot_idx, &efx->rps_slot_map); |
| 989 | return rc; |
| 990 | } |
| 991 | |
| 992 | bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) |
| 993 | { |
| 994 | bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); |
| 995 | unsigned int channel_idx, index, size; |
| 996 | u32 flow_id; |
| 997 | |
| 998 | if (!mutex_trylock(&efx->rps_mutex)) |
| 999 | return false; |
| 1000 | expire_one = efx->type->filter_rfs_expire_one; |
| 1001 | channel_idx = efx->rps_expire_channel; |
| 1002 | index = efx->rps_expire_index; |
| 1003 | size = efx->type->max_rx_ip_filters; |
| 1004 | while (quota--) { |
| 1005 | struct efx_channel *channel = efx_get_channel(efx, channel_idx); |
| 1006 | flow_id = channel->rps_flow_id[index]; |
| 1007 | |
| 1008 | if (flow_id != RPS_FLOW_ID_INVALID && |
| 1009 | expire_one(efx, flow_id, index)) { |
| 1010 | netif_info(efx, rx_status, efx->net_dev, |
| 1011 | "expired filter %d [queue %u flow %u]\n", |
| 1012 | index, channel_idx, flow_id); |
| 1013 | channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; |
| 1014 | } |
| 1015 | if (++index == size) { |
| 1016 | if (++channel_idx == efx->n_channels) |
| 1017 | channel_idx = 0; |
| 1018 | index = 0; |
| 1019 | } |
| 1020 | } |
| 1021 | efx->rps_expire_channel = channel_idx; |
| 1022 | efx->rps_expire_index = index; |
| 1023 | |
| 1024 | mutex_unlock(&efx->rps_mutex); |
| 1025 | return true; |
| 1026 | } |
| 1027 | |
| 1028 | #endif /* CONFIG_RFS_ACCEL */ |
| 1029 | |
| 1030 | /** |
| 1031 | * efx_filter_is_mc_recipient - test whether spec is a multicast recipient |
| 1032 | * @spec: Specification to test |
| 1033 | * |
| 1034 | * Return: %true if the specification is a non-drop RX filter that |
| 1035 | * matches a local MAC address I/G bit value of 1 or matches a local |
| 1036 | * IPv4 or IPv6 address value in the respective multicast address |
| 1037 | * range. Otherwise %false. |
| 1038 | */ |
| 1039 | bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) |
| 1040 | { |
| 1041 | if (!(spec->flags & EFX_FILTER_FLAG_RX) || |
| 1042 | spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) |
| 1043 | return false; |
| 1044 | |
| 1045 | if (spec->match_flags & |
| 1046 | (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && |
| 1047 | is_multicast_ether_addr(spec->loc_mac)) |
| 1048 | return true; |
| 1049 | |
| 1050 | if ((spec->match_flags & |
| 1051 | (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == |
| 1052 | (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { |
| 1053 | if (spec->ether_type == htons(ETH_P_IP) && |
| 1054 | ipv4_is_multicast(spec->loc_host[0])) |
| 1055 | return true; |
| 1056 | if (spec->ether_type == htons(ETH_P_IPV6) && |
| 1057 | ((const u8 *)spec->loc_host)[0] == 0xff) |
| 1058 | return true; |
| 1059 | } |
| 1060 | |
| 1061 | return false; |
| 1062 | } |