1 /****************************************************************************
2 * Solarflare driver for Xen network acceleration
4 * Copyright 2006-2008: Solarflare Communications Inc,
5 * 9501 Jeronimo Road, Suite 250,
6 * Irvine, CA 92618, USA
8 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as published
12 * by the Free Software Foundation, incorporated herein by reference.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 ****************************************************************************
25 #include <linux/if_ether.h>
27 #include <net/checksum.h>
31 #include "accel_util.h"
32 #include "accel_bufs.h"
33 #include "accel_tso.h"
34 #include "accel_ssr.h"
37 #include "etherfabric/ef_vi.h"
/*
 * NOTE(review): this file is a numbered listing with lines sampled out
 * (gaps in the embedded numbering); code lines are left byte-identical.
 */
40 * Max available space in a buffer for data once meta-data has taken
/*
 * Usable payload bytes per packet buffer: each page is split into
 * NETFRONT_ACCEL_BUFS_PER_PAGE buffers, and a struct
 * netfront_accel_tso_buffer of meta-data lives at the end of each one
 * (see multi_post_start_new_buffer below).
 */
43 #define NETFRONT_ACCEL_TX_BUF_LENGTH \
44 ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
45 - sizeof(struct netfront_accel_tso_buffer))
/* Upper bound on buffers used for one transmitted packet (iovec size) */
47 #define ACCEL_TX_MAX_BUFFERS (6)
/* Number of events fetched per ef_eventq_poll() batch */
48 #define ACCEL_VI_POLL_EVENTS (8)
/*
 * Combined initialise/finalise for the vnic's virtual interface (VI).
 * Called with hw_msg != NULL to initialise (map the event queue,
 * doorbell page and TX/RX DMA queues from the grant references in the
 * backend's hardware message, then build the ef_vi state), and with
 * hw_msg == NULL to tear everything down again (the unmap sequence
 * visible from the kfree(vnic->vi_state) onwards doubles as the
 * error-unwind path for a failed init).
 *
 * Returns 0 on success, negative on failure — TODO confirm exact codes;
 * several return/goto lines are missing from this listing.
 *
 * NOTE(review): Falcon A1 needs a separately granted evq_rptr register
 * page; Falcon B / Siena derive the rptr from the doorbell page instead
 * (see the two branches of the hw_msg->type switch).
 */
51 int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
52 struct net_accel_msg_hw *hw_msg)
54 struct ef_vi_nic_type nic_type;
55 struct net_accel_hw_falcon_b *hw_info;
56 void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
60 u8 vi_data[VI_MAPPINGS_SIZE];
65 /* And create the local macs table lock */
66 spin_lock_init(&vnic->table_lock);
68 /* Create fastpath table, initial size 8, key length 8 */
69 if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
70 EPRINTK("failed to allocate fastpath table\n");
74 vnic->hw.falcon.type = hw_msg->type;
76 switch (hw_msg->type) {
77 case NET_ACCEL_MSG_HWTYPE_FALCON_A:
78 hw_info = &hw_msg->resources.falcon_a.common;
79 /* Need the extra rptr register page on A1 */
80 io_kva = net_accel_map_iomem_page
81 (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
82 &vnic->hw.falcon.evq_rptr_mapping);
84 EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
/* Keep only the sub-page offset of the rptr register within the mapped page */
88 vnic->hw.falcon.evq_rptr = io_kva +
89 (hw_info->evq_rptr & (PAGE_SIZE - 1));
91 case NET_ACCEL_MSG_HWTYPE_FALCON_B:
92 case NET_ACCEL_MSG_HWTYPE_SIENA_A:
93 hw_info = &hw_msg->resources.falcon_b;
99 /**** Event Queue ****/
101 /* Map the event queue pages */
102 evq_gnts = hw_info->evq_mem_gnts;
103 evq_order = hw_info->evq_order;
105 EPRINTK_ON(hw_info->evq_offs != 0);
107 DPRINTK("Will map evq %d pages\n", 1 << evq_order);
110 net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
112 if (evq_base == NULL) {
113 EPRINTK("%s: evq_base failed\n", __FUNCTION__);
117 /**** Doorbells ****/
118 /* Set up the doorbell mappings. */
120 net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
121 &vnic->hw.falcon.doorbell_mapping);
122 if (doorbell_kva == NULL) {
123 EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
126 vnic->hw.falcon.doorbell = doorbell_kva;
128 /* On Falcon_B and Siena we get the rptr from the doorbell page */
129 if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B ||
130 hw_msg->type == NET_ACCEL_MSG_HWTYPE_SIENA_A) {
131 vnic->hw.falcon.evq_rptr =
132 (u32 *)((char *)vnic->hw.falcon.doorbell
133 + hw_info->evq_rptr);
136 /**** DMA Queue ****/
138 /* Set up the DMA Queues from the message. */
139 tx_dma_kva = net_accel_map_grants_contig
140 (vnic->dev, &(hw_info->txdmaq_gnt), 1,
141 &vnic->hw.falcon.txdmaq_mapping);
142 if (tx_dma_kva == NULL) {
143 EPRINTK("%s: TX dma failed\n", __FUNCTION__);
147 rx_dma_kva = net_accel_map_grants_contig
148 (vnic->dev, &(hw_info->rxdmaq_gnt), 1,
149 &vnic->hw.falcon.rxdmaq_mapping);
150 if (rx_dma_kva == NULL) {
151 EPRINTK("%s: RX dma failed\n", __FUNCTION__);
155 /* Full confession */
157 " Tx DMAQ grant %x -> %p\n"
158 " Rx DMAQ grant %x -> %p\n"
159 " EVQ grant %x -> %p\n",
160 hw_info->txdmaq_gnt, tx_dma_kva,
161 hw_info->rxdmaq_gnt, rx_dma_kva,
162 evq_gnts[0], evq_base
165 memset(vi_data, 0, sizeof(vi_data));
167 /* TODO BUG11305: convert efhw_arch to ef_vi_arch
169 * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
171 * nic_type.arch = arch;
173 nic_type.arch = (unsigned char)hw_info->nic_arch;
174 nic_type.variant = (char)hw_info->nic_variant;
175 nic_type.revision = (unsigned char)hw_info->nic_revision;
177 ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
178 1 << (evq_order + PAGE_SHIFT), evq_base,
181 ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
182 hw_info->tx_capacity, hw_info->instance,
183 doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
185 vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
186 hw_info->tx_capacity);
187 vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
188 if (vnic->vi_state == NULL) {
189 EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
192 ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
194 ef_eventq_state_init(&vnic->vi);
196 ef_vi_state_init(&vnic->vi);
/*
 * Teardown / error-unwind: release resources in reverse order of
 * acquisition.  NOTE(review): the goto labels that the init-path error
 * branches jump to are missing from this listing — presumably each
 * kfree/unmap below sits under its own label; verify against the full
 * source.
 */
201 kfree(vnic->vi_state);
202 vnic->vi_state = NULL;
204 net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
206 net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
208 net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
209 vnic->hw.falcon.doorbell = NULL;
211 net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
213 if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
214 net_accel_unmap_iomem_page(vnic->dev,
215 vnic->hw.falcon.evq_rptr_mapping);
216 vnic->hw.falcon.evq_rptr = NULL;
219 cuckoo_hash_destroy(&vnic->fastpath_table);
/*
 * Constructor: marks the VI as uninitialised.  vi_state == NULL is the
 * sentinel the dtor checks before calling the fini path.
 */
225 void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
227 /* Just mark the VI as uninitialised. */
228 vnic->vi_state = NULL;
/*
 * Initialise the VI from the backend's hardware message.  hw_msg must be
 * non-NULL here — NULL is reserved for the fini direction of
 * netfront_accel_vi_init_fini() (used by the dtor below).
 */
232 int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
234 BUG_ON(hw_msg == NULL);
235 return netfront_accel_vi_init_fini(vnic, hw_msg);
/*
 * Destructor: tear down the VI, but only if it was actually initialised
 * (vi_state is set by a successful init and NULL otherwise).
 */
239 void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
241 if (vnic->vi_state != NULL)
242 netfront_accel_vi_init_fini(vnic, NULL);
/*
 * Post one RX packet buffer (by buffer id) onto the VI's receive DMA
 * ring.  Descriptors are batched: the doorbell push to the hardware
 * (ef_vi_receive_push) only happens once NETFRONT_ACCEL_RX_DESC_BATCH
 * descriptors have accumulated, or when the ring is running low;
 * rx_dma_batched tracks how many are pending and rx_dma_level tracks
 * the ring fill level.
 */
247 void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
248 netfront_accel_pkt_desc *buf)
251 int idx = vnic->rx_dma_batched;
254 VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
255 id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
257 /* Set up a virtual buffer descriptor */
258 ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
263 vnic->rx_dma_level++;
266 * Only push the descriptor to the card if we've reached the
267 * batch size. Otherwise, the descriptors can sit around for
268 * a while. There will be plenty available.
270 if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
271 vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
273 VPRINTK("Flushing %d rx descriptors.\n", idx);
276 /* Push buffer to hardware */
277 ef_vi_receive_push(&vnic->vi);
/* Remember outstanding (un-pushed) descriptor count for next call */
282 vnic->rx_dma_batched = idx;
/*
 * Repost a completed RX buffer onto the DMA ring, or return it to the
 * free pool if the ring is already full.  After reposting, it tops the
 * ring up opportunistically with further buffers from the pool while
 * space remains.
 */
287 void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
288 netfront_accel_pkt_desc *buf)
291 VPRINTK("%s: %d\n", __FUNCTION__, id);
293 if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
294 VPRINTK("RX space is full\n");
295 netfront_accel_buf_put(vnic->rx_bufs, id);
299 VPRINTK("Completed buffer %d is reposted\n", id);
300 netfront_accel_vi_post_rx(vnic, id, buf);
303 * Let's see if there's any more to be pushed out to the NIC
306 while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
307 /* Try to allocate a buffer. */
308 buf = netfront_accel_buf_get(vnic->rx_bufs);
312 /* Add it to the rx dma queue. */
313 netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
/*
 * Fill the RX DMA ring from the buffer pool while space remains.
 * NOTE(review): the is_rx parameter is not used in the lines visible
 * here — presumably a TX branch exists in the sampled-out portion;
 * confirm against the full source.
 */
318 void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
322 ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
323 netfront_accel_pkt_desc *buf;
325 VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
327 /* Try to allocate a buffer. */
328 buf = netfront_accel_buf_get(vnic->rx_bufs);
333 /* Add it to the rx dma queue. */
334 netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
337 VPRINTK("%s: done\n", __FUNCTION__);
/*
 * Cursor state used while copying a (possibly fragmented) skb into a
 * chain of accelerated TX packet buffers.  output_buffers is a stack
 * (newest first) of TSO buffers built so far; ifc tracks the read
 * position within the current skb fragment.
 */
341 struct netfront_accel_multi_state {
342 unsigned remaining_len;
346 struct netfront_accel_tso_buffer *output_buffers;
348 /* Where we are in the current fragment of the SKB. */
350 /* address of current position */
352 /* remaining length */
354 } ifc; /* == Input Fragment Cursor */
/*
 * Initialise the multi-buffer copy state for an skb: no output buffers
 * yet, and the input cursor points at the skb's linear (head) data.
 */
358 static inline void multi_post_start(struct netfront_accel_multi_state *st,
361 st->remaining_len = skb->len;
362 st->output_buffers = NULL;
364 st->ifc.len = skb_headlen(skb);
365 st->ifc.addr = skb->data;
/*
 * Allocate a fresh TX packet buffer and push it onto the front of the
 * output_buffers stack.  The struct netfront_accel_tso_buffer meta-data
 * lives in the tail of the buffer itself, just past
 * NETFRONT_ACCEL_TX_BUF_LENGTH payload bytes.
 * Returns non-zero if the buffer pool is exhausted — TODO confirm exact
 * value; the return statements are missing from this listing.
 */
368 static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
369 struct netfront_accel_multi_state *st)
371 struct netfront_accel_tso_buffer *tso_buf;
372 struct netfront_accel_pkt_desc *buf;
374 /* Get a mapped packet buffer */
375 buf = netfront_accel_buf_get(vnic->tx_bufs);
377 DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
381 /* Store a bit of meta-data at the end */
382 tso_buf = (struct netfront_accel_tso_buffer *)
383 (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
389 tso_buf->next = st->output_buffers;
390 st->output_buffers = tso_buf;
393 BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
396 * Store the context, set to NULL, last packet buffer will get
399 tso_buf->buf->skb = NULL;
/*
 * Copy as much of the current input fragment as fits into the current
 * (front) output buffer.  Early-returns if the fragment is exhausted or
 * the buffer is already full; the caller loops, starting new buffers
 * and advancing fragments as needed.
 */
406 multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
407 struct netfront_accel_multi_state *st)
409 struct netfront_accel_tso_buffer *tso_buf;
412 BUG_ON(st->output_buffers == NULL);
413 tso_buf = st->output_buffers;
415 if (st->ifc.len == 0) return;
416 if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
418 BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
/* n = bytes copied this call: bounded by fragment left and buffer space */
420 space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
421 n = min(st->ifc.len, space);
423 memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
425 st->remaining_len -= n;
427 tso_buf->length += n;
430 BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
/*
 * Failure path: walk the output_buffers stack and return every packet
 * buffer to the TX pool, leaving the multi-post state empty.
 */
436 static inline void multi_post_unwind(netfront_accel_vnic *vnic,
437 struct netfront_accel_multi_state *st)
439 struct netfront_accel_tso_buffer *tso_buf;
441 DPRINTK("%s\n", __FUNCTION__);
443 while (st->output_buffers != NULL) {
444 tso_buf = st->output_buffers;
445 st->output_buffers = tso_buf->next;
447 netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
449 BUG_ON(st->buffers != 0);
/*
 * Transmit an skb that needs more than one packet buffer: copy the
 * linear head and each page fragment into a chain of TX buffers, then
 * submit them to the NIC in a single ef_vi_transmitv() call.
 *
 * Returns NETFRONT_ACCEL_STATUS_GOOD on success; on any resource
 * shortage (buffers or descriptor-ring space) unwinds everything and
 * returns NETFRONT_ACCEL_STATUS_BUSY so the caller can retry.
 *
 * NOTE(review): for CHECKSUM_PARTIAL skbs the checksum field is zeroed
 * in place so the Falcon hardware computes it ("encourage falcon to
 * work it out for us").
 */
453 static enum netfront_accel_post_status
454 netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
456 struct netfront_accel_tso_buffer *tso_buf;
457 struct netfront_accel_multi_state state;
458 ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
460 int frag_i, rc, dma_id;
462 multi_post_start(&state, skb);
466 if (skb->ip_summed == CHECKSUM_PARTIAL) {
467 /* Set to zero to encourage falcon to work it out for us */
468 *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
471 if (multi_post_start_new_buffer(vnic, &state)) {
472 DPRINTK("%s: out of buffers\n", __FUNCTION__);
/* Main copy loop: drain current fragment, then advance / grow as needed */
477 multi_post_fill_buffer_with_fragment(vnic, &state);
479 /* Move onto the next fragment? */
480 if (state.ifc.len == 0) {
481 if (++frag_i >= skb_shinfo(skb)->nr_frags)
482 /* End of payload reached. */
484 f = &skb_shinfo(skb)->frags[frag_i];
485 state.ifc.len = skb_frag_size(f);
486 state.ifc.addr = page_address(skb_frag_page(f))
490 /* Start a new buffer? */
491 if ((state.output_buffers->length ==
492 NETFRONT_ACCEL_TX_BUF_LENGTH) &&
493 multi_post_start_new_buffer(vnic, &state)) {
494 DPRINTK("%s: out of buffers\n", __FUNCTION__);
499 /* Check for space */
500 if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
501 DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
505 /* Store the skb in what will be the last buffer's context */
506 state.output_buffers->buf->skb = skb;
507 /* Remember dma_id of what will be the last buffer */
508 dma_id = state.output_buffers->buf->buf_id;
511 * Make an iovec of the buffers in the list, reversing the
512 * buffers as we go as they are constructed on a stack
514 tso_buf = state.output_buffers;
515 for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
516 iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
517 iovecs[frag_i].iov_len = tso_buf->length;
518 tso_buf = tso_buf->next;
521 rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
522 /* We checked for space already, so it really should succeed */
525 /* Track number of tx fastpath stats */
526 vnic->netdev_stats.fastpath_tx_bytes += skb->len;
527 vnic->netdev_stats.fastpath_tx_pkts ++;
528 #if NETFRONT_ACCEL_STATS
/* High-water mark of in-flight (posted minus completed) TX packets */
531 n = vnic->netdev_stats.fastpath_tx_pkts -
532 (u32)vnic->stats.fastpath_tx_completions;
533 if (n > vnic->stats.fastpath_tx_pending_max)
534 vnic->stats.fastpath_tx_pending_max = n;
537 return NETFRONT_ACCEL_STATUS_GOOD;
/* Unwind path: give back all buffers, report BUSY so caller requeues */
540 multi_post_unwind(vnic, &state);
542 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
544 return NETFRONT_ACCEL_STATUS_BUSY;
/*
 * Transmit an skb that fits in a single packet buffer: check ring space
 * and buffer availability up front (returning BUSY on either shortage),
 * copy all fragments into the buffer, fill in single-buffer TSO
 * meta-data, and post with ef_vi_transmit().
 */
548 static enum netfront_accel_post_status
549 netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
551 struct netfront_accel_tso_buffer *tso_buf;
552 struct netfront_accel_pkt_desc *buf;
556 if (ef_vi_transmit_space(&vnic->vi) < 1) {
557 DPRINTK("%s: No TX space\n", __FUNCTION__);
558 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
559 return NETFRONT_ACCEL_STATUS_BUSY;
562 buf = netfront_accel_buf_get(vnic->tx_bufs);
564 DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
565 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
566 return NETFRONT_ACCEL_STATUS_BUSY;
569 /* Track number of tx fastpath stats */
570 vnic->netdev_stats.fastpath_tx_pkts++;
571 vnic->netdev_stats.fastpath_tx_bytes += skb->len;
573 #if NETFRONT_ACCEL_STATS
/* High-water mark of in-flight (posted minus completed) TX packets */
576 n = vnic->netdev_stats.fastpath_tx_pkts -
577 (u32)vnic->stats.fastpath_tx_completions;
578 if (n > vnic->stats.fastpath_tx_pending_max)
579 vnic->stats.fastpath_tx_pending_max = n;
583 /* Store the context */
588 if (skb->ip_summed == CHECKSUM_PARTIAL) {
589 /* Set to zero to encourage falcon to work it out for us */
590 *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
592 NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
593 (skb, idx, frag_data, frag_len, {
594 /* Copy in payload */
595 VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
596 memcpy(kva, frag_data, frag_len);
600 VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
601 buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
604 /* Set up the TSO meta-data for a single buffer/packet */
605 tso_buf = (struct netfront_accel_tso_buffer *)
606 (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
607 tso_buf->next = NULL;
609 tso_buf->length = skb->len;
611 rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
613 /* We checked for space already, so it really should succeed */
616 return NETFRONT_ACCEL_STATUS_GOOD;
/*
 * Fast-path TX entry point.  Decides whether this skb can be sent over
 * the accelerated path at all (destination MAC in the fastpath table,
 * IPv4, TCP or UDP) — returning CANT otherwise so the caller falls back
 * to the ordinary netfront path — then dispatches to the TSO, single-
 * buffer or multi-buffer enqueue routine.
 *
 * Ordering matters: all CANT conditions are checked before anything
 * that can return BUSY, since a BUSY result promises that a retry will
 * not come back CANT (see the comment in the body).
 */
620 enum netfront_accel_post_status
621 netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
623 struct ethhdr *pkt_eth_hdr;
624 struct iphdr *pkt_ipv4_hdr;
625 int value, try_fastpath;
628 * This assumes that the data field points to the dest mac
631 cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
634 * NB very important that all things that could return "CANT"
635 * are tested before things that return "BUSY" as if it it
636 * returns "BUSY" it is assumed that it won't return "CANT"
637 * next time it is tried
641 * Do a fastpath send if fast path table lookup returns true.
642 * We do this without the table lock and so may get the wrong
643 * answer, but current opinion is that's not a big problem
645 try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
646 (cuckoo_hash_key *)(&key), &value);
649 VPRINTK("try fast path false for mac: %pM\n", skb->data);
651 return NETFRONT_ACCEL_STATUS_CANT;
654 /* Check to see if the packet can be sent. */
655 if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
656 EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
657 return NETFRONT_ACCEL_STATUS_CANT;
660 pkt_eth_hdr = (void*)skb->data;
661 pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
663 if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
664 DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
665 be16_to_cpu(pkt_eth_hdr->h_proto));
666 return NETFRONT_ACCEL_STATUS_CANT;
669 if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
670 pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
671 DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
672 __FUNCTION__, pkt_ipv4_hdr->protocol);
673 return NETFRONT_ACCEL_STATUS_CANT;
676 VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
677 skb_shinfo(skb)->gso_size);
/* Dispatch: GSO -> TSO path; small -> single buffer; else multi buffer */
679 if (skb_shinfo(skb)->gso_size) {
680 return netfront_accel_enqueue_skb_tso(vnic, skb);
683 if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
684 return netfront_accel_enqueue_skb_single(vnic, skb);
687 return netfront_accel_enqueue_skb_multi(vnic, skb);
692 * Copy the data to required end destination. NB. len is the total new
693 * length of the socket buffer, not the amount of data to copy
/*
 * Append the RX packet-buffer payload to the skb so the skb ends up len
 * bytes long.  The volatile strided reads over the source buffer and
 * skb tail are a deliberate cache-prefetch ("pull stuff into the
 * cache"); the checksum-like accumulator exists only to keep the reads
 * from being optimised away.  Returns 0 on success — TODO confirm the
 * failure return for insufficient tailroom; it is missing from this
 * listing.
 */
696 int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
697 struct netfront_accel_pkt_desc *buf, int len)
699 int i, extra = len - skb->len;
701 int pkt_stride = vnic->rx_pkt_stride;
702 int skb_stride = vnic->rx_skb_stride;
706 * This pulls stuff into the cache - have seen performance
707 * benefit in this, but disabled by default
709 skb_start = skb->data;
711 for (i = 0; i < len; i += pkt_stride) {
712 c += ((volatile char*)(buf->pkt_kva))[i];
716 for (i = skb->len; i < len ; i += skb_stride) {
717 c += ((volatile char*)(skb_start))[i];
721 if (skb_tailroom(skb) >= extra) {
722 memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
/*
 * Abandon any partially reassembled multi-fragment ("jumbo") RX packet:
 * free the accumulated skb and reset the in_progress flag.
 */
730 static void discard_jumbo_state(netfront_accel_vnic *vnic)
733 if (vnic->jumbo_state.skb != NULL) {
734 dev_kfree_skb_any(vnic->jumbo_state.skb);
736 vnic->jumbo_state.skb = NULL;
738 vnic->jumbo_state.in_progress = 0;
/*
 * Hand a fully received skb up the stack.  Side effects on the way:
 *  - learns the sender's MAC into the fastpath TX table (lock-free
 *    pre-check, then re-checked under table_lock);
 *  - if the destination MAC is not ours, notifies the backend via
 *    netfront_accel_msg_tx_fastpath (the NIC presumably filtered us a
 *    packet meant for another guest — confirm against full source);
 *  - marks checksum as already verified by hardware, then delivers via
 *    the SSR (soft segment reassembly) path or netif_receive_skb.
 */
742 static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
745 cuckoo_hash_mac_key key;
748 struct net_device *net_dev;
/* Source MAC follows the destination MAC in the Ethernet header */
751 key = cuckoo_mac_to_key(skb->data + ETH_ALEN);
754 * If this is a MAC address that we want to do fast path TX
755 * to, and we don't already, add it to the fastpath table.
756 * The initial lookup is done without the table lock and so
757 * may get the wrong answer, but current opinion is that's not
760 if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
761 !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
763 spin_lock_irqsave(&vnic->table_lock, flags);
765 cuckoo_hash_add_check(&vnic->fastpath_table,
766 (cuckoo_hash_key *)&key,
769 spin_unlock_irqrestore(&vnic->table_lock, flags);
772 if (compare_ether_addr(skb->data, vnic->mac)) {
773 struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
776 DPRINTK("%s: saw wrong MAC address %pM\n",
777 __FUNCTION__, skb->data);
779 if (ip->protocol == IPPROTO_TCP) {
780 struct tcphdr *tcp = (struct tcphdr *)
781 ((char *)ip + 4 * ip->ihl);
784 struct udphdr *udp = (struct udphdr *)
785 ((char *)ip + 4 * ip->ihl);
786 EPRINTK_ON(ip->protocol != IPPROTO_UDP);
790 netfront_accel_msg_tx_fastpath(vnic, skb->data,
795 net_dev = vnic->net_dev;
796 skb->protocol = eth_type_trans(skb, net_dev);
797 /* CHECKSUM_UNNECESSARY as hardware has done it already */
798 skb->ip_summed = CHECKSUM_UNNECESSARY;
800 if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
801 netif_receive_skb(skb);
/*
 * Process one RX (or RX_DISCARD) event from the event queue.
 * Completes the DMA (ef_vi_receive_done), validates the buffer id,
 * handles multi-fragment "jumbo" reassembly via vnic->jumbo_state
 * (sop = start-of-packet, cont = continuation), copies payload into an
 * skb, reposts the hardware buffer, and on a complete packet hands the
 * skb to netfront_accel_vi_rx_complete.  Discard events are logged and
 * counted per discard reason, then fall through to the buffer-repost /
 * error-accounting tail.
 *
 * NOTE(review): several control-transfer lines (gotos/returns between
 * the sections below) are missing from this listing; the section
 * boundaries are annotated from what is visible.
 */
805 static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
808 struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
809 struct netfront_accel_pkt_desc *buf = NULL;
811 int id, len, sop = 0, cont = 0;
813 VPRINTK("Rx event.\n");
815 * Complete the receive operation, and get the request id of
818 id = ef_vi_receive_done(&vnic->vi, ev);
820 if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
821 EPRINTK("Rx packet %d is invalid\n", id);
822 /* Carry on round the loop if more events */
825 /* Get our buffer descriptor */
826 buf = netfront_accel_buf_find(bufinfo, id);
828 len = EF_EVENT_RX_BYTES(*ev);
830 /* An RX buffer has been removed from the DMA ring. */
831 vnic->rx_dma_level--;
833 if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
834 sop = EF_EVENT_RX_SOP(*ev);
835 cont = EF_EVENT_RX_CONT(*ev);
837 skb = vnic->jumbo_state.skb;
839 VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
/* Jumbo state machine: sop begins a packet, cont extends one */
843 if (!vnic->jumbo_state.in_progress) {
844 vnic->jumbo_state.in_progress = 1;
845 BUG_ON(vnic->jumbo_state.skb != NULL);
848 * This fragment shows a missing tail in
849 * previous one, but is itself possibly OK
851 DPRINTK("sop and in_progress => no tail\n");
853 /* Release the socket buffer we already had */
854 discard_jumbo_state(vnic);
856 /* Now start processing this fragment */
857 vnic->jumbo_state.in_progress = 1;
860 } else if (!vnic->jumbo_state.in_progress) {
861 DPRINTK("!sop and !in_progress => missing head\n");
866 /* Update state for next time */
867 vnic->jumbo_state.in_progress = 0;
868 vnic->jumbo_state.skb = NULL;
869 } else if (!vnic->jumbo_state.in_progress) {
870 DPRINTK("cont and !in_progress => missing head\n");
/* Allocate an skb: exact size for a single-fragment packet, MTU-sized
 * when more fragments are expected */
878 skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
880 skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
884 DPRINTK("%s: Couldn't get an rx skb.\n",
886 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
888 * Dropping this fragment means we
889 * should discard the rest too
891 discard_jumbo_state(vnic);
893 /* Carry on round the loop if more events */
899 /* Copy the data to required end destination */
900 if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
902 * No space in the skb - suggests > MTU packet
905 EPRINTK("%s: Rx packet too large (%d)\n",
907 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
908 discard_jumbo_state(vnic);
912 /* Put the buffer back in the DMA queue. */
913 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
/* Continuation expected: stash the partial skb for the next event */
916 vnic->jumbo_state.skb = skb;
920 /* Track number of rx fastpath packets */
921 vnic->netdev_stats.fastpath_rx_pkts++;
922 vnic->netdev_stats.fastpath_rx_bytes += len;
924 netfront_accel_vi_rx_complete(vnic, skb);
/* Discard-event handling: classify and count the discard reason */
929 BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
931 if (EF_EVENT_RX_DISCARD_TYPE(*ev)
932 == EF_EVENT_RX_DISCARD_TRUNC) {
933 DPRINTK("%s: " EF_EVENT_FMT
934 " buffer %d FRM_TRUNC q_id %d\n",
935 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
936 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
937 NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
938 } else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
939 == EF_EVENT_RX_DISCARD_OTHER) {
940 DPRINTK("%s: " EF_EVENT_FMT
941 " buffer %d RX_DISCARD_OTHER q_id %d\n",
942 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
943 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
944 NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_discard_other);
945 } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
946 EF_EVENT_RX_DISCARD_CSUM_BAD) {
947 DPRINTK("%s: " EF_EVENT_FMT
948 " buffer %d DISCARD CSUM_BAD q_id %d\n",
949 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
950 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
951 NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_csum_bad);
952 } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
953 EF_EVENT_RX_DISCARD_CRC_BAD) {
954 DPRINTK("%s: " EF_EVENT_FMT
955 " buffer %d DISCARD CRC_BAD q_id %d\n",
956 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
957 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
958 NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_crc_bad);
960 BUG_ON(EF_EVENT_RX_DISCARD_TYPE(*ev) !=
961 EF_EVENT_RX_DISCARD_RIGHTS);
962 DPRINTK("%s: " EF_EVENT_FMT
963 " buffer %d DISCARD RIGHTS q_id %d\n",
964 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
965 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
966 NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_rights_bad);
970 /* discard type drops through here */
973 /* Release the socket buffer we already had */
974 discard_jumbo_state(vnic);
977 BUG_ON(vnic->jumbo_state.in_progress != 0);
978 BUG_ON(vnic->jumbo_state.skb != NULL);
980 if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
981 /* Put the buffer back in the DMA queue. */
982 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
984 vnic->netdev_stats.fastpath_rx_errors++;
986 DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
987 __FUNCTION__, ev->rx.flags);
/*
 * Called once TX resources have been freed: retry the skb that was
 * parked in vnic->tx_skb after an earlier BUSY result.  If the retry is
 * no longer BUSY, clear the parked skb and wake the netdev queue —
 * taking the netfront np->tx_lock as well as the (already held, per the
 * comment) vnic tx_lock.  A CANT result here is a logic error: the
 * original post only parks skbs that already passed the CANT checks.
 */
993 static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
995 struct netfront_info *np = ((struct netfront_info *)
996 netdev_priv(vnic->net_dev));
1001 * We hold the vnic tx_lock which is sufficient to exclude
1005 if (vnic->tx_skb != NULL) {
1006 DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
1008 handled = netfront_accel_vi_tx_post(vnic, vnic->tx_skb);
1010 if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
1011 DPRINTK("%s restarting tx\n", __FUNCTION__);
1013 /* Need netfront tx_lock and vnic tx_lock to
1015 spin_lock_irqsave(&np->tx_lock, flags);
1017 vnic->tx_skb = NULL;
1019 if (netfront_check_queue_ready(vnic->net_dev)) {
1020 netif_wake_queue(vnic->net_dev);
1021 NETFRONT_ACCEL_STATS_OP
1022 (vnic->stats.queue_wakes++);
1024 spin_unlock_irqrestore(&np->tx_lock, flags);
1029 * Should never get a CANT, as it checks that before
1030 * deciding it was BUSY first time round
1032 BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
/*
 * Handle a TX completion: one hardware completion covers the whole
 * ef_vi_transmitv() chain, so walk every tso_buf in the packet, free
 * the skb stored in the last buffer's context, and return each buffer
 * to the TX pool.  If this was the last completion in the unbundled
 * batch, attempt to send any parked tx_skb via
 * netfront_accel_vi_not_busy().
 */
1037 static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
1038 struct netfront_accel_tso_buffer *tso_buf,
1041 struct netfront_accel_tso_buffer *next;
1044 * We get a single completion for every call to
1045 * ef_vi_transmitv so handle any other buffers which are part
1046 * of the same packet
1048 while (tso_buf != NULL) {
1049 if (tso_buf->buf->skb != NULL) {
1050 dev_kfree_skb_any(tso_buf->buf->skb);
1051 tso_buf->buf->skb = NULL;
1054 next = tso_buf->next;
1056 netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
1062 * If this was the last one in the batch, we try and send any
1063 * pending tx_skb. There should now be buffers and
1067 netfront_accel_vi_not_busy(vnic);
/*
 * Process one TX event: unbundle it into up to EF_VI_TRANSMIT_BATCH
 * request ids, and complete each corresponding packet under the vnic
 * tx_lock (held across the whole batch).  The tso_buf chain is
 * recovered from the meta-data stored at the tail of each packet
 * buffer; the i == n_ids-1 argument tells the last completion to retry
 * any parked skb.
 */
1071 static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
1074 struct netfront_accel_pkt_desc *buf;
1075 struct netfront_accel_tso_buffer *tso_buf;
1076 ef_request_id ids[EF_VI_TRANSMIT_BATCH];
1078 unsigned long flags;
1080 /* Get the request ids for this tx completion event. */
1081 n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
1083 /* Take the tx buffer spin lock and hold for the duration */
1084 spin_lock_irqsave(&vnic->tx_lock, flags);
1086 for (i = 0; i < n_ids; ++i) {
1087 VPRINTK("Tx packet %d complete\n", ids[i]);
1088 buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
1089 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
1091 tso_buf = (struct netfront_accel_tso_buffer *)
1092 (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
1093 BUG_ON(tso_buf->buf != buf);
1095 netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
1098 spin_unlock_irqrestore(&vnic->tx_lock, flags);
/*
 * NAPI-style poll loop: fetch events in batches of up to
 * ACCEL_VI_POLL_EVENTS (bounded by the remaining RX budget), dispatch
 * each by type (RX/RX_DISCARD, TX, RX_NO_DESC_TRUNC, unexpected),
 * and keep polling while events remain and RX budget allows.
 * Returns the number of RX packets consumed (rx_packets - rx_remain).
 * Per-poll statistics (event counts, per-type maxima) are accumulated
 * when NETFRONT_ACCEL_STATS is enabled.
 */
1102 int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
1104 ef_event ev[ACCEL_VI_POLL_EVENTS];
1105 int rx_remain = rx_packets, rc, events, i;
1106 #if NETFRONT_ACCEL_STATS
1107 int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
1109 BUG_ON(rx_packets <= 0);
1111 events = ef_eventq_poll(&vnic->vi, ev,
1112 min(rx_remain, ACCEL_VI_POLL_EVENTS));
1114 NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
1116 VPRINTK("%s: %d events\n", __FUNCTION__, events);
1118 /* Loop over each event */
1120 VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
1121 EF_EVENT_PRI_ARG(ev[i]),
1122 (unsigned long)(vnic->vi.evq_state->evq_ptr));
1124 if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
1125 (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
1126 rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
1128 BUG_ON(rx_remain < 0);
1129 NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
1130 } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
1131 netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
1132 NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
1133 } else if (EF_EVENT_TYPE(ev[i]) ==
1134 EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
1135 DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
1136 __FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
1137 discard_jumbo_state(vnic);
1138 NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
1140 EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
1141 EF_EVENT_PRI_ARG(ev[i]));
1142 NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
1147 /* Carry on round the loop if more events and more space */
1152 events = ef_eventq_poll(&vnic->vi, ev,
1154 ACCEL_VI_POLL_EVENTS));
1156 NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
1160 #if NETFRONT_ACCEL_STATS
1161 vnic->stats.event_count += n_evs_polled;
1162 vnic->stats.event_count_since_irq += n_evs_polled;
1163 if (n_evs_polled > vnic->stats.events_per_poll_max)
1164 vnic->stats.events_per_poll_max = n_evs_polled;
1165 if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
1166 vnic->stats.events_per_poll_rx_max = rx_evs_polled;
1167 if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
1168 vnic->stats.events_per_poll_tx_max = tx_evs_polled;
1171 return rx_packets - rx_remain;
1175 int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
1179 VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
1181 BUG_ON(vnic == NULL);
1182 BUG_ON(vnic->vi.evq_state == NULL);
1184 /* Do a quick check for an event. */
1185 if (ef_eventq_has_event(&vnic->vi)) {
1186 VPRINTK("%s: found event\n", __FUNCTION__);
1190 VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
1191 vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
1193 /* Request a wakeup from the hardware. */
1194 sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
1196 BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
1198 VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
1199 vnic->hw.falcon.evq_rptr);
1200 *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);