/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 ****************************************************************************
 */
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/checksum.h>

#include "accel.h"
#include "accel_util.h"
#include "accel_bufs.h"
#include "accel_tso.h"
#include "accel_ssr.h"
#include "netfront.h"

#include "etherfabric/ef_vi.h"
/*
 * Max available space in a buffer for data once meta-data has taken
 * its place.
 */
#define NETFRONT_ACCEL_TX_BUF_LENGTH					\
	((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)			\
	 - sizeof(struct netfront_accel_tso_buffer))
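/*
 * Illustrative example (values assumed, not taken from this driver's
 * headers): with PAGE_SIZE == 4096 and NETFRONT_ACCEL_BUFS_PER_PAGE == 4,
 * each packet buffer spans 1024 bytes, of which the trailing
 * sizeof(struct netfront_accel_tso_buffer) bytes hold the per-buffer
 * meta-data and the rest is frame data.  The meta-data therefore always
 * lives at pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH, which is how the TX
 * completion path finds it again.
 */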
#define ACCEL_TX_MAX_BUFFERS (6)
#define ACCEL_VI_POLL_EVENTS (8)
int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
				struct net_accel_msg_hw *hw_msg)
{
	struct ef_vi_nic_type nic_type;
	struct net_accel_hw_falcon_b *hw_info;
	void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
	u32 *evq_gnts;
	u32 evq_order;
	int vi_state_size;
	u8 vi_data[VI_MAPPINGS_SIZE];

	if (hw_msg == NULL)
		goto fini;

	/* And create the local MAC table lock */
	spin_lock_init(&vnic->table_lock);

	/* Create fastpath table, initial size 8, key length 8 */
	if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
		EPRINTK("failed to allocate fastpath table\n");
		goto fail_cuckoo;
	}

	vnic->hw.falcon.type = hw_msg->type;

	switch (hw_msg->type) {
	case NET_ACCEL_MSG_HWTYPE_FALCON_A:
		hw_info = &hw_msg->resources.falcon_a.common;
		/* Need the extra rptr register page on A1 */
		io_kva = net_accel_map_iomem_page
			(vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
			 &vnic->hw.falcon.evq_rptr_mapping);
		if (io_kva == NULL) {
			EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
			goto evq_rptr_fail;
		}

		vnic->hw.falcon.evq_rptr = io_kva +
			(hw_info->evq_rptr & (PAGE_SIZE - 1));
		break;
	case NET_ACCEL_MSG_HWTYPE_FALCON_B:
	case NET_ACCEL_MSG_HWTYPE_SIENA_A:
		hw_info = &hw_msg->resources.falcon_b;
		break;
	default:
		goto bad_type;
	}
	/**** Event Queue ****/

	/* Map the event queue pages */
	evq_gnts = hw_info->evq_mem_gnts;
	evq_order = hw_info->evq_order;

	EPRINTK_ON(hw_info->evq_offs != 0);

	DPRINTK("Will map evq %d pages\n", 1 << evq_order);

	evq_base =
		net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
					    &vnic->evq_mapping);
	if (evq_base == NULL) {
		EPRINTK("%s: evq_base failed\n", __FUNCTION__);
		goto evq_fail;
	}

	/**** Doorbells ****/
	/* Set up the doorbell mappings. */
	doorbell_kva =
		net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
					 &vnic->hw.falcon.doorbell_mapping);
	if (doorbell_kva == NULL) {
		EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
		goto doorbell_fail;
	}
	vnic->hw.falcon.doorbell = doorbell_kva;

	/* On Falcon B and Siena we get the rptr from the doorbell page */
	if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B ||
	    hw_msg->type == NET_ACCEL_MSG_HWTYPE_SIENA_A) {
		vnic->hw.falcon.evq_rptr =
			(u32 *)((char *)vnic->hw.falcon.doorbell
				+ hw_info->evq_rptr);
	}
	/**** DMA Queue ****/

	/* Set up the DMA Queues from the message. */
	tx_dma_kva = net_accel_map_grants_contig
		(vnic->dev, &(hw_info->txdmaq_gnt), 1,
		 &vnic->hw.falcon.txdmaq_mapping);
	if (tx_dma_kva == NULL) {
		EPRINTK("%s: TX dma failed\n", __FUNCTION__);
		goto tx_dma_fail;
	}

	rx_dma_kva = net_accel_map_grants_contig
		(vnic->dev, &(hw_info->rxdmaq_gnt), 1,
		 &vnic->hw.falcon.rxdmaq_mapping);
	if (rx_dma_kva == NULL) {
		EPRINTK("%s: RX dma failed\n", __FUNCTION__);
		goto rx_dma_fail;
	}

	/* Full confession */
	DPRINTK("Mapped hardware:\n"
		"  Tx DMAQ grant %x -> %p\n"
		"  Rx DMAQ grant %x -> %p\n"
		"  EVQ grant %x -> %p\n",
		hw_info->txdmaq_gnt, tx_dma_kva,
		hw_info->rxdmaq_gnt, rx_dma_kva,
		evq_gnts[0], evq_base);
	memset(vi_data, 0, sizeof(vi_data));

	/* TODO BUG11305: convert efhw_arch to ef_vi_arch
	 * e.g.
	 * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
	 * assert(arch >= 0);
	 * nic_type.arch = arch;
	 */
	nic_type.arch = (unsigned char)hw_info->nic_arch;
	nic_type.variant = (char)hw_info->nic_variant;
	nic_type.revision = (unsigned char)hw_info->nic_revision;

	ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
			       1 << (evq_order + PAGE_SHIFT), evq_base,
			       (void *)0xdeadbeef);

	ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
			      hw_info->tx_capacity, hw_info->instance,
			      doorbell_kva, rx_dma_kva, tx_dma_kva, 0);

	vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
					       hw_info->tx_capacity);
	vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
	if (vnic->vi_state == NULL) {
		EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
		goto vi_state_fail;
	}
	ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);

	ef_eventq_state_init(&vnic->vi);

	ef_vi_state_init(&vnic->vi);

	return 0;
fini:
	/* The unwind below also serves as the normal tear-down path */
	kfree(vnic->vi_state);
	vnic->vi_state = NULL;
vi_state_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
rx_dma_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
tx_dma_fail:
	net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
	vnic->hw.falcon.doorbell = NULL;
doorbell_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
evq_fail:
	if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
		net_accel_unmap_iomem_page(vnic->dev,
					   vnic->hw.falcon.evq_rptr_mapping);
	vnic->hw.falcon.evq_rptr = NULL;
evq_rptr_fail:
bad_type:
	cuckoo_hash_destroy(&vnic->fastpath_table);
fail_cuckoo:
	return -EIO;
}
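/*
 * Lifecycle note: netfront_accel_vi_init_fini() doubles as constructor
 * and destructor.  A non-NULL hw_msg runs the init path above; passing
 * NULL jumps straight to the unwind labels, which release everything in
 * reverse order of creation.  The three thin wrappers below hang off
 * this: ctor just marks the VI unused, init insists on a real message,
 * and dtor tears down only if init ever succeeded.
 */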
void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
{
	/* Just mark the VI as uninitialised. */
	vnic->vi_state = NULL;
}
int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
{
	BUG_ON(hw_msg == NULL);
	return netfront_accel_vi_init_fini(vnic, hw_msg);
}
void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
{
	if (vnic->vi_state != NULL)
		netfront_accel_vi_init_fini(vnic, NULL);
}
static
void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
			       netfront_accel_pkt_desc *buf)
{
	int idx = vnic->rx_dma_batched;

	VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
		id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));

	/* Set up a virtual buffer descriptor */
	ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
			   /*rx_bytes=max*/0);

	idx++;

	vnic->rx_dma_level++;

	/*
	 * Only push the descriptor to the card if we've reached the
	 * batch size.  Otherwise, the descriptors can sit around for
	 * a while.  There will be plenty available.
	 */
	if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
	    vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
		VPRINTK("Flushing %d rx descriptors.\n", idx);

		/* Push buffer to hardware */
		ef_vi_receive_push(&vnic->vi);

		idx = 0;
	}

	vnic->rx_dma_batched = idx;
}
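/*
 * Batching example (NETFRONT_ACCEL_RX_DESC_BATCH is defined elsewhere in
 * this driver; the value here is assumed for illustration): with a batch
 * size of 8, the first 7 calls above only initialise descriptors and
 * bump rx_dma_batched; the 8th call pushes all 8 to the NIC in one
 * doorbell write and resets the count.  The rx_dma_level test forces an
 * immediate push while the ring is nearly empty, so the NIC is never
 * left starved of buffers just to save doorbell writes.
 */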
void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
				       netfront_accel_pkt_desc *buf)
{
	VPRINTK("%s: %d\n", __FUNCTION__, id);

	if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
		VPRINTK("RX space is full\n");
		netfront_accel_buf_put(vnic->rx_bufs, id);
		return;
	}

	VPRINTK("Completed buffer %d is reposted\n", id);
	netfront_accel_vi_post_rx(vnic, id, buf);

	/*
	 * Let's see if there's any more to be pushed out to the NIC
	 * while we're here
	 */
	while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
		/* Try to allocate a buffer. */
		buf = netfront_accel_buf_get(vnic->rx_bufs);
		if (buf == NULL)
			break;

		/* Add it to the rx dma queue. */
		netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
	}
}
void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
{
	while (is_rx &&
	       ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
		netfront_accel_pkt_desc *buf;

		VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);

		/* Try to allocate a buffer. */
		buf = netfront_accel_buf_get(vnic->rx_bufs);
		if (buf == NULL)
			break;

		/* Add it to the rx dma queue. */
		netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
	}

	VPRINTK("%s: done\n", __FUNCTION__);
}
struct netfront_accel_multi_state {
	unsigned remaining_len;

	unsigned buffers;

	struct netfront_accel_tso_buffer *output_buffers;

	/* Where we are in the current fragment of the SKB. */
	struct {
		/* address of current position */
		void *addr;
		/* remaining length */
		unsigned int len;
	} ifc; /* == Input Fragment Cursor */
};
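/*
 * Sketch of the state during a multi-buffer send (layout illustrative):
 *
 *   output_buffers -> [buf N] -> ... -> [buf 1] -> [buf 0] -> NULL
 *
 * Buffers are pushed on the front of the list as they are allocated, so
 * the list runs newest-first; netfront_accel_enqueue_skb_multi() below
 * walks it once to build the iovec array in reverse, restoring wire
 * order.  The ifc cursor tracks how much of the current skb fragment
 * (head area first, then each page fragment) still needs copying.
 */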
static inline void multi_post_start(struct netfront_accel_multi_state *st,
				    struct sk_buff *skb)
{
	st->remaining_len = skb->len;
	st->output_buffers = NULL;
	st->buffers = 0;
	st->ifc.len = skb_headlen(skb);
	st->ifc.addr = skb->data;
}
static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
				       struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_pkt_desc *buf;

	/* Get a mapped packet buffer */
	buf = netfront_accel_buf_get(vnic->tx_bufs);
	if (buf == NULL) {
		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
		return -1;
	}

	/* Store a bit of meta-data at the end */
	tso_buf = (struct netfront_accel_tso_buffer *)
		(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);

	tso_buf->buf = buf;
	tso_buf->length = 0;

	tso_buf->next = st->output_buffers;
	st->output_buffers = tso_buf;
	st->buffers++;

	BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);

	/*
	 * Store the context: set to NULL for now, the last packet
	 * buffer will get the skb pointer filled in later
	 */
	tso_buf->buf->skb = NULL;

	return 0;
}
static void
multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
				     struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	unsigned n, space;

	BUG_ON(st->output_buffers == NULL);
	tso_buf = st->output_buffers;

	if (st->ifc.len == 0) return;
	if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;

	BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);

	space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
	n = min(st->ifc.len, space);

	memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);

	st->remaining_len -= n;
	st->ifc.len -= n;
	st->ifc.addr += n;
	tso_buf->length += n;

	BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
}
static inline void multi_post_unwind(netfront_accel_vnic *vnic,
				     struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;

	DPRINTK("%s\n", __FUNCTION__);

	while (st->output_buffers != NULL) {
		tso_buf = st->output_buffers;
		st->output_buffers = tso_buf->next;
		st->buffers--;
		netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
	}
	BUG_ON(st->buffers != 0);
}
static enum netfront_accel_post_status
netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_multi_state state;
	ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
	skb_frag_t *f;
	int frag_i, rc, dma_id;

	multi_post_start(&state, skb);

	frag_i = -1;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Set to zero to encourage falcon to work it out for us */
		*(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
	}

	if (multi_post_start_new_buffer(vnic, &state)) {
		DPRINTK("%s: out of buffers\n", __FUNCTION__);
		goto unwind;
	}

	while (1) {
		multi_post_fill_buffer_with_fragment(vnic, &state);

		/* Move onto the next fragment? */
		if (state.ifc.len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			f = &skb_shinfo(skb)->frags[frag_i];
			state.ifc.len = f->size;
			state.ifc.addr = page_address(f->page) + f->page_offset;
		}

		/* Start a new buffer? */
		if ((state.output_buffers->length ==
		     NETFRONT_ACCEL_TX_BUF_LENGTH) &&
		    multi_post_start_new_buffer(vnic, &state)) {
			DPRINTK("%s: out of buffers\n", __FUNCTION__);
			goto unwind;
		}
	}

	/* Check for space */
	if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
		DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__,
			state.buffers);
		goto unwind;
	}

	/* Store the skb in what will be the last buffer's context */
	state.output_buffers->buf->skb = skb;
	/* Remember dma_id of what will be the last buffer */
	dma_id = state.output_buffers->buf->buf_id;

	/*
	 * Make an iovec of the buffers in the list, reversing the
	 * buffers as we go as they are constructed on a stack
	 */
	tso_buf = state.output_buffers;
	for (frag_i = state.buffers - 1; frag_i >= 0; frag_i--) {
		iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
		iovecs[frag_i].iov_len = tso_buf->length;
		tso_buf = tso_buf->next;
	}

	rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
	BUG_ON(rc != 0);

	/* Update the tx fastpath stats */
	vnic->netdev_stats.fastpath_tx_bytes += skb->len;
	vnic->netdev_stats.fastpath_tx_pkts++;
#if NETFRONT_ACCEL_STATS
	{
		u32 n;
		n = vnic->netdev_stats.fastpath_tx_pkts -
			(u32)vnic->stats.fastpath_tx_completions;
		if (n > vnic->stats.fastpath_tx_pending_max)
			vnic->stats.fastpath_tx_pending_max = n;
	}
#endif
	return NETFRONT_ACCEL_STATUS_GOOD;

unwind:
	multi_post_unwind(vnic, &state);

	NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);

	return NETFRONT_ACCEL_STATUS_BUSY;
}
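/*
 * Worked example (sizes assumed for illustration): a 2.5KB linear skb
 * with NETFRONT_ACCEL_TX_BUF_LENGTH of roughly 1.5KB is copied into two
 * buffers: the first fills completely, the second takes the remaining
 * ~1KB.  The skb pointer and dma_id go with the newest (second) buffer,
 * and the iovec is built as {buf0, buf1} so the NIC transmits the bytes
 * in their original order with a single ef_vi_transmitv() call.
 */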
static enum netfront_accel_post_status
netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_pkt_desc *buf;
	u8 *kva;
	int rc;

	if (ef_vi_transmit_space(&vnic->vi) < 1) {
		DPRINTK("%s: No TX space\n", __FUNCTION__);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
		return NETFRONT_ACCEL_STATUS_BUSY;
	}

	buf = netfront_accel_buf_get(vnic->tx_bufs);
	if (buf == NULL) {
		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
		return NETFRONT_ACCEL_STATUS_BUSY;
	}

	/* Update the tx fastpath stats */
	vnic->netdev_stats.fastpath_tx_pkts++;
	vnic->netdev_stats.fastpath_tx_bytes += skb->len;

#if NETFRONT_ACCEL_STATS
	{
		u32 n;
		n = vnic->netdev_stats.fastpath_tx_pkts -
			(u32)vnic->stats.fastpath_tx_completions;
		if (n > vnic->stats.fastpath_tx_pending_max)
			vnic->stats.fastpath_tx_pending_max = n;
	}
#endif

	/* Store the context */
	buf->skb = skb;

	kva = buf->pkt_kva;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Set to zero to encourage falcon to work it out for us */
		*(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
	}
	NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
		(skb, idx, frag_data, frag_len, {
			/* Copy in payload */
			VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
			memcpy(kva, frag_data, frag_len);
			kva += frag_len;
		});

	VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
		buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);

	/* Set up the TSO meta-data for a single buffer/packet */
	tso_buf = (struct netfront_accel_tso_buffer *)
		(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
	tso_buf->next = NULL;
	tso_buf->buf = buf;
	tso_buf->length = skb->len;

	rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
			    buf->buf_id);
	/* We checked for space already, so it really should succeed */
	BUG_ON(rc != 0);

	return NETFRONT_ACCEL_STATUS_GOOD;
}
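/*
 * Note on the two copy paths: the single-buffer path above needs one
 * descriptor and one contiguous copy, so it is used whenever the whole
 * frame fits in NETFRONT_ACCEL_TX_BUF_LENGTH; anything larger (but not
 * GSO) falls back to netfront_accel_enqueue_skb_multi(), which chains
 * several buffers together at the cost of one descriptor per buffer.
 * The dispatch between them is in netfront_accel_vi_tx_post() below.
 */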
enum netfront_accel_post_status
netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct ethhdr *pkt_eth_hdr;
	struct iphdr *pkt_ipv4_hdr;
	int value, try_fastpath;

	/*
	 * This assumes that the data field points to the dest mac
	 * address.
	 */
	cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);

	/*
	 * NB very important that all things that could return "CANT"
	 * are tested before things that return "BUSY"; if it returns
	 * "BUSY" it is assumed that it won't return "CANT" the next
	 * time it is tried
	 */

	/*
	 * Do a fastpath send if the fast path table lookup returns
	 * true.  We do this without the table lock and so may get the
	 * wrong answer, but current opinion is that's not a big
	 * problem
	 */
	try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
					  (cuckoo_hash_key *)(&key), &value);
	if (!try_fastpath) {
		VPRINTK("try fast path false for mac: %pM\n", skb->data);

		return NETFRONT_ACCEL_STATUS_CANT;
	}

	/* Check to see if the packet can be sent. */
	if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
		EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	pkt_eth_hdr = (void*)skb->data;
	pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);

	if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
		DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n",
			__FUNCTION__, be16_to_cpu(pkt_eth_hdr->h_proto));
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
	    pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
		DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
			__FUNCTION__, pkt_ipv4_hdr->protocol);
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
		skb_shinfo(skb)->gso_size);

	if (skb_shinfo(skb)->gso_size) {
		return netfront_accel_enqueue_skb_tso(vnic, skb);
	}

	if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
		return netfront_accel_enqueue_skb_single(vnic, skb);
	}

	return netfront_accel_enqueue_skb_multi(vnic, skb);
}
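/*
 * Caller contract sketch (the real caller is netfront's accelerated
 * transmit hook, not shown in this file): a CANT return means "send via
 * the normal netfront path"; a BUSY return means the frame was not
 * posted and the caller is expected to stash it (see vnic->tx_skb and
 * netfront_accel_vi_not_busy() below) and retry after the next TX
 * completion frees buffers and descriptors.
 */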
/*
 * Copy the data to required end destination.  NB. len is the total new
 * length of the socket buffer, not the amount of data to copy
 */
int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
			struct netfront_accel_pkt_desc *buf, int len)
{
	int i, extra = len - skb->len;
	char c = 0;
	int pkt_stride = vnic->rx_pkt_stride;
	int skb_stride = vnic->rx_skb_stride;
	char *skb_start;

	/*
	 * This pulls stuff into the cache - have seen performance
	 * benefit in this, but disabled by default
	 */
	skb_start = skb->data;
	if (pkt_stride) {
		for (i = 0; i < len; i += pkt_stride) {
			c += ((volatile char*)(buf->pkt_kva))[i];
		}
	}
	if (skb_stride) {
		for (i = skb->len; i < len; i += skb_stride) {
			c += ((volatile char*)(skb_start))[i];
		}
	}

	if (skb_tailroom(skb) >= extra) {
		memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
		return 0;
	}

	return -ENOSPC;
}
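/*
 * Example of the len semantics (numbers illustrative): while reassembling
 * a jumbo frame the skb may already hold 1500 bytes; if the next fragment
 * carries the running total to len == 2500, only extra == 1000 bytes are
 * appended from the packet buffer.  A failure here therefore means the
 * pre-sized skb has run out of tailroom, i.e. the frame is bigger than
 * the MTU it was allocated for.
 */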
static void discard_jumbo_state(netfront_accel_vnic *vnic)
{
	if (vnic->jumbo_state.skb != NULL) {
		dev_kfree_skb_any(vnic->jumbo_state.skb);

		vnic->jumbo_state.skb = NULL;
	}
	vnic->jumbo_state.in_progress = 0;
}
static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
					  struct sk_buff *skb)
{
	cuckoo_hash_mac_key key;
	unsigned long flags;
	u32 value;
	struct net_device *net_dev;

	key = cuckoo_mac_to_key(skb->data + ETH_ALEN);

	/*
	 * If this is a MAC address that we want to do fast path TX
	 * to, and we don't already, add it to the fastpath table.
	 * The initial lookup is done without the table lock and so
	 * may get the wrong answer, but current opinion is that's not
	 * a big problem
	 */
	if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
	    !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
				&value)) {
		spin_lock_irqsave(&vnic->table_lock, flags);

		cuckoo_hash_add_check(&vnic->fastpath_table,
				      (cuckoo_hash_key *)&key,
				      1, 1);

		spin_unlock_irqrestore(&vnic->table_lock, flags);
	}

	if (compare_ether_addr(skb->data, vnic->mac)) {
		struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
		u16 port;

		DPRINTK("%s: saw wrong MAC address %pM\n",
			__FUNCTION__, skb->data);

		if (ip->protocol == IPPROTO_TCP) {
			struct tcphdr *tcp = (struct tcphdr *)
				((char *)ip + 4 * ip->ihl);
			port = tcp->dest;
		} else {
			struct udphdr *udp = (struct udphdr *)
				((char *)ip + 4 * ip->ihl);
			EPRINTK_ON(ip->protocol != IPPROTO_UDP);
			port = udp->dest;
		}

		netfront_accel_msg_tx_fastpath(vnic, skb->data,
					       ip->daddr, port,
					       ip->protocol);
	}

	net_dev = vnic->net_dev;
	skb->protocol = eth_type_trans(skb, net_dev);
	/* CHECKSUM_UNNECESSARY as hardware has done it already */
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
		netif_receive_skb(skb);
}
static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
					     ef_event *ev)
{
	struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
	struct netfront_accel_pkt_desc *buf = NULL;
	struct sk_buff *skb;
	int id, len, sop = 0, cont = 0;

	VPRINTK("Rx event.\n");
	/*
	 * Complete the receive operation, and get the request id of
	 * the buffer
	 */
	id = ef_vi_receive_done(&vnic->vi, ev);

	if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
		EPRINTK("Rx packet %d is invalid\n", id);
		/* Carry on round the loop if more events */
		goto bad_packet;
	}
	/* Get our buffer descriptor */
	buf = netfront_accel_buf_find(bufinfo, id);

	len = EF_EVENT_RX_BYTES(*ev);

	/* An RX buffer has been removed from the DMA ring. */
	vnic->rx_dma_level--;
	if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
		sop = EF_EVENT_RX_SOP(*ev);
		cont = EF_EVENT_RX_CONT(*ev);

		skb = vnic->jumbo_state.skb;

		VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
			id, len, sop, cont);

		if (sop) {
			if (!vnic->jumbo_state.in_progress) {
				vnic->jumbo_state.in_progress = 1;
				BUG_ON(vnic->jumbo_state.skb != NULL);
			} else {
				/*
				 * This fragment shows a missing tail in
				 * the previous one, but is itself possibly OK
				 */
				DPRINTK("sop and in_progress => no tail\n");

				/* Release the socket buffer we already had */
				discard_jumbo_state(vnic);

				/* Now start processing this fragment */
				vnic->jumbo_state.in_progress = 1;
				skb = NULL;
			}
		} else if (!vnic->jumbo_state.in_progress) {
			DPRINTK("!sop and !in_progress => missing head\n");
			goto missing_head;
		}

		if (!cont) {
			/* Update state for next time */
			vnic->jumbo_state.in_progress = 0;
			vnic->jumbo_state.skb = NULL;
		} else if (!vnic->jumbo_state.in_progress) {
			DPRINTK("cont and !in_progress => missing head\n");
			goto missing_head;
		}

		if (skb == NULL) {
			BUG_ON(!sop);

			if (!cont)
				skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
			else
				skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
						GFP_ATOMIC);

			if (skb == NULL) {
				DPRINTK("%s: Couldn't get an rx skb.\n",
					__FUNCTION__);
				netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
				/*
				 * Dropping this fragment means we
				 * should discard the rest too
				 */
				discard_jumbo_state(vnic);

				/* Carry on round the loop if more events */
				return 0;
			}
		}
		/* Copy the data to required end destination */
		if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
			/*
			 * No space in the skb - suggests > MTU packet
			 * received
			 */
			EPRINTK("%s: Rx packet too large (%d)\n",
				__FUNCTION__, len);
			netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
			discard_jumbo_state(vnic);
			return 0;
		}

		/* Put the buffer back in the DMA queue. */
		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);

		if (cont) {
			vnic->jumbo_state.skb = skb;

			return 0;
		} else {
			/* Update the rx fastpath stats */
			vnic->netdev_stats.fastpath_rx_pkts++;
			vnic->netdev_stats.fastpath_rx_bytes += len;

			netfront_accel_vi_rx_complete(vnic, skb);

			return 1;
		}
	} else {
		BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);

		if (EF_EVENT_RX_DISCARD_TYPE(*ev)
		    == EF_EVENT_RX_DISCARD_TRUNC) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d FRM_TRUNC q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
			   == EF_EVENT_RX_DISCARD_OTHER) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d RX_DISCARD_OTHER q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_discard_other);
		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
			   EF_EVENT_RX_DISCARD_CSUM_BAD) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d DISCARD CSUM_BAD q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_csum_bad);
		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
			   EF_EVENT_RX_DISCARD_CRC_BAD) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d DISCARD CRC_BAD q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_crc_bad);
		} else {
			BUG_ON(EF_EVENT_RX_DISCARD_TYPE(*ev) !=
			       EF_EVENT_RX_DISCARD_RIGHTS);
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d DISCARD RIGHTS q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_rights_bad);
		}
	}
	/* discard type drops through here */

bad_packet:
	/* Release the socket buffer we already had */
	discard_jumbo_state(vnic);

missing_head:
	BUG_ON(vnic->jumbo_state.in_progress != 0);
	BUG_ON(vnic->jumbo_state.skb != NULL);

	if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
		/* Put the buffer back in the DMA queue. */
		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);

	vnic->netdev_stats.fastpath_rx_errors++;

	DPRINTK("%s experienced bad packet/missing fragment error: %d\n",
		__FUNCTION__, ev->rx.flags);

	return 0;
}
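/*
 * Summary of the jumbo reassembly state machine above (derived from the
 * code, for orientation): SOP with no frame in progress starts a new
 * packet; SOP mid-frame means the previous frame lost its tail, so it is
 * dropped and a new one started; !SOP with nothing in progress means the
 * head was lost and the fragment goes to missing_head.  CONT set keeps
 * the partial skb in vnic->jumbo_state between events; CONT clear hands
 * the completed skb to netfront_accel_vi_rx_complete().
 */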
static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
{
	struct netfront_info *np = ((struct netfront_info *)
				    netdev_priv(vnic->net_dev));
	int handled;
	unsigned long flags;

	/*
	 * We hold the vnic tx_lock which is sufficient to exclude
	 * writes to tx_skb
	 */

	if (vnic->tx_skb != NULL) {
		DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);

		handled = netfront_accel_vi_tx_post(vnic, vnic->tx_skb);

		if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
			DPRINTK("%s restarting tx\n", __FUNCTION__);

			/* Need netfront tx_lock and vnic tx_lock to
			 * write tx_skb */
			spin_lock_irqsave(&np->tx_lock, flags);

			vnic->tx_skb = NULL;

			if (netfront_check_queue_ready(vnic->net_dev)) {
				netif_wake_queue(vnic->net_dev);
				NETFRONT_ACCEL_STATS_OP
					(vnic->stats.queue_wakes++);
			}
			spin_unlock_irqrestore(&np->tx_lock, flags);
		}

		/*
		 * Should never get a CANT, as it checks that before
		 * deciding it was BUSY first time round
		 */
		BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
	}
}
static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
					  struct netfront_accel_tso_buffer *tso_buf,
					  int is_last)
{
	struct netfront_accel_tso_buffer *next;

	/*
	 * We get a single completion for every call to
	 * ef_vi_transmitv so handle any other buffers which are part
	 * of the same packet
	 */
	while (tso_buf != NULL) {
		if (tso_buf->buf->skb != NULL) {
			dev_kfree_skb_any(tso_buf->buf->skb);
			tso_buf->buf->skb = NULL;
		}

		next = tso_buf->next;

		netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);

		tso_buf = next;
	}

	/*
	 * If this was the last one in the batch, we try and send any
	 * pending tx_skb.  There should now be buffers and
	 * descriptors free
	 */
	if (is_last)
		netfront_accel_vi_not_busy(vnic);
}
static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
					      ef_event *ev)
{
	struct netfront_accel_pkt_desc *buf;
	struct netfront_accel_tso_buffer *tso_buf;
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, n_ids;
	unsigned long flags;

	/* Get the request ids for this tx completion event. */
	n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);

	/* Take the tx buffer spin lock and hold for the duration */
	spin_lock_irqsave(&vnic->tx_lock, flags);

	for (i = 0; i < n_ids; ++i) {
		VPRINTK("Tx packet %d complete\n", ids[i]);
		buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);

		tso_buf = (struct netfront_accel_tso_buffer *)
			(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
		BUG_ON(tso_buf->buf != buf);

		netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
	}

	spin_unlock_irqrestore(&vnic->tx_lock, flags);
}
int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
{
	ef_event ev[ACCEL_VI_POLL_EVENTS];
	int rx_remain = rx_packets, rc, events, i;
#if NETFRONT_ACCEL_STATS
	int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
#endif
	BUG_ON(rx_packets <= 0);

	events = ef_eventq_poll(&vnic->vi, ev,
				min(rx_remain, ACCEL_VI_POLL_EVENTS));

	NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);

	VPRINTK("%s: %d events\n", __FUNCTION__, events);

	/* Loop over each event */
	while (events) {
		for (i = 0; i < events; i++) {
			VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
				EF_EVENT_PRI_ARG(ev[i]),
				(unsigned long)(vnic->vi.evq_state->evq_ptr));

			if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
			    (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
				rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
				rx_remain -= rc;
				BUG_ON(rx_remain < 0);
				NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
			} else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
				netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
				NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
			} else if (EF_EVENT_TYPE(ev[i]) ==
				   EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
				DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
					__FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
				discard_jumbo_state(vnic);
				NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
			} else {
				EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
					EF_EVENT_PRI_ARG(ev[i]));
				NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
			}
		}

		if (rx_remain == 0)
			break;

		/* Carry on round the loop if more events and more space */
		events = ef_eventq_poll(&vnic->vi, ev,
					min(rx_remain,
					    ACCEL_VI_POLL_EVENTS));

		NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
	}

#if NETFRONT_ACCEL_STATS
	vnic->stats.event_count += n_evs_polled;
	vnic->stats.event_count_since_irq += n_evs_polled;
	if (n_evs_polled > vnic->stats.events_per_poll_max)
		vnic->stats.events_per_poll_max = n_evs_polled;
	if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
		vnic->stats.events_per_poll_rx_max = rx_evs_polled;
	if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
		vnic->stats.events_per_poll_tx_max = tx_evs_polled;
#endif

	return rx_packets - rx_remain;
}
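/*
 * Check for events and, if none are pending, request an event wakeup
 * from the hardware.  Returns 0 if an event is already waiting (so the
 * caller should poll again rather than sleep) and 1 once a wakeup has
 * been requested.  This return convention is inferred from the code
 * below; callers elsewhere in the driver decide whether to re-enter
 * polling based on it.
 */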
int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
{
	u32 sw_evq_ptr;

	VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);

	BUG_ON(vnic == NULL);
	BUG_ON(vnic->vi.evq_state == NULL);

	/* Do a quick check for an event. */
	if (ef_eventq_has_event(&vnic->vi)) {
		VPRINTK("%s: found event\n", __FUNCTION__);
		return 0;
	}

	VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
		vnic->evq_state.evq_ptr, vnic->vi.evq_mask);

	/* Request a wakeup from the hardware. */
	sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;

	BUG_ON(vnic->hw.falcon.evq_rptr == NULL);

	VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
		vnic->hw.falcon.evq_rptr);
	*(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);

	return 1;
}