/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 ****************************************************************************
 */
#include <linux/socket.h>
#include <linux/if_ether.h>

#include <net/checksum.h>

#include "accel.h"
#include "accel_util.h"
#include "accel_bufs.h"
#include "accel_tso.h"
#include "accel_ssr.h"
#include "netfront.h"

#include "etherfabric/ef_vi.h"
/*
 * Max available space in a buffer for data once meta-data has taken
 * its place.
 */
#define NETFRONT_ACCEL_TX_BUF_LENGTH					\
	((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)			\
	 - sizeof(struct netfront_accel_tso_buffer))
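/*
 * Each TX packet buffer therefore holds NETFRONT_ACCEL_TX_BUF_LENGTH
 * bytes of payload, with a struct netfront_accel_tso_buffer tucked in
 * at the end of the buffer; multi_post_start_new_buffer() and
 * netfront_accel_enqueue_skb_single() below locate this meta-data at
 * pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH.
 */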
#define ACCEL_TX_MAX_BUFFERS (6)
#define ACCEL_VI_POLL_EVENTS (8)
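
/*
 * netfront_accel_vi_init_fini() does double duty: called with a
 * non-NULL hw_msg it maps the hardware resources and initialises the
 * VI; called with hw_msg == NULL (from netfront_accel_vi_dtor()) it
 * jumps straight to the teardown path, which also serves as the
 * error unwind for a failed initialisation.
 */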
int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
				struct net_accel_msg_hw *hw_msg)
{
	struct ef_vi_nic_type nic_type;
	struct net_accel_hw_falcon_b *hw_info;
	void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
	u32 *evq_gnts;
	u32 evq_order;
	int vi_state_size;
	u8 vi_data[VI_MAPPINGS_SIZE];

	if (hw_msg == NULL)
		goto fini;

	/* And create the local mac table lock */
	spin_lock_init(&vnic->table_lock);

	/* Create fastpath table, initial size 8, key length 8 */
	if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
		EPRINTK("failed to allocate fastpath table\n");
		goto fail_cuckoo;
	}

	vnic->hw.falcon.type = hw_msg->type;

	switch (hw_msg->type) {
	case NET_ACCEL_MSG_HWTYPE_FALCON_A:
		hw_info = &hw_msg->resources.falcon_a.common;
		/* Need the extra rptr register page on A1 */
		io_kva = net_accel_map_iomem_page
			(vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
			 &vnic->hw.falcon.evq_rptr_mapping);
		if (io_kva == NULL) {
			EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
			goto evq_fail;
		}

		vnic->hw.falcon.evq_rptr = io_kva +
			(hw_info->evq_rptr & (PAGE_SIZE - 1));
		break;
	case NET_ACCEL_MSG_HWTYPE_FALCON_B:
		hw_info = &hw_msg->resources.falcon_b;
		break;
	default:
		goto bad_type;
	}
	/**** Event Queue ****/

	/* Map the event queue pages */
	evq_gnts = hw_info->evq_mem_gnts;
	evq_order = hw_info->evq_order;

	EPRINTK_ON(hw_info->evq_offs != 0);

	DPRINTK("Will map evq %d pages\n", 1 << evq_order);

	evq_base =
		net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
					    &vnic->evq_mapping);
	if (evq_base == NULL) {
		EPRINTK("%s: evq_base failed\n", __FUNCTION__);
		goto evq_fail;
	}

	/**** Doorbells ****/
	/* Set up the doorbell mappings. */
	doorbell_kva =
		net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
					 &vnic->hw.falcon.doorbell_mapping);
	if (doorbell_kva == NULL) {
		EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
		goto doorbell_fail;
	}
	vnic->hw.falcon.doorbell = doorbell_kva;

	/* On Falcon_B we get the rptr from the doorbell page */
	if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) {
		vnic->hw.falcon.evq_rptr =
			(u32 *)((char *)vnic->hw.falcon.doorbell
				+ hw_info->evq_rptr);
	}
	/**** DMA Queue ****/

	/* Set up the DMA Queues from the message. */
	tx_dma_kva = net_accel_map_grants_contig
		(vnic->dev, &(hw_info->txdmaq_gnt), 1,
		 &vnic->hw.falcon.txdmaq_mapping);
	if (tx_dma_kva == NULL) {
		EPRINTK("%s: TX dma failed\n", __FUNCTION__);
		goto tx_fail;
	}

	rx_dma_kva = net_accel_map_grants_contig
		(vnic->dev, &(hw_info->rxdmaq_gnt), 1,
		 &vnic->hw.falcon.rxdmaq_mapping);
	if (rx_dma_kva == NULL) {
		EPRINTK("%s: RX dma failed\n", __FUNCTION__);
		goto rx_fail;
	}

	/* Full confession */
	DPRINTK("Mapped H/W"
		"  Tx DMAQ grant %x -> %p\n"
		"  Rx DMAQ grant %x -> %p\n"
		"  EVQ grant %x -> %p\n",
		hw_info->txdmaq_gnt, tx_dma_kva,
		hw_info->rxdmaq_gnt, rx_dma_kva,
		evq_gnts[0], evq_base);
	memset(vi_data, 0, sizeof(vi_data));

	/* TODO BUG11305: convert efhw_arch to ef_vi_arch
	 * e.g.
	 * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
	 * assert(arch >= 0);
	 * nic_type.arch = arch;
	 */
	nic_type.arch = (unsigned char)hw_info->nic_arch;
	nic_type.variant = (char)hw_info->nic_variant;
	nic_type.revision = (unsigned char)hw_info->nic_revision;

	ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
			       1 << (evq_order + PAGE_SHIFT), evq_base,
			       (void *)0xdeadbeef);

	ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
			      hw_info->tx_capacity, hw_info->instance,
			      doorbell_kva, rx_dma_kva, tx_dma_kva, 0);

	vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
					       hw_info->tx_capacity);
	vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
	if (vnic->vi_state == NULL) {
		EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
		goto vi_state_fail;
	}
	ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);

	ef_eventq_state_init(&vnic->vi);

	ef_vi_state_init(&vnic->vi);

	return 0;
fini:
	kfree(vnic->vi_state);
	vnic->vi_state = NULL;
vi_state_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
rx_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
tx_fail:
	net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
	vnic->hw.falcon.doorbell = NULL;
doorbell_fail:
	net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
evq_fail:
	if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
		net_accel_unmap_iomem_page(vnic->dev,
					   vnic->hw.falcon.evq_rptr_mapping);
	vnic->hw.falcon.evq_rptr = NULL;
bad_type:
	cuckoo_hash_destroy(&vnic->fastpath_table);
fail_cuckoo:
	return -EIO;
}
void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
{
	/* Just mark the VI as uninitialised. */
	vnic->vi_state = NULL;
}


int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
{
	BUG_ON(hw_msg == NULL);
	return netfront_accel_vi_init_fini(vnic, hw_msg);
}


void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
{
	if (vnic->vi_state != NULL)
		netfront_accel_vi_init_fini(vnic, NULL);
}
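
/*
 * RX descriptor book-keeping for the routines below: rx_dma_level
 * counts descriptors currently posted to the RX DMA ring, while
 * rx_dma_batched counts descriptors that have been initialised but
 * not yet pushed to the hardware.  netfront_accel_vi_post_rx()
 * pushes a batch once NETFRONT_ACCEL_RX_DESC_BATCH descriptors have
 * accumulated, or immediately while the ring is nearly empty.
 */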
void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
			       netfront_accel_pkt_desc *buf)
{
	int idx = vnic->rx_dma_batched;

	VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
		id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));

	/* Set up a virtual buffer descriptor */
	ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id, 0);

	idx++;

	vnic->rx_dma_level++;

	/*
	 * Only push the descriptor to the card if we've reached the
	 * batch size.  Otherwise, the descriptors can sit around for
	 * a while.  There will be plenty available.
	 */
	if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
	    vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
		VPRINTK("Flushing %d rx descriptors.\n", idx);

		/* Push buffer to hardware */
		ef_vi_receive_push(&vnic->vi);

		idx = 0;
	}

	vnic->rx_dma_batched = idx;
}
void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
				       netfront_accel_pkt_desc *buf)
{
	VPRINTK("%s: %d\n", __FUNCTION__, id);

	if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
		VPRINTK("RX space is full\n");
		netfront_accel_buf_put(vnic->rx_bufs, id);
		return;
	}

	VPRINTK("Completed buffer %d is reposted\n", id);
	netfront_accel_vi_post_rx(vnic, id, buf);

	/*
	 * Let's see if there's any more to be pushed out to the NIC
	 * while we're at it
	 */
	while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
		/* Try to allocate a buffer. */
		buf = netfront_accel_buf_get(vnic->rx_bufs);
		if (buf == NULL)
			break;

		/* Add it to the rx dma queue. */
		netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
	}
}
void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
{
	while (is_rx &&
	       ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
		netfront_accel_pkt_desc *buf;

		VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);

		/* Try to allocate a buffer. */
		buf = netfront_accel_buf_get(vnic->rx_bufs);
		if (buf == NULL)
			break;

		/* Add it to the rx dma queue. */
		netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
	}

	VPRINTK("%s: done\n", __FUNCTION__);
}
struct netfront_accel_multi_state {
	unsigned remaining_len;

	unsigned buffers;

	struct netfront_accel_tso_buffer *output_buffers;

	/* Where we are in the current fragment of the SKB. */
	struct {
		/* address of current position */
		void *addr;
		/* remaining length */
		unsigned int len;
	} ifc; /* == Input Fragment Cursor */
};
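
/*
 * The multi-buffer TX path below works as a simple pipeline:
 * multi_post_start() primes the cursor from the skb head,
 * multi_post_start_new_buffer() pulls a fresh packet buffer onto the
 * output_buffers stack, and multi_post_fill_buffer_with_fragment()
 * copies as much of the current fragment as fits.  On any failure
 * multi_post_unwind() returns all buffers to the pool.
 */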
static inline void multi_post_start(struct netfront_accel_multi_state *st,
				    struct sk_buff *skb)
{
	st->remaining_len = skb->len;
	st->output_buffers = NULL;
	st->buffers = 0;
	st->ifc.len = skb_headlen(skb);
	st->ifc.addr = skb->data;
}
static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
				       struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_pkt_desc *buf;

	/* Get a mapped packet buffer */
	buf = netfront_accel_buf_get(vnic->tx_bufs);
	if (buf == NULL) {
		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
		return -1;
	}

	/* Store a bit of meta-data at the end */
	tso_buf = (struct netfront_accel_tso_buffer *)
		(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);

	tso_buf->buf = buf;

	tso_buf->length = 0;

	tso_buf->next = st->output_buffers;
	st->output_buffers = tso_buf;
	st->buffers++;

	BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);

	/*
	 * Store the context, set to NULL, last packet buffer will get
	 * non-NULL later
	 */
	tso_buf->buf->skb = NULL;

	return 0;
}
static void
multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
				     struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	unsigned n, space;

	BUG_ON(st->output_buffers == NULL);
	tso_buf = st->output_buffers;

	if (st->ifc.len == 0) return;
	if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;

	BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);

	space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
	n = min(st->ifc.len, space);

	memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);

	st->remaining_len -= n;
	st->ifc.len -= n;
	tso_buf->length += n;
	st->ifc.addr += n;

	BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
}
static inline void multi_post_unwind(netfront_accel_vnic *vnic,
				     struct netfront_accel_multi_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;

	DPRINTK("%s\n", __FUNCTION__);

	while (st->output_buffers != NULL) {
		tso_buf = st->output_buffers;
		st->output_buffers = tso_buf->next;
		st->buffers--;
		netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
	}
	BUG_ON(st->buffers != 0);
}
static enum netfront_accel_post_status
netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_multi_state state;
	ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
	skb_frag_t *f;
	int frag_i, rc, dma_id;

	multi_post_start(&state, skb);

	frag_i = -1;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Set to zero to encourage falcon to work it out for us */
		*(u16*)(skb_transport_header(skb) + skb->csum) = 0;
	}

	if (multi_post_start_new_buffer(vnic, &state)) {
		DPRINTK("%s: out of buffers\n", __FUNCTION__);
		goto unwind;
	}

	while (1) {
		multi_post_fill_buffer_with_fragment(vnic, &state);

		/* Move onto the next fragment? */
		if (state.ifc.len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			f = &skb_shinfo(skb)->frags[frag_i];
			state.ifc.len = f->size;
			state.ifc.addr = page_address(f->page) + f->page_offset;
		}

		/* Start a new buffer? */
		if ((state.output_buffers->length ==
		     NETFRONT_ACCEL_TX_BUF_LENGTH) &&
		    multi_post_start_new_buffer(vnic, &state)) {
			DPRINTK("%s: out of buffers\n", __FUNCTION__);
			goto unwind;
		}
	}

	/* Check for space */
	if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
		DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
		goto unwind;
	}

	/* Store the skb in what will be the last buffer's context */
	state.output_buffers->buf->skb = skb;
	/* Remember dma_id of what will be the last buffer */
	dma_id = state.output_buffers->buf->buf_id;

	/*
	 * Make an iovec of the buffers in the list, reversing the
	 * buffers as we go as they are constructed on a stack
	 */
	tso_buf = state.output_buffers;
	for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
		iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
		iovecs[frag_i].iov_len = tso_buf->length;
		tso_buf = tso_buf->next;
	}

	rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);

	/* Track number of tx fastpath stats */
	vnic->netdev_stats.fastpath_tx_bytes += skb->len;
	vnic->netdev_stats.fastpath_tx_pkts++;
#if NETFRONT_ACCEL_STATS
	{
		u32 n;
		n = vnic->netdev_stats.fastpath_tx_pkts -
			(u32)vnic->stats.fastpath_tx_completions;
		if (n > vnic->stats.fastpath_tx_pending_max)
			vnic->stats.fastpath_tx_pending_max = n;
	}
#endif
	return NETFRONT_ACCEL_STATUS_GOOD;

unwind:
	multi_post_unwind(vnic, &state);

	NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);

	return NETFRONT_ACCEL_STATUS_BUSY;
}
static enum netfront_accel_post_status
netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_pkt_desc *buf;
	u8 *kva;
	int rc;

	if (ef_vi_transmit_space(&vnic->vi) < 1) {
		DPRINTK("%s: No TX space\n", __FUNCTION__);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
		return NETFRONT_ACCEL_STATUS_BUSY;
	}

	buf = netfront_accel_buf_get(vnic->tx_bufs);
	if (buf == NULL) {
		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
		return NETFRONT_ACCEL_STATUS_BUSY;
	}

	/* Track number of tx fastpath stats */
	vnic->netdev_stats.fastpath_tx_pkts++;
	vnic->netdev_stats.fastpath_tx_bytes += skb->len;

#if NETFRONT_ACCEL_STATS
	{
		u32 n;
		n = vnic->netdev_stats.fastpath_tx_pkts -
			(u32)vnic->stats.fastpath_tx_completions;
		if (n > vnic->stats.fastpath_tx_pending_max)
			vnic->stats.fastpath_tx_pending_max = n;
	}
#endif

	/* Store the context */
	buf->skb = skb;

	kva = buf->pkt_kva;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Set to zero to encourage falcon to work it out for us */
		*(u16*)(skb_transport_header(skb) + skb->csum) = 0;
	}
	NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
		(skb, idx, frag_data, frag_len, {
			/* Copy in payload */
			VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
			memcpy(kva, frag_data, frag_len);
			kva += frag_len;
		});

	VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
		buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);

	/* Set up the TSO meta-data for a single buffer/packet */
	tso_buf = (struct netfront_accel_tso_buffer *)
		(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
	tso_buf->next = NULL;
	tso_buf->buf = buf;
	tso_buf->length = skb->len;

	rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
			    buf->buf_id);
	/* We checked for space already, so it really should succeed */
	BUG_ON(rc != 0);

	return NETFRONT_ACCEL_STATUS_GOOD;
}
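
/*
 * Dispatch for the TX fast path: GSO packets go via the TSO path,
 * packets that fit a single buffer take the simple path above, and
 * everything else is split across buffers by
 * netfront_accel_enqueue_skb_multi().
 */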
enum netfront_accel_post_status
netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	struct ethhdr *pkt_eth_hdr;
	struct iphdr *pkt_ipv4_hdr;
	int value, try_fastpath;

	/*
	 * This assumes that the data field points to the dest mac
	 * address.
	 */
	cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);

	/*
	 * NB very important that all things that could return "CANT"
	 * are tested before things that return "BUSY" as if it
	 * returns "BUSY" it is assumed that it won't return "CANT"
	 * next time it is tried
	 */

	/*
	 * Do a fastpath send if fast path table lookup returns true.
	 * We do this without the table lock and so may get the wrong
	 * answer, but current opinion is that's not a big problem
	 */
	try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
					  (cuckoo_hash_key *)(&key), &value);

	if (!try_fastpath) {
		DECLARE_MAC_BUF(buf);

		VPRINTK("try fast path false for mac: %s\n",
			print_mac(buf, skb->data));

		return NETFRONT_ACCEL_STATUS_CANT;
	}

	/* Check to see if the packet can be sent. */
	if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
		EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	pkt_eth_hdr = (void*)skb->data;
	pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);

	if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
		DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
			be16_to_cpu(pkt_eth_hdr->h_proto));
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
	    pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
		DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
			__FUNCTION__, pkt_ipv4_hdr->protocol);
		return NETFRONT_ACCEL_STATUS_CANT;
	}

	VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
		skb_shinfo(skb)->gso_size);

	if (skb_shinfo(skb)->gso_size) {
		return netfront_accel_enqueue_skb_tso(vnic, skb);
	}

	if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
		return netfront_accel_enqueue_skb_single(vnic, skb);
	}

	return netfront_accel_enqueue_skb_multi(vnic, skb);
}
/*
 * Copy the data to required end destination. NB. len is the total new
 * length of the socket buffer, not the amount of data to copy
 */
int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
			struct netfront_accel_pkt_desc *buf, int len)
{
	int i, extra = len - skb->len;
	char c;
	int pkt_stride = vnic->rx_pkt_stride;
	int skb_stride = vnic->rx_skb_stride;
	char *skb_start;

	/*
	 * This pulls stuff into the cache - have seen performance
	 * benefit in this, but disabled by default
	 */
	skb_start = skb->data;
	if (pkt_stride) {
		for (i = 0; i < len; i += pkt_stride) {
			c += ((volatile char*)(buf->pkt_kva))[i];
		}
	}
	if (skb_stride) {
		for (i = skb->len; i < len ; i += skb_stride) {
			c += ((volatile char*)(skb_start))[i];
		}
	}

	if (skb_tailroom(skb) >= extra) {
		memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
		return 0;
	}

	return -ENOSPC;
}
static void discard_jumbo_state(netfront_accel_vnic *vnic)
{
	if (vnic->jumbo_state.skb != NULL) {
		dev_kfree_skb_any(vnic->jumbo_state.skb);

		vnic->jumbo_state.skb = NULL;
	}
	vnic->jumbo_state.in_progress = 0;
}
static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
					  struct sk_buff *skb)
{
	cuckoo_hash_mac_key key;
	unsigned long flags;
	int value;
	struct net_device *net_dev;

	key = cuckoo_mac_to_key(skb->data + ETH_ALEN);

	/*
	 * If this is a MAC address that we want to do fast path TX
	 * to, and we don't already, add it to the fastpath table.
	 * The initial lookup is done without the table lock and so
	 * may get the wrong answer, but current opinion is that's not
	 * a big problem
	 */
	if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
	    !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
				&value)) {
		spin_lock_irqsave(&vnic->table_lock, flags);

		cuckoo_hash_add_check(&vnic->fastpath_table,
				      (cuckoo_hash_key *)&key,
				      1, 1);

		spin_unlock_irqrestore(&vnic->table_lock, flags);
	}

	if (compare_ether_addr(skb->data, vnic->mac)) {
		struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
		u16 port;

		DECLARE_MAC_BUF(buf);

		DPRINTK("%s: saw wrong MAC address %s\n",
			__FUNCTION__, print_mac(buf, skb->data));

		if (ip->protocol == IPPROTO_TCP) {
			struct tcphdr *tcp = (struct tcphdr *)
				((char *)ip + 4 * ip->ihl);
			port = tcp->dest;
		} else {
			struct udphdr *udp = (struct udphdr *)
				((char *)ip + 4 * ip->ihl);
			EPRINTK_ON(ip->protocol != IPPROTO_UDP);
			port = udp->dest;
		}

		netfront_accel_msg_tx_fastpath(vnic, skb->data,
					       ip->daddr, port,
					       ip->protocol);
	}

	net_dev = vnic->net_dev;
	skb->protocol = eth_type_trans(skb, net_dev);
	/* CHECKSUM_UNNECESSARY as hardware has done it already */
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
		netif_receive_skb(skb);
}
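
/*
 * RX completions arrive as one event per DMA buffer.  A packet that
 * spans several buffers (a "jumbo") is reassembled in
 * vnic->jumbo_state using the SOP (start of packet) and CONT
 * (continuation) flags on each event; inconsistent flags indicate a
 * missing head or tail and cause the partial packet to be dropped.
 */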
static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
					     ef_event *ev)
{
	struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
	struct netfront_accel_pkt_desc *buf = NULL;
	struct sk_buff *skb;
	int id, len, sop = 0, cont = 0;

	VPRINTK("Rx event.\n");
	/*
	 * Complete the receive operation, and get the request id of
	 * the buffer
	 */
	id = ef_vi_receive_done(&vnic->vi, ev);

	if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
		EPRINTK("Rx packet %d is invalid\n", id);
		/* Carry on round the loop if more events */
		goto bad_packet;
	}
	/* Get our buffer descriptor */
	buf = netfront_accel_buf_find(bufinfo, id);

	len = EF_EVENT_RX_BYTES(*ev);

	/* An RX buffer has been removed from the DMA ring. */
	vnic->rx_dma_level--;

	if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
		sop = EF_EVENT_RX_SOP(*ev);
		cont = EF_EVENT_RX_CONT(*ev);

		skb = vnic->jumbo_state.skb;

		VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
			id, len, sop, cont);

		if (sop) {
			if (!vnic->jumbo_state.in_progress) {
				vnic->jumbo_state.in_progress = 1;
				BUG_ON(vnic->jumbo_state.skb != NULL);
			} else {
				/*
				 * This fragment shows a missing tail in
				 * previous one, but is itself possibly OK
				 */
				DPRINTK("sop and in_progress => no tail\n");

				/* Release the socket buffer we already had */
				discard_jumbo_state(vnic);

				/* Now start processing this fragment */
				vnic->jumbo_state.in_progress = 1;
				skb = NULL;
			}
		} else if (!vnic->jumbo_state.in_progress) {
			DPRINTK("!sop and !in_progress => missing head\n");
			goto missing_head;
		}

		if (!cont) {
			/* Update state for next time */
			vnic->jumbo_state.in_progress = 0;
			vnic->jumbo_state.skb = NULL;
		} else if (!vnic->jumbo_state.in_progress) {
			DPRINTK("cont and !in_progress => missing head\n");
			goto missing_head;
		}

		if (skb == NULL) {
			if (!cont)
				skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
			else
				skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
						GFP_ATOMIC);

			if (skb == NULL) {
				DPRINTK("%s: Couldn't get an rx skb.\n",
					__FUNCTION__);
				netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
				/*
				 * Dropping this fragment means we
				 * should discard the rest too
				 */
				discard_jumbo_state(vnic);

				/* Carry on round the loop if more events */
				return 0;
			}
		}

		/* Copy the data to required end destination */
		if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
			/*
			 * No space in the skb - suggests > MTU packet
			 * received
			 */
			EPRINTK("%s: Rx packet too large (%d)\n",
				__FUNCTION__, len);
			netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
			discard_jumbo_state(vnic);
			return 0;
		}

		/* Put the buffer back in the DMA queue. */
		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);

		if (cont) {
			vnic->jumbo_state.skb = skb;

			return 0;
		} else {
			/* Track number of rx fastpath packets */
			vnic->netdev_stats.fastpath_rx_pkts++;
			vnic->netdev_stats.fastpath_rx_bytes += len;

			netfront_accel_vi_rx_complete(vnic, skb);

			return 1;
		}
	} else {
		BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);

		if (EF_EVENT_RX_DISCARD_TYPE(*ev)
		    == EF_EVENT_RX_DISCARD_TRUNC) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d FRM_TRUNC q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
			   == EF_EVENT_RX_DISCARD_OTHER) {
			DPRINTK("%s: " EF_EVENT_FMT
				" buffer %d RX_DISCARD_OTHER q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			/*
			 * Probably tail of packet for which error has
			 * already been logged, so don't count in
			 * stats
			 */
		} else {
			EPRINTK("%s: " EF_EVENT_FMT
				" buffer %d rx discard type %d q_id %d\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
				EF_EVENT_RX_DISCARD_TYPE(*ev),
				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
			NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count);
		}
	}

	/* discard type drops through here */

bad_packet:
	/* Release the socket buffer we already had */
	discard_jumbo_state(vnic);

missing_head:
	BUG_ON(vnic->jumbo_state.in_progress != 0);
	BUG_ON(vnic->jumbo_state.skb != NULL);

	if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
		/* Put the buffer back in the DMA queue. */
		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);

	vnic->netdev_stats.fastpath_rx_errors++;

	DPRINTK("%s experienced bad packet/missing fragment error: %d\n",
		__FUNCTION__, ev->rx.flags);

	return 0;
}
static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
{
	struct netfront_info *np = ((struct netfront_info *)
				    netdev_priv(vnic->net_dev));
	struct sk_buff *skb;
	int handled;
	unsigned long flags;

	/*
	 * TODO if we could safely check tx_skb == NULL and return
	 * early without taking the lock, that would obviously help
	 * performance
	 */

	/* Take the netfront lock which protects tx_skb. */
	spin_lock_irqsave(&np->tx_lock, flags);
	if (vnic->tx_skb != NULL) {
		DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);

		skb = vnic->tx_skb;
		vnic->tx_skb = NULL;

		spin_unlock_irqrestore(&np->tx_lock, flags);

		handled = netfront_accel_vi_tx_post(vnic, skb);

		spin_lock_irqsave(&np->tx_lock, flags);

		if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
			DPRINTK("%s restarting tx\n", __FUNCTION__);
			if (netfront_check_queue_ready(vnic->net_dev)) {
				netif_wake_queue(vnic->net_dev);
				NETFRONT_ACCEL_STATS_OP
					(vnic->stats.queue_wakes++);
			}
		} else {
			vnic->tx_skb = skb;
		}

		/*
		 * Should never get a CANT, as it checks that before
		 * deciding it was BUSY first time round
		 */
		BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
	}
	spin_unlock_irqrestore(&np->tx_lock, flags);
}
static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
					  struct netfront_accel_tso_buffer *tso_buf,
					  int is_last)
{
	struct netfront_accel_tso_buffer *next;

	/*
	 * We get a single completion for every call to
	 * ef_vi_transmitv so handle any other buffers which are part
	 * of the same packet
	 */
	while (tso_buf != NULL) {
		if (tso_buf->buf->skb != NULL) {
			dev_kfree_skb_any(tso_buf->buf->skb);
			tso_buf->buf->skb = NULL;
		}

		next = tso_buf->next;

		netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);

		tso_buf = next;
	}

	/*
	 * If this was the last one in the batch, we try and send any
	 * pending tx_skb.  There should now be buffers and
	 * descriptors
	 */
	if (is_last)
		netfront_accel_vi_not_busy(vnic);
}
static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
					      ef_event *ev)
{
	struct netfront_accel_pkt_desc *buf;
	struct netfront_accel_tso_buffer *tso_buf;
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, n_ids;
	unsigned long flags;

	/* Get the request ids for this tx completion event. */
	n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);

	/* Take the tx buffer spin lock and hold for the duration */
	spin_lock_irqsave(&vnic->tx_lock, flags);

	for (i = 0; i < n_ids; ++i) {
		VPRINTK("Tx packet %d complete\n", ids[i]);
		buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);

		tso_buf = (struct netfront_accel_tso_buffer *)
			(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
		BUG_ON(tso_buf->buf != buf);

		netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
	}

	spin_unlock_irqrestore(&vnic->tx_lock, flags);
}
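
/*
 * Poll the event queue, processing at most rx_packets received
 * packets; TX completions found along the way are always handled.
 * Returns the number of RX packets actually processed.
 */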
int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
{
	ef_event ev[ACCEL_VI_POLL_EVENTS];
	int rx_remain = rx_packets, rc, events, i;
#if NETFRONT_ACCEL_STATS
	int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
#endif
	BUG_ON(rx_packets <= 0);

	events = ef_eventq_poll(&vnic->vi, ev,
				min(rx_remain, ACCEL_VI_POLL_EVENTS));

	NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);

	VPRINTK("%s: %d events\n", __FUNCTION__, events);

	i = 0;

	/* Loop over each event */
	while (events) {
		VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
			EF_EVENT_PRI_ARG(ev[i]),
			(unsigned long)(vnic->vi.evq_state->evq_ptr));

		if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
		    (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
			rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
			rx_remain -= rc;
			BUG_ON(rx_remain < 0);
			NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
		} else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
			netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
			NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
		} else if (EF_EVENT_TYPE(ev[i]) ==
			   EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
			DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
				__FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
			discard_jumbo_state(vnic);
			NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
		} else {
			EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
				EF_EVENT_PRI_ARG(ev[i]));
			NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
		}

		i++;

		/* Carry on round the loop if more events and more space */
		if (i == events) {
			if (rx_remain == 0)
				break;

			events = ef_eventq_poll(&vnic->vi, ev,
						min(rx_remain,
						    ACCEL_VI_POLL_EVENTS));
			i = 0;
			NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
		}
	}

#if NETFRONT_ACCEL_STATS
	vnic->stats.event_count += n_evs_polled;
	vnic->stats.event_count_since_irq += n_evs_polled;
	if (n_evs_polled > vnic->stats.events_per_poll_max)
		vnic->stats.events_per_poll_max = n_evs_polled;
	if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
		vnic->stats.events_per_poll_rx_max = rx_evs_polled;
	if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
		vnic->stats.events_per_poll_tx_max = tx_evs_polled;
#endif

	return rx_packets - rx_remain;
}
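
/*
 * Re-enable wakeups after a poll.  Returns 0 if an event is already
 * pending, so the caller should poll again rather than sleep;
 * otherwise a wakeup is requested from the hardware at the current
 * software read pointer and (assuming the reconstructed tail below)
 * a non-zero value is returned.
 */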
int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
{
	u32 sw_evq_ptr;

	VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);

	BUG_ON(vnic == NULL);
	BUG_ON(vnic->vi.evq_state == NULL);

	/* Do a quick check for an event. */
	if (ef_eventq_has_event(&vnic->vi)) {
		VPRINTK("%s: found event\n", __FUNCTION__);
		return 0;
	}

	VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
		vnic->evq_state.evq_ptr, vnic->vi.evq_mask);

	/* Request a wakeup from the hardware. */
	sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;

	BUG_ON(vnic->hw.falcon.evq_rptr == NULL);

	VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
		vnic->hw.falcon.evq_rptr);
	*(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);

	return 1;
}