[linux-flexiantxendom0-3.2.10.git] drivers/xen/sfc_netfront/accel_vi.c
1 /****************************************************************************
2  * Solarflare driver for Xen network acceleration
3  *
4  * Copyright 2006-2008: Solarflare Communications Inc,
5  *                      9501 Jeronimo Road, Suite 250,
6  *                      Irvine, CA 92618, USA
7  *
8  * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License version 2 as published
12  * by the Free Software Foundation, incorporated herein by reference.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22  ****************************************************************************
23  */
24
25 #include <linux/if_ether.h>
26 #include <linux/ip.h>
27 #include <net/checksum.h>
28 #include <asm/io.h>
29
30 #include "accel.h"
31 #include "accel_util.h"
32 #include "accel_bufs.h"
33 #include "accel_tso.h"
34 #include "accel_ssr.h"
35 #include "netfront.h"
36
37 #include "etherfabric/ef_vi.h"
38
39 /*
40  * Max available space in a buffer for data once meta-data has taken
41  * its place
42  */
43 #define NETFRONT_ACCEL_TX_BUF_LENGTH                                    \
44         ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)                     \
45          - sizeof(struct netfront_accel_tso_buffer))
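
/*
 * A worked example of the layout above (illustrative numbers only; the
 * real NETFRONT_ACCEL_BUFS_PER_PAGE comes from accel_bufs.h): with 4096
 * byte pages split into 2 buffers per page, each 2048 byte buffer holds
 * 2048 - sizeof(struct netfront_accel_tso_buffer) bytes of packet data,
 * and the meta-data struct sits at pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH,
 * which is how the TX paths below locate it.
 */
#if 0
/* Sanity-check sketch (not compiled in): the meta-data must fit. */
BUILD_BUG_ON(sizeof(struct netfront_accel_tso_buffer) >=
             PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE);
#endif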
46
47 #define ACCEL_TX_MAX_BUFFERS (6)
48 #define ACCEL_VI_POLL_EVENTS (8)
49
50 static
51 int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic, 
52                                 struct net_accel_msg_hw *hw_msg)
53 {
54         struct ef_vi_nic_type nic_type;
55         struct net_accel_hw_falcon_b *hw_info;
56         void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
57         u32 *evq_gnts;
58         u32 evq_order;
59         int vi_state_size;
60         u8 vi_data[VI_MAPPINGS_SIZE];
61
62         if (hw_msg == NULL)
63                 goto fini;
64
65         /* Create the local MAC table lock */
66         spin_lock_init(&vnic->table_lock);
67         
68         /* Create fastpath table, initial size 8, key length 8 */
69         if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
70                 EPRINTK("failed to allocate fastpath table\n");
71                 goto fail_cuckoo;
72         }
73
74         vnic->hw.falcon.type = hw_msg->type;
75
76         switch (hw_msg->type) {
77         case NET_ACCEL_MSG_HWTYPE_FALCON_A:
78                 hw_info = &hw_msg->resources.falcon_a.common;
79                 /* Need the extra rptr register page on A1 */
80                 io_kva = net_accel_map_iomem_page
81                         (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
82                          &vnic->hw.falcon.evq_rptr_mapping);
83                 if (io_kva == NULL) {
84                         EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
85                         goto evq_rptr_fail;
86                 }
87
88                 vnic->hw.falcon.evq_rptr = io_kva + 
89                         (hw_info->evq_rptr & (PAGE_SIZE - 1));
90                 break;
91         case NET_ACCEL_MSG_HWTYPE_FALCON_B:
92                 hw_info = &hw_msg->resources.falcon_b;
93                 break;
94         default:
95                 goto bad_type;
96         }
97
98         /**** Event Queue ****/
99
100         /* Map the event queue pages */
101         evq_gnts = hw_info->evq_mem_gnts;
102         evq_order = hw_info->evq_order;
103
104         EPRINTK_ON(hw_info->evq_offs != 0);
105
106         DPRINTK("Will map evq %d pages\n", 1 << evq_order);
107
108         evq_base =
109                 net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
110                                             &vnic->evq_mapping);
111         if (evq_base == NULL) {
112                 EPRINTK("%s: evq_base failed\n", __FUNCTION__);
113                 goto evq_fail;
114         }
115
116         /**** Doorbells ****/
117         /* Set up the doorbell mappings. */
118         doorbell_kva = 
119                 net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
120                                          &vnic->hw.falcon.doorbell_mapping);
121         if (doorbell_kva == NULL) {
122                 EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
123                 goto doorbell_fail;
124         }
125         vnic->hw.falcon.doorbell = doorbell_kva;
126
127         /* On Falcon_B we get the rptr from the doorbell page */
128         if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) {
129                 vnic->hw.falcon.evq_rptr = 
130                         (u32 *)((char *)vnic->hw.falcon.doorbell 
131                                 + hw_info->evq_rptr);
132         }
133
134         /**** DMA Queue ****/
135
136         /* Set up the DMA Queues from the message. */
137         tx_dma_kva = net_accel_map_grants_contig
138                 (vnic->dev, &(hw_info->txdmaq_gnt), 1, 
139                  &vnic->hw.falcon.txdmaq_mapping);
140         if (tx_dma_kva == NULL) {
141                 EPRINTK("%s: TX dma failed\n", __FUNCTION__);
142                 goto tx_dma_fail;
143         }
144
145         rx_dma_kva = net_accel_map_grants_contig
146                 (vnic->dev, &(hw_info->rxdmaq_gnt), 1, 
147                  &vnic->hw.falcon.rxdmaq_mapping);
148         if (rx_dma_kva == NULL) {
149                 EPRINTK("%s: RX dma failed\n", __FUNCTION__);
150                 goto rx_dma_fail;
151         }
152
153         /* Full confession */
154         DPRINTK("Mapped H/W"
155                 "  Tx DMAQ grant %x -> %p\n"
156                 "  Rx DMAQ grant %x -> %p\n"
157                 "  EVQ grant %x -> %p\n",
158                 hw_info->txdmaq_gnt, tx_dma_kva,
159                 hw_info->rxdmaq_gnt, rx_dma_kva,
160                 evq_gnts[0], evq_base
161                 );
162
163         memset(vi_data, 0, sizeof(vi_data));
164         
165         /* TODO BUG11305: convert efhw_arch to ef_vi_arch
166          * e.g.
167          * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
168          * assert(arch >= 0);
169          * nic_type.arch = arch;
170          */
171         nic_type.arch = (unsigned char)hw_info->nic_arch;
172         nic_type.variant = (char)hw_info->nic_variant;
173         nic_type.revision = (unsigned char)hw_info->nic_revision;
174         
175         ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance, 
176                                1 << (evq_order + PAGE_SHIFT), evq_base, 
177                                (void *)0xdeadbeef);
178
179         ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity, 
180                               hw_info->tx_capacity, hw_info->instance, 
181                               doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
182
183         vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
184                                                hw_info->tx_capacity);
185         vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
186         if (vnic->vi_state == NULL) {
187                 EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
188                 goto vi_state_fail;
189         }
190         ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
191
192         ef_eventq_state_init(&vnic->vi);
193
194         ef_vi_state_init(&vnic->vi);
195
196         return 0;
197
198 fini:
199         kfree(vnic->vi_state);
200         vnic->vi_state = NULL;
201 vi_state_fail:
202         net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
203 rx_dma_fail:
204         net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
205 tx_dma_fail:
206         net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
207         vnic->hw.falcon.doorbell = NULL;
208 doorbell_fail:
209         net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
210 evq_fail:
211         if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
212                 net_accel_unmap_iomem_page(vnic->dev, 
213                                            vnic->hw.falcon.evq_rptr_mapping);
214         vnic->hw.falcon.evq_rptr = NULL;
215 evq_rptr_fail:
216 bad_type:
217         cuckoo_hash_destroy(&vnic->fastpath_table);
218 fail_cuckoo:
219         return -EIO;
220 }
221
222
223 void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
224 {
225         /* Just mark the VI as uninitialised. */
226         vnic->vi_state = NULL;
227 }
228
229
230 int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
231 {
232         BUG_ON(hw_msg == NULL);
233         return netfront_accel_vi_init_fini(vnic, hw_msg);
234 }
235
236
237 void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
238 {
239         if (vnic->vi_state != NULL)
240                 netfront_accel_vi_init_fini(vnic, NULL);
241 }
242
243
244 static
245 void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
246                                netfront_accel_pkt_desc *buf)
247 {
248
249         int idx = vnic->rx_dma_batched;
250
251 #if 0
252         VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
253                 id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
254 #endif
255         /* Set up a virtual buffer descriptor */
256         ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
257                            /*rx_bytes=max*/0);
258
259         idx++;
260
261         vnic->rx_dma_level++;
262         
263         /* 
264          * Only push the descriptors to the card once a full batch has
265          * accumulated, unless few buffers are outstanding, in which
266          * case push now rather than leave the hardware short.
267          */
268         if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
269             vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
270 #if 0
271                 VPRINTK("Flushing %d rx descriptors.\n", idx);
272 #endif
273
274                 /* Push buffer to hardware */
275                 ef_vi_receive_push(&vnic->vi);
276                 
277                 idx = 0;
278         }
279         
280         vnic->rx_dma_batched = idx;
281 }
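
/*
 * The batching policy above, restated as a sketch (hypothetical helper
 * name, not part of the driver): descriptors are initialised one at a
 * time but the doorbell is only rung once a full batch has accumulated,
 * or when few buffers are outstanding and the hardware should not be
 * left waiting for the rest of a batch.
 */
#if 0
static int rx_should_push(int batched, int dma_level)
{
        return batched >= NETFRONT_ACCEL_RX_DESC_BATCH ||
               dma_level < NETFRONT_ACCEL_RX_DESC_BATCH;
}
#endif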
282
283
284 inline
285 void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
286                                        netfront_accel_pkt_desc *buf)
287 {
288
289         VPRINTK("%s: %d\n", __FUNCTION__, id);
290
291         if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
292                 VPRINTK("RX space is full\n");
293                 netfront_accel_buf_put(vnic->rx_bufs, id);
294                 return;
295         }
296
297         VPRINTK("Completed buffer %d is reposted\n", id);
298         netfront_accel_vi_post_rx(vnic, id, buf);
299         
300         /*
301          * Let's see if there's any more to be pushed out to the NIC
302          * while we're here
303          */
304         while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
305                 /* Try to allocate a buffer. */
306                 buf = netfront_accel_buf_get(vnic->rx_bufs);
307                 if (buf == NULL)
308                         break;
309                 
310                 /* Add it to the rx dma queue. */
311                 netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);      
312         }
313 }
314
315
316 void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
317 {
318
319         while (is_rx && 
320                ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
321                 netfront_accel_pkt_desc *buf;
322                 
323                 VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
324                 
325                 /* Try to allocate a buffer. */
326                 buf = netfront_accel_buf_get(vnic->rx_bufs);
327
328                 if (buf == NULL)
329                         break;
330                 
331                 /* Add it to the rx dma queue. */
332                 netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
333         }
334
335         VPRINTK("%s: done\n", __FUNCTION__);
336 }
337
338
339 struct netfront_accel_multi_state {
340         unsigned remaining_len;
341
342         unsigned buffers;
343
344         struct netfront_accel_tso_buffer *output_buffers;
345
346         /* Where we are in the current fragment of the SKB. */
347         struct {
348                 /* address of current position */
349                 void *addr;
350                 /* remaining length */    
351                 unsigned int len;
352         } ifc; /*  == Input Fragment Cursor */
353 };
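
/*
 * A minimal sketch of how the Input Fragment Cursor is consumed
 * (illustrative only; it mirrors multi_post_fill_buffer_with_fragment()
 * below rather than adding behaviour): n bytes are copied out and the
 * cursor and running totals advance together.
 */
#if 0
n = min(st->ifc.len, space_left_in_output_buffer);   /* hypothetical name */
memcpy(dest, st->ifc.addr, n);
st->ifc.addr      += n;
st->ifc.len       -= n;
st->remaining_len -= n;
#endif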
354
355
356 static inline void multi_post_start(struct netfront_accel_multi_state *st, 
357                                     struct sk_buff *skb)
358 {
359         st->remaining_len = skb->len;
360         st->output_buffers = NULL;
361         st->buffers = 0;
362         st->ifc.len = skb_headlen(skb);
363         st->ifc.addr = skb->data;
364 }
365
366 static int multi_post_start_new_buffer(netfront_accel_vnic *vnic, 
367                                        struct netfront_accel_multi_state *st)
368 {
369         struct netfront_accel_tso_buffer *tso_buf;
370         struct netfront_accel_pkt_desc *buf;
371
372         /* Get a mapped packet buffer */
373         buf = netfront_accel_buf_get(vnic->tx_bufs);
374         if (buf == NULL) {
375                 DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
376                 return -1;
377         }
378
379         /* Store a bit of meta-data at the end */
380         tso_buf = (struct netfront_accel_tso_buffer *)
381                 (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
382
383         tso_buf->buf = buf;
384
385         tso_buf->length = 0;
386         
387         tso_buf->next = st->output_buffers;
388         st->output_buffers = tso_buf;
389         st->buffers++;
390
391         BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
392
393         /*
394          * Store the context: NULL for now; the buffer that ends up
395          * holding the last part of the packet gets the real skb later
396          */
397         tso_buf->buf->skb = NULL;
398         
399         return 0;
400 }
401
402
403 static void
404 multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
405                                      struct netfront_accel_multi_state *st)
406 {
407         struct netfront_accel_tso_buffer *tso_buf;
408         unsigned n, space;
409
410         BUG_ON(st->output_buffers == NULL);
411         tso_buf = st->output_buffers;
412
413         if (st->ifc.len == 0) return;
414         if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
415
416         BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
417
418         space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
419         n = min(st->ifc.len, space);
420
421         memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
422
423         st->remaining_len -= n;
424         st->ifc.len -= n;
425         tso_buf->length += n;
426         st->ifc.addr += n;
427
428         BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
429
430         return;
431 }
432
433
434 static inline void multi_post_unwind(netfront_accel_vnic *vnic,
435                                      struct netfront_accel_multi_state *st)
436 {
437         struct netfront_accel_tso_buffer *tso_buf;
438
439         DPRINTK("%s\n", __FUNCTION__);
440
441         while (st->output_buffers != NULL) {
442                 tso_buf = st->output_buffers;
443                 st->output_buffers = tso_buf->next;
444                 st->buffers--;
445                 netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
446         }
447         BUG_ON(st->buffers != 0);
448 }
449
450
451 static enum netfront_accel_post_status
452 netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
453 {
454         struct netfront_accel_tso_buffer *tso_buf;
455         struct netfront_accel_multi_state state;
456         ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
457         skb_frag_t *f;
458         int frag_i, rc, dma_id;
459
460         multi_post_start(&state, skb);
461
462         frag_i = -1;
463
464         if (skb->ip_summed == CHECKSUM_PARTIAL) {
465                 /* Set to zero to encourage falcon to work it out for us */
466                 *(u16*)(skb_transport_header(skb) + skb->csum) = 0;
467         }
468
469         if (multi_post_start_new_buffer(vnic, &state)) {
470                 DPRINTK("%s: out of buffers\n", __FUNCTION__);
471                 goto unwind;
472         }
473
474         while (1) {
475                 multi_post_fill_buffer_with_fragment(vnic, &state);
476
477                 /* Move onto the next fragment? */
478                 if (state.ifc.len == 0) {
479                         if (++frag_i >= skb_shinfo(skb)->nr_frags)
480                                 /* End of payload reached. */
481                                 break;
482                         f = &skb_shinfo(skb)->frags[frag_i];
483                         state.ifc.len = f->size;
484                         state.ifc.addr = page_address(f->page) + f->page_offset;
485                 }
486
487                 /* Start a new buffer? */
488                 if ((state.output_buffers->length == 
489                      NETFRONT_ACCEL_TX_BUF_LENGTH) &&
490                     multi_post_start_new_buffer(vnic, &state)) {
491                         DPRINTK("%s: out of buffers\n", __FUNCTION__);
492                         goto unwind;
493                 }
494         }
495
496         /* Check for space */
497         if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
498                 DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
499                 goto unwind;
500         }
501
502         /* Store the skb in what will be the last buffer's context */
503         state.output_buffers->buf->skb = skb;
504         /* Remember dma_id of what will be the last buffer */ 
505         dma_id = state.output_buffers->buf->buf_id;
506
507         /*
508          * Make an iovec of the buffers in the list, reversing the
509          * buffers as we go as they are constructed on a stack
510          */
511         tso_buf = state.output_buffers;
512         for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
513                 iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
514                 iovecs[frag_i].iov_len = tso_buf->length;
515                 tso_buf = tso_buf->next;
516         }
517         
518         rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
519
520         /* Update tx fastpath stats */
521         vnic->netdev_stats.fastpath_tx_bytes += skb->len;
522         vnic->netdev_stats.fastpath_tx_pkts ++;
523 #if NETFRONT_ACCEL_STATS
524         {
525                 u32 n;
526                 n = vnic->netdev_stats.fastpath_tx_pkts -
527                         (u32)vnic->stats.fastpath_tx_completions;
528                 if (n > vnic->stats.fastpath_tx_pending_max)
529                         vnic->stats.fastpath_tx_pending_max = n;
530         }
531 #endif
532         return NETFRONT_ACCEL_STATUS_GOOD;
533
534 unwind:
535         multi_post_unwind(vnic, &state);
536
537         NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
538
539         return NETFRONT_ACCEL_STATUS_BUSY;
540 }
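
/*
 * Worked example for the iovec reversal above (illustrative): buffers
 * created in order A, B, C end up stacked as C -> B -> A, and the loop
 * writes them to iovecs[2], iovecs[1] and iovecs[0], so ef_vi_transmitv()
 * sees them in the original A, B, C order, with dma_id naming the last
 * buffer, C.
 */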
541
542
543 static enum netfront_accel_post_status 
544 netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
545 {
546         struct netfront_accel_tso_buffer *tso_buf;
547         struct netfront_accel_pkt_desc *buf;
548         u8 *kva;
549         int rc;
550
551         if (ef_vi_transmit_space(&vnic->vi) < 1) {
552                 DPRINTK("%s: No TX space\n", __FUNCTION__);
553                 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
554                 return NETFRONT_ACCEL_STATUS_BUSY;
555         }
556
557         buf = netfront_accel_buf_get(vnic->tx_bufs);
558         if (buf == NULL) {
559                 DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
560                 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
561                 return NETFRONT_ACCEL_STATUS_BUSY;
562         }
563
564         /* Update tx fastpath stats */
565         vnic->netdev_stats.fastpath_tx_pkts++;
566         vnic->netdev_stats.fastpath_tx_bytes += skb->len;
567
568 #if NETFRONT_ACCEL_STATS
569         {
570                 u32 n;
571                 n = vnic->netdev_stats.fastpath_tx_pkts - 
572                         (u32)vnic->stats.fastpath_tx_completions;
573                 if (n > vnic->stats.fastpath_tx_pending_max)
574                         vnic->stats.fastpath_tx_pending_max = n;
575         }
576 #endif
577         
578         /* Store the context */
579         buf->skb = skb;
580         
581         kva = buf->pkt_kva;
582
583         if (skb->ip_summed == CHECKSUM_PARTIAL) {
584                 /* Set to zero to encourage falcon to work it out for us */
585                 *(u16*)(skb_transport_header(skb) + skb->csum) = 0;
586         }
587         NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
588                 (skb, idx, frag_data, frag_len, {
589                         /* Copy in payload */
590                         VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
591                         memcpy(kva, frag_data, frag_len);
592                         kva += frag_len;
593                 });
594
595         VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
596                 buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
597
598
599         /* Set up the TSO meta-data for a single buffer/packet */
600         tso_buf = (struct netfront_accel_tso_buffer *)
601                 (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
602         tso_buf->next = NULL;
603         tso_buf->buf = buf;
604         tso_buf->length = skb->len;
605
606         rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
607                             buf->buf_id);
608         /* We checked for space already, so it really should succeed */
609         BUG_ON(rc != 0);
610
611         return NETFRONT_ACCEL_STATUS_GOOD;
612 }
613
614
615 enum netfront_accel_post_status 
616 netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
617 {
618         struct ethhdr *pkt_eth_hdr;
619         struct iphdr *pkt_ipv4_hdr;
620         int value, try_fastpath;
621
622         /*
623          * This assumes that the data field points to the dest mac
624          * address.
625          */
626         cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
627
628         /*
629          * NB very important that all things that could return "CANT"
630          * are tested before things that return "BUSY", as if it
631          * returns "BUSY" it is assumed that it won't return "CANT"
632          * next time it is tried
633          */
634
635         /*
636          * Do a fastpath send if fast path table lookup returns true.
637          * We do this without the table lock and so may get the wrong
638          * answer, but current opinion is that's not a big problem 
639          */
640         try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table, 
641                                           (cuckoo_hash_key *)(&key), &value);
642
643         if (!try_fastpath) {
644                 DECLARE_MAC_BUF(buf);
645
646                 VPRINTK("try fast path false for mac: " %s "\n",
647                         print_mac(buf, skb->data));
648                 
649                 return NETFRONT_ACCEL_STATUS_CANT;
650         }
651
652         /* Check to see if the packet can be sent. */
653         if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
654                 EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
655                 return NETFRONT_ACCEL_STATUS_CANT;
656         }
657
658         pkt_eth_hdr  = (void*)skb->data;
659         pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
660
661         if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
662                 DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
663                         be16_to_cpu(pkt_eth_hdr->h_proto));
664                 return NETFRONT_ACCEL_STATUS_CANT;
665         }
666         
667         if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
668             pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
669                 DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
670                         __FUNCTION__, pkt_ipv4_hdr->protocol);
671                 return NETFRONT_ACCEL_STATUS_CANT;
672         }
673         
674         VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len, 
675                 skb_shinfo(skb)->gso_size);
676         
677         if (skb_shinfo(skb)->gso_size) {
678                 return netfront_accel_enqueue_skb_tso(vnic, skb);
679         }
680
681         if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
682                 return netfront_accel_enqueue_skb_single(vnic, skb);
683         }
684
685         return netfront_accel_enqueue_skb_multi(vnic, skb);
686 }
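
/*
 * Illustrative caller pattern (hypothetical and simplified, not lifted
 * from the netfront glue code): a CANT result falls back to the ordinary
 * netfront path, while BUSY parks the skb for netfront_accel_vi_not_busy()
 * to retry once TX completions free resources -- which is why every CANT
 * condition above is checked before anything that can return BUSY.
 */
#if 0
switch (netfront_accel_vi_tx_post(vnic, skb)) {
case NETFRONT_ACCEL_STATUS_GOOD:
        break;                         /* sent on the fast path */
case NETFRONT_ACCEL_STATUS_CANT:
        return use_slow_path(skb);     /* hypothetical fallback helper */
case NETFRONT_ACCEL_STATUS_BUSY:
        vnic->tx_skb = skb;            /* retried from the TX event path */
        break;
}
#endif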
687
688
689 /*
690  * Copy the data to the required destination. NB. len is the total new
691  * length of the socket buffer, not the amount of data to copy
692  */
693 inline
694 int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb, 
695                         struct netfront_accel_pkt_desc *buf, int len)
696 {
697         int i, extra = len - skb->len;
698         char c = 0; /* value unused; the reads below only warm the cache */
699         int pkt_stride = vnic->rx_pkt_stride;
700         int skb_stride = vnic->rx_skb_stride;
701         char *skb_start;
702         
703         /*
704          * This pulls the data into the cache - we have seen a
705          * performance benefit from it, but it is disabled by default
706          */
707         skb_start = skb->data;
708         if (pkt_stride) {
709                 for (i = 0; i < len; i += pkt_stride) {
710                         c += ((volatile char*)(buf->pkt_kva))[i];
711                 }
712         }
713         if (skb_stride) {
714                 for (i = skb->len; i < len ; i += skb_stride) {
715                         c += ((volatile char*)(skb_start))[i];
716                 }
717         }
718
719         if (skb_tailroom(skb) >= extra) {
720                 memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
721                 return 0;
722         }
723
724         return -ENOSPC;
725 }
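
/*
 * Illustrative only: with a 64 byte stride (roughly one touch per cache
 * line) the warming loops above read one byte per stride of the source
 * packet buffer and of the destination region of the skb, so the memcpy()
 * that follows hits warm lines.  The strides live in vnic->rx_pkt_stride
 * and vnic->rx_skb_stride and, as noted above, are disabled by default.
 */
#if 0
vnic->rx_pkt_stride = 64;
vnic->rx_skb_stride = 64;
#endif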
726
727
728 static void discard_jumbo_state(netfront_accel_vnic *vnic) 
729 {
730
731         if (vnic->jumbo_state.skb != NULL) {
732                 dev_kfree_skb_any(vnic->jumbo_state.skb);
733
734                 vnic->jumbo_state.skb = NULL;
735         }
736         vnic->jumbo_state.in_progress = 0;
737 }
738
739
740 static void  netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
741                                            struct sk_buff *skb)
742 {
743         cuckoo_hash_mac_key key;
744         unsigned long flags;
745         int value;
746         struct net_device *net_dev;
747
748
749         key = cuckoo_mac_to_key(skb->data + ETH_ALEN);
750
751         /*
752          * If this is a MAC address that we want to do fast path TX
753          * to, and we don't already, add it to the fastpath table.
754          * The initial lookup is done without the table lock and so
755          * may get the wrong answer, but current opinion is that's not
756          * a big problem
757          */
758         if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
759             !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
760                                 &value)) {
761                 spin_lock_irqsave(&vnic->table_lock, flags);
762                    
763                 cuckoo_hash_add_check(&vnic->fastpath_table,
764                                       (cuckoo_hash_key *)&key,
765                                       1, 1);
766                 
767                 spin_unlock_irqrestore(&vnic->table_lock, flags);
768         }
769
770         if (compare_ether_addr(skb->data, vnic->mac)) {
771                 struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
772                 u16 port;
773                 DECLARE_MAC_BUF(buf);
774
775                 DPRINTK("%s: saw wrong MAC address " %s "\n",
776                         __FUNCTION__, print_mac(buf, skb->data));
777
778                 if (ip->protocol == IPPROTO_TCP) {
779                         struct tcphdr *tcp = (struct tcphdr *)
780                                 ((char *)ip + 4 * ip->ihl);
781                         port = tcp->dest;
782                 } else {
783                         struct udphdr *udp = (struct udphdr *)
784                                 ((char *)ip + 4 * ip->ihl);
785                         EPRINTK_ON(ip->protocol != IPPROTO_UDP);
786                         port = udp->dest;
787                 }
788
789                 netfront_accel_msg_tx_fastpath(vnic, skb->data,
790                                                ip->daddr, port,
791                                                ip->protocol);
792         }
793
794         net_dev = vnic->net_dev;
795         skb->dev = net_dev;
796         skb->protocol = eth_type_trans(skb, net_dev);
797         /* CHECKSUM_UNNECESSARY as hardware has done it already */
798         skb->ip_summed = CHECKSUM_UNNECESSARY;
799
800         if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
801                 netif_receive_skb(skb);
802 }
803
804
805 static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic, 
806                                              ef_event *ev)
807 {
808         struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
809         struct netfront_accel_pkt_desc *buf = NULL;
810         struct sk_buff *skb;
811         int id, len, sop = 0, cont = 0;
812
813         VPRINTK("Rx event.\n");
814         /*
815          * Complete the receive operation, and get the request id of
816          * the buffer
817          */
818         id = ef_vi_receive_done(&vnic->vi, ev);
819
820         if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
821                 EPRINTK("Rx packet %d is invalid\n", id);
822                 /* Carry on round the loop if more events */
823                 goto bad_packet;
824         }
825         /* Get our buffer descriptor */
826         buf = netfront_accel_buf_find(bufinfo, id);
827
828         len = EF_EVENT_RX_BYTES(*ev);
829
830         /* An RX buffer has been removed from the DMA ring. */
831         vnic->rx_dma_level--;
832
833         if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
834                 sop = EF_EVENT_RX_SOP(*ev);
835                 cont = EF_EVENT_RX_CONT(*ev);
836
837                 skb = vnic->jumbo_state.skb;
838
839                 VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n", 
840                         id, len, sop, cont);
841
842                 if (sop) {
843                         if (!vnic->jumbo_state.in_progress) {
844                                 vnic->jumbo_state.in_progress = 1;
845                                 BUG_ON(vnic->jumbo_state.skb != NULL);
846                         } else {
847                                 /*
848                                  * This fragment implies the previous one
849                                  * is missing its tail, but may itself be OK
850                                  */
851                                 DPRINTK("sop and in_progress => no tail\n");
852
853                                 /* Release the socket buffer we already had */
854                                 discard_jumbo_state(vnic);
855
856                                 /* Now start processing this fragment */
857                                 vnic->jumbo_state.in_progress = 1;
858                                 skb = NULL;
859                         }
860                 } else if (!vnic->jumbo_state.in_progress) {
861                         DPRINTK("!sop and !in_progress => missing head\n");
862                         goto missing_head;
863                 }
864
865                 if (!cont) {
866                         /* Update state for next time */
867                         vnic->jumbo_state.in_progress = 0;
868                         vnic->jumbo_state.skb = NULL;
869                 } else if (!vnic->jumbo_state.in_progress) {
870                         DPRINTK("cont and !in_progress => missing head\n");
871                         goto missing_head;
872                 }
873
874                 if (skb == NULL) {
875                         BUG_ON(!sop);
876
877                         if (!cont)
878                                 skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
879                         else
880                                 skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN, 
881                                                 GFP_ATOMIC);
882
883                         if (skb == NULL) {
884                                 DPRINTK("%s: Couldn't get an rx skb.\n",
885                                         __FUNCTION__);
886                                 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
887                                 /*
888                                  * Dropping this fragment means we
889                                  * should discard the rest too
890                                  */
891                                 discard_jumbo_state(vnic);
892
893                                 /* Carry on round the loop if more events */
894                                 return 0;
895                         }
896
897                 }
898                 
899                 /* Copy the data to required end destination */
900                 if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
901                         /*
902                          * No space in the skb - suggests > MTU packet
903                          * received
904                          */
905                         EPRINTK("%s: Rx packet too large (%d)\n",
906                                 __FUNCTION__, len);
907                         netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
908                         discard_jumbo_state(vnic);
909                         return 0;
910                 }
911                 
912                 /* Put the buffer back in the DMA queue. */
913                 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
914
915                 if (cont) {
916                         vnic->jumbo_state.skb = skb;
917
918                         return 0;
919                 } else {
920                         /* Track number of rx fastpath packets */
921                         vnic->netdev_stats.fastpath_rx_pkts++;
922                         vnic->netdev_stats.fastpath_rx_bytes += len;
923
924                         netfront_accel_vi_rx_complete(vnic, skb);
925
926                         return 1;
927                 }
928         } else {
929                 BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
930
931                 if (EF_EVENT_RX_DISCARD_TYPE(*ev) 
932                     == EF_EVENT_RX_DISCARD_TRUNC) {
933                         DPRINTK("%s: " EF_EVENT_FMT 
934                                 " buffer %d FRM_TRUNC q_id %d\n",
935                                 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
936                                 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
937                         NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
938                 } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) 
939                           == EF_EVENT_RX_DISCARD_OTHER) {
940                         DPRINTK("%s: " EF_EVENT_FMT 
941                                 " buffer %d RX_DISCARD_OTHER q_id %d\n",
942                                 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
943                                 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
944                         /*
945                          * Probably tail of packet for which error has
946                          * already been logged, so don't count in
947                          * stats
948                          */
949                 } else {
950                         EPRINTK("%s: " EF_EVENT_FMT 
951                                 " buffer %d rx discard type %d q_id %d\n",
952                                 __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
953                                 EF_EVENT_RX_DISCARD_TYPE(*ev), 
954                                 EF_EVENT_RX_DISCARD_Q_ID(*ev) );
955                         NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count);
956                 }
957         }
958
959         /* discard type drops through here */
960
961 bad_packet:
962         /* Release the socket buffer we already had */
963         discard_jumbo_state(vnic);
964
965 missing_head:
966         BUG_ON(vnic->jumbo_state.in_progress != 0);
967         BUG_ON(vnic->jumbo_state.skb != NULL);
968
969         if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
970                 /* Put the buffer back in the DMA queue. */
971                 netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
972
973         vnic->netdev_stats.fastpath_rx_errors++;
974
975         DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
976                 __FUNCTION__, ev->rx.flags);
977
978         return 0;
979 }
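
/*
 * Summary of the jumbo reassembly handling above (a restatement of the
 * code, not new behaviour): each RX event carries SOP (start of packet)
 * and CONT (more fragments follow) flags, and fragments are glued into
 * vnic->jumbo_state.skb:
 *
 *   sop=1 cont=0   single-buffer packet: allocate skb, copy, deliver
 *   sop=1 cont=1   first fragment: allocate an MTU-sized skb, copy, stash
 *   sop=0 cont=1   middle fragment: append to the stashed skb
 *   sop=0 cont=0   last fragment: append, deliver, clear state
 *
 * Seeing sop while already in_progress, or !sop while not, indicates a
 * lost fragment, and the partial skb is discarded.
 */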
980
981
982 static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
983 {
984         struct netfront_info *np = ((struct netfront_info *)
985                                     netdev_priv(vnic->net_dev));
986         struct sk_buff *skb;
987         int handled;
988         unsigned long flags;
989         
990         /*
991          * TODO if we could safely check tx_skb == NULL and return
992          * early without taking the lock, that would obviously help
993          * performance
994          */
995
996         /* Take the netfront lock which protects tx_skb. */
997         spin_lock_irqsave(&np->tx_lock, flags);
998         if (vnic->tx_skb != NULL) {
999                 DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
1000                 
1001                 skb = vnic->tx_skb;
1002                 vnic->tx_skb = NULL;
1003
1004                 spin_unlock_irqrestore(&np->tx_lock, flags);
1005
1006                 handled = netfront_accel_vi_tx_post(vnic, skb);
1007                 
1008                 spin_lock_irqsave(&np->tx_lock, flags);
1009
1010                 if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
1011                         DPRINTK("%s restarting tx\n", __FUNCTION__);
1012                         if (netfront_check_queue_ready(vnic->net_dev)) {
1013                                 netif_wake_queue(vnic->net_dev);
1014                                 NETFRONT_ACCEL_STATS_OP
1015                                         (vnic->stats.queue_wakes++);
1016                         }
1017                 } else {
1018                         vnic->tx_skb = skb;
1019                 }
1020                 
1021                 /*
1022                  * Should never get a CANT, as it checks that before
1023                  * deciding it was BUSY first time round 
1024                  */
1025                 BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
1026         }
1027         spin_unlock_irqrestore(&np->tx_lock, flags);
1028 }
1029
1030
1031 static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic, 
1032                                           struct netfront_accel_tso_buffer *tso_buf,
1033                                           int is_last)
1034 {
1035         struct netfront_accel_tso_buffer *next;
1036
1037         /* 
1038          * We get a single completion for every call to
1039          * ef_vi_transmitv so handle any other buffers which are part
1040          * of the same packet 
1041          */
1042         while (tso_buf != NULL) {
1043                 if (tso_buf->buf->skb != NULL) {
1044                         dev_kfree_skb_any(tso_buf->buf->skb);
1045                         tso_buf->buf->skb = NULL;
1046                 }
1047
1048                 next = tso_buf->next;
1049
1050                 netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
1051
1052                 tso_buf = next;
1053         }
1054
1055         /*
1056          * If this was the last one in the batch, we try and send any
1057          * pending tx_skb. There should now be buffers and
1058          * descriptors
1059          */
1060         if (is_last)
1061                 netfront_accel_vi_not_busy(vnic);
1062 }
1063
1064
1065 static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
1066                                               ef_event *ev)
1067 {
1068         struct netfront_accel_pkt_desc *buf;
1069         struct netfront_accel_tso_buffer *tso_buf;
1070         ef_request_id ids[EF_VI_TRANSMIT_BATCH];
1071         int i, n_ids;
1072         unsigned long flags;
1073
1074         /* Get the request ids for this tx completion event. */
1075         n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
1076
1077         /* Take the tx buffer spin lock and hold for the duration */
1078         spin_lock_irqsave(&vnic->tx_lock, flags);
1079
1080         for (i = 0; i < n_ids; ++i) {
1081                 VPRINTK("Tx packet %d complete\n", ids[i]);
1082                 buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
1083                 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
1084
1085                 tso_buf = (struct netfront_accel_tso_buffer *)
1086                         (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
1087                 BUG_ON(tso_buf->buf != buf);
1088
1089                 netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
1090         }
1091
1092         spin_unlock_irqrestore(&vnic->tx_lock, flags);
1093 }
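
/*
 * Worked example tying the TX paths together (illustrative): if
 * netfront_accel_enqueue_skb_multi() pushed buffers A, B, C with
 * dma_id = C's buf_id, the single completion event unbundles to that id;
 * C's tail meta-data links C -> B -> A, so netfront_accel_vi_tx_complete()
 * frees all three buffers and the skb, which was stored against C only.
 */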
1094
1095
1096 int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
1097 {
1098         ef_event ev[ACCEL_VI_POLL_EVENTS];
1099         int rx_remain = rx_packets, rc, events, i;
1100 #if NETFRONT_ACCEL_STATS
1101         int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
1102 #endif
1103         BUG_ON(rx_packets <= 0);
1104
1105         events = ef_eventq_poll(&vnic->vi, ev, 
1106                                 min(rx_remain, ACCEL_VI_POLL_EVENTS));
1107         i = 0;
1108         NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
1109
1110         VPRINTK("%s: %d events\n", __FUNCTION__, events);
1111
1112         /* Loop over each event */
1113         while (events) {
1114                 VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__, 
1115                         EF_EVENT_PRI_ARG(ev[i]),        
1116                         (unsigned long)(vnic->vi.evq_state->evq_ptr));
1117
1118                 if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
1119                     (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
1120                         rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
1121                         rx_remain -= rc;
1122                         BUG_ON(rx_remain < 0);
1123                         NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
1124                 } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
1125                         netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
1126                         NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
1127                 } else if (EF_EVENT_TYPE(ev[i]) == 
1128                            EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
1129                         DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
1130                                 __FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
1131                         discard_jumbo_state(vnic);
1132                         NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
1133                 } else {
1134                         EPRINTK("Unexpected event " EF_EVENT_FMT "\n", 
1135                                 EF_EVENT_PRI_ARG(ev[i]));
1136                         NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
1137                 }
1138
1139                 i++;
1140
1141                 /* Carry on round the loop if more events and more space */
1142                 if (i == events) {
1143                         if (rx_remain == 0)
1144                                 break;
1145
1146                         events = ef_eventq_poll(&vnic->vi, ev, 
1147                                                 min(rx_remain, 
1148                                                     ACCEL_VI_POLL_EVENTS));
1149                         i = 0;
1150                         NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
1151                 }
1152         }
1153         
1154 #if NETFRONT_ACCEL_STATS
1155         vnic->stats.event_count += n_evs_polled;
1156         vnic->stats.event_count_since_irq += n_evs_polled;
1157         if (n_evs_polled > vnic->stats.events_per_poll_max)
1158                 vnic->stats.events_per_poll_max = n_evs_polled;
1159         if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
1160                 vnic->stats.events_per_poll_rx_max = rx_evs_polled;
1161         if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
1162                 vnic->stats.events_per_poll_tx_max = tx_evs_polled;
1163 #endif
1164
1165         return rx_packets - rx_remain;
1166 }
1167
1168
1169 int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
1170 {
1171         u32 sw_evq_ptr;
1172
1173         VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
1174
1175         BUG_ON(vnic == NULL);
1176         BUG_ON(vnic->vi.evq_state == NULL);
1177
1178         /* Do a quick check for an event. */
1179         if (ef_eventq_has_event(&vnic->vi)) {
1180                 VPRINTK("%s: found event\n",  __FUNCTION__);
1181                 return 0;
1182         }
1183
1184         VPRINTK("evq_ptr=0x%08x  evq_mask=0x%08x\n",
1185                 vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
1186   
1187         /* Request a wakeup from the hardware. */
1188         sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
1189
1190         BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
1191
1192         VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
1193                 vnic->hw.falcon.evq_rptr);
1194         *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);
1195
1196         return 1;
1197 }
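
/*
 * Illustrative caller pattern (hypothetical; the real driver drives this
 * from its interrupt/tasklet glue): poll for a budget of packets, then
 * try to re-arm; a return of 0 means an event was already pending, so go
 * round again rather than waiting for the wakeup.
 */
#if 0
do {
        netfront_accel_vi_poll(vnic, budget);   /* budget: hypothetical */
} while (!netfront_accel_vi_enable_interrupts(vnic));
#endif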