- Update Xen patches to 3.3-rc5 and c/s 1157.
[linux-flexiantxendom0-3.2.10.git] / drivers / xen / sfc_netfront / accel_tso.c
1 /****************************************************************************
2  * Solarflare driver for Xen network acceleration
3  *
4  * Copyright 2006-2008: Solarflare Communications Inc,
5  *                      9501 Jeronimo Road, Suite 250,
6  *                      Irvine, CA 92618, USA
7  *
8  * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License version 2 as published
12  * by the Free Software Foundation, incorporated herein by reference.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22  ****************************************************************************
23  */
24
25 #include <linux/pci.h>
26 #include <linux/tcp.h>
27 #include <linux/ip.h>
28 #include <linux/in.h>
29 #include <linux/if_ether.h>
30
31 #include "accel.h"
32 #include "accel_util.h"
33
34 #include "accel_tso.h"
35
/* Length of the Ethernet header == offset of the network (IP) header. */
#define ETH_HDR_LEN(skb)  skb_network_offset(skb)
/* Offset of the TCP header from the start of the packet. */
#define SKB_TCP_OFF(skb)  skb_transport_offset(skb)
/* Offset of the IP header from the start of the packet. */
#define SKB_IP_OFF(skb)   skb_network_offset(skb)

/*
 * Set a maximum number of buffers in each output packet to make life
 * a little simpler - if this is reached it will just move on to
 * another packet 
 */
#define ACCEL_TSO_MAX_BUFFERS (6)
46
/** TSO State.
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct netfront_accel_tso_state {
        /** bytes of data we've yet to segment */
        unsigned remaining_len;

        /** current sequence number (host endian) */
        unsigned seqnum;

        /** remaining payload space in current output packet */
        unsigned packet_space;

        /** List of packets to be output, containing the buffers and
         *  iovecs to describe each packet 
         */
        struct netfront_accel_tso_output_packet *output_packets;

        /** Total number of buffers in output_packets */
        unsigned buffers;

        /** Total number of packets in output_packets */
        unsigned packets;

        /** Input Fragment Cursor.
         *
         * Where we are in the current fragment of the incoming SKB.  These
         * values get updated in place when we split a fragment over
         * multiple packets.
         */
        struct {
                /** address of current position */
                void *addr;
                /** remaining length */   
                unsigned int len;
        } ifc; /*  == ifc Input Fragment Cursor */

        /** Parameters.
         *
         * These values are set once at the start of the TSO send and do
         * not get changed as the routine progresses.
         */
        struct {
                /* the number of bytes of header (eth + IP + TCP) */
                unsigned int header_length;

                /* The number of bytes to put in each outgoing segment
                 * (headers plus gso_size of payload). */
                int full_packet_size;
                
                /* Current IP ID, host endian; incremented per segment. */
                unsigned ip_id;

                /* Max size of each output packet payload */
                int gso_size;
        } p;
};
105
106
/**
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.
 *
 * @v skb              The sk_buff to check.
 */
static inline void tso_check_safe(struct sk_buff *skb) {
        /* Only IPv4-over-Ethernet TCP is handled by this TSO path. */
        EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
        EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
        EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
        /* The full TCP header must live in the linear (head) area. */
        EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb));
}
119
120
121
122 /** Parse the SKB header and initialise state. */
123 static inline void tso_start(struct netfront_accel_tso_state *st, 
124                              struct sk_buff *skb) {
125
126         /*
127          * All ethernet/IP/TCP headers combined size is TCP header size
128          * plus offset of TCP header relative to start of packet.
129          */
130         st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb);
131         st->p.full_packet_size = (st->p.header_length
132                                   + skb_shinfo(skb)->gso_size);
133         st->p.gso_size = skb_shinfo(skb)->gso_size;
134
135         st->p.ip_id = htons(ip_hdr(skb)->id);
136         st->seqnum = ntohl(tcp_hdr(skb)->seq);
137
138         EPRINTK_ON(tcp_hdr(skb)->urg);
139         EPRINTK_ON(tcp_hdr(skb)->syn);
140         EPRINTK_ON(tcp_hdr(skb)->rst);
141
142         st->remaining_len = skb->len - st->p.header_length;
143
144         st->output_packets = NULL;
145         st->buffers = 0;
146         st->packets = 0;
147
148         VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
149                 st->p.header_length, st->p.full_packet_size, st->p.gso_size,
150                 st->seqnum, skb->len);
151 }
152
153 /**
154  * Add another NIC mapped buffer onto an output packet  
155  */ 
156 static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
157                                        struct netfront_accel_tso_state *st,
158                                        int first)
159 {
160         struct netfront_accel_tso_buffer *tso_buf;
161         struct netfront_accel_pkt_desc *buf;
162
163         /* Get a mapped packet buffer */
164         buf = netfront_accel_buf_get(vnic->tx_bufs);
165         if (buf == NULL) {
166                 DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
167                 return -1;
168         }
169
170         /* Store a bit of meta-data at the end */
171         tso_buf =(struct netfront_accel_tso_buffer *)
172                 (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
173                  + sizeof(struct netfront_accel_tso_output_packet));
174
175         tso_buf->buf = buf;
176
177         tso_buf->length = 0;
178         
179         if (first) {
180                 struct netfront_accel_tso_output_packet *output_packet 
181                         = (struct netfront_accel_tso_output_packet *)
182                         (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
183                 output_packet->next = st->output_packets;
184                 st->output_packets = output_packet;
185                 tso_buf->next = NULL;
186                 st->output_packets->tso_bufs = tso_buf;
187                 st->output_packets->tso_bufs_len = 1;
188         } else {
189                 tso_buf->next = st->output_packets->tso_bufs;
190                 st->output_packets->tso_bufs = tso_buf;
191                 st->output_packets->tso_bufs_len ++;
192         }
193
194         BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
195         
196         st->buffers ++;
197
198         /*
199          * Store the context, set to NULL, last packet buffer will get
200          * non-NULL later
201          */
202         tso_buf->buf->skb = NULL;
203
204         return 0;
205 }
206
207
208 /* Generate a new header, and prepare for the new packet.
209  *
210  * @v vnic            VNIC
211  * @v skb              Socket buffer
212  * @v st                TSO state
213  * @ret rc            0 on success, or -1 if failed to alloc header
214  */
215
216 static inline 
217 int tso_start_new_packet(netfront_accel_vnic *vnic,
218                          struct sk_buff *skb,
219                          struct netfront_accel_tso_state *st) 
220 {
221         struct netfront_accel_tso_buffer *tso_buf;
222         struct iphdr *tsoh_iph;
223         struct tcphdr *tsoh_th;
224         unsigned ip_length;
225
226         if (tso_start_new_buffer(vnic, st, 1) < 0) {
227                 NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
228                 return -1;              
229         }
230
231         /* This has been set up by tso_start_new_buffer() */
232         tso_buf = st->output_packets->tso_bufs;
233
234         /* Copy in the header */
235         memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length);
236         tso_buf->length = st->p.header_length;
237
238         tsoh_th = (struct tcphdr*) 
239                 (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
240         tsoh_iph = (struct iphdr*) 
241                 (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb));
242
243         /* Set to zero to encourage falcon to fill these in */
244         tsoh_th->check  = 0;
245         tsoh_iph->check = 0;
246
247         tsoh_th->seq = htonl(st->seqnum);
248         st->seqnum += st->p.gso_size;
249
250         if (st->remaining_len > st->p.gso_size) {
251                 /* This packet will not finish the TSO burst. */
252                 ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
253                 tsoh_th->fin = 0;
254                 tsoh_th->psh = 0;
255         } else {
256                 /* This packet will be the last in the TSO burst. */
257                 ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
258                              + st->remaining_len);
259                 tsoh_th->fin = tcp_hdr(skb)->fin;
260                 tsoh_th->psh = tcp_hdr(skb)->psh;
261         }
262
263         tsoh_iph->tot_len = htons(ip_length);
264
265         /* Linux leaves suitable gaps in the IP ID space for us to fill. */
266         tsoh_iph->id = st->p.ip_id++;
267         tsoh_iph->id = htons(tsoh_iph->id);
268
269         st->packet_space = st->p.gso_size; 
270
271         st->packets++;
272
273         return 0;
274 }
275
276
277
278 static inline void tso_get_fragment(struct netfront_accel_tso_state *st, 
279                                     int len, void *addr)
280 {
281         st->ifc.len = len;
282         st->ifc.addr = addr;
283         return;
284 }
285
286
287 static inline void tso_unwind(netfront_accel_vnic *vnic, 
288                               struct netfront_accel_tso_state *st)
289 {
290         struct netfront_accel_tso_buffer *tso_buf;
291         struct netfront_accel_tso_output_packet *output_packet;
292
293         DPRINTK("%s\n", __FUNCTION__);
294
295         while (st->output_packets != NULL) {
296                 output_packet = st->output_packets;
297                 st->output_packets = output_packet->next;
298                 while (output_packet->tso_bufs != NULL) {
299                         tso_buf = output_packet->tso_bufs;
300                         output_packet->tso_bufs = tso_buf->next;
301
302                         st->buffers --;
303                         output_packet->tso_bufs_len --;
304
305                         netfront_accel_buf_put(vnic->tx_bufs, 
306                                                tso_buf->buf->buf_id);
307                 }
308         }
309         BUG_ON(st->buffers != 0);
310 }
311
312
313
314 static inline
315 void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
316                                    struct netfront_accel_tso_state *st) 
317 {
318         struct netfront_accel_tso_buffer *tso_buf;
319         int n, space;
320
321         BUG_ON(st->output_packets == NULL);
322         BUG_ON(st->output_packets->tso_bufs == NULL);
323
324         tso_buf = st->output_packets->tso_bufs;
325
326         if (st->ifc.len == 0)  return;
327         if (st->packet_space == 0)  return;
328         if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;
329
330         n = min(st->ifc.len, st->packet_space);
331
332         space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
333         n = min(n, space);
334
335         st->packet_space -= n;
336         st->remaining_len -= n;
337         st->ifc.len -= n;
338
339         memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
340
341         tso_buf->length += n;
342
343         BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);
344
345         st->ifc.addr += n;
346
347         return;
348 }
349
350
/**
 * Segment a GSO sk_buff into NIC-mapped buffers and hand the resulting
 * packets to the hardware.
 *
 * @v vnic             The accelerated vnic to transmit on
 * @v skb              The TSO socket buffer to segment and send
 * @ret rc             NETFRONT_ACCEL_STATUS_GOOD on success, or
 *                     NETFRONT_ACCEL_STATUS_BUSY if out of buffers/TX space
 *                     (all claimed buffers are released before returning)
 */
int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
                                   struct sk_buff *skb)
{
        struct netfront_accel_tso_state state;
        struct netfront_accel_tso_buffer *tso_buf = NULL;
        struct netfront_accel_tso_output_packet *reversed_list = NULL;
        struct netfront_accel_tso_output_packet *tmp_pkt;
        ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
        int frag_i, rc, dma_id;
        skb_frag_t *f;

        tso_check_safe(skb);

        if (skb->ip_summed != CHECKSUM_PARTIAL)
                EPRINTK("Trying to TSO send a packet without HW checksum\n");

        tso_start(&state, skb);

        /*
         * Setup the first payload fragment.  If the skb header area
         * contains exactly the headers and all payload is in the frag
         * list things are little simpler
         */
        if (skb_headlen(skb) == state.p.header_length) {
                /* Grab the first payload fragment. */
                BUG_ON(skb_shinfo(skb)->nr_frags < 1);
                frag_i = 0;
                f = &skb_shinfo(skb)->frags[frag_i];
                tso_get_fragment(&state, skb_frag_size(f),
                                 page_address(skb_frag_page(f)) + f->page_offset);
        } else {
                /* The linear area holds payload too; start there.
                 * frag_i == -1 marks "still in the linear area". */
                int hl = state.p.header_length;
                tso_get_fragment(&state,  skb_headlen(skb) - hl, 
                                 skb->data + hl);
                frag_i = -1;
        }

        if (tso_start_new_packet(vnic, skb, &state) < 0) {
                DPRINTK("%s: out of first start-packet memory\n",
                        __FUNCTION__);
                goto unwind;
        }

        /* Main segmentation loop: copy payload, advancing fragment,
         * buffer and packet cursors as each fills up. */
        while (1) {
                tso_fill_packet_with_fragment(vnic, &state);
                
                /* Move onto the next fragment? */
                if (state.ifc.len == 0) {
                        if (++frag_i >= skb_shinfo(skb)->nr_frags)
                                /* End of payload reached. */
                                break;
                        f = &skb_shinfo(skb)->frags[frag_i];
                        tso_get_fragment(&state, skb_frag_size(f),
                                         page_address(skb_frag_page(f)) +
                                         f->page_offset);
                }

                /* Start a new buffer? */
                if ((state.output_packets->tso_bufs->length == 
                     NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
                    tso_start_new_buffer(vnic, &state, 0)) {
                        DPRINTK("%s: out of start-buffer memory\n",
                                __FUNCTION__);
                        goto unwind;
                }

                /* Start at new packet?  Either the payload quota for this
                 * segment is used up, or the packet cannot take another
                 * buffer and its current buffer is full. */
                if ((state.packet_space == 0 || 
                     ((state.output_packets->tso_bufs_len >=
                       ACCEL_TSO_MAX_BUFFERS) &&
                      (state.output_packets->tso_bufs->length >= 
                       NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
                    tso_start_new_packet(vnic, skb, &state) < 0) {
                        DPRINTK("%s: out of start-packet memory\n",
                                __FUNCTION__);
                        goto unwind;
                }

        }

        /* Check for space */
        if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
                DPRINTK("%s: Not enough TX space (%d)\n",
                        __FUNCTION__, state.buffers);
                goto unwind;
        }

        /*
         * Store the skb context in the most recent buffer (i.e. the
         * last buffer that will be sent)
         */
        state.output_packets->tso_bufs->buf->skb = skb;

        /* Reverse the list of packets as we construct it on a stack */
        while (state.output_packets != NULL) {
                tmp_pkt = state.output_packets;
                state.output_packets = tmp_pkt->next;
                tmp_pkt->next = reversed_list;
                reversed_list = tmp_pkt;
        }

        /* Pass off to hardware */
        while (reversed_list != NULL) {
                tmp_pkt = reversed_list;
                reversed_list = tmp_pkt->next;

                BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
                BUG_ON(tmp_pkt->tso_bufs_len == 0);

                /* The packet is identified by its first buffer's id. */
                dma_id = tmp_pkt->tso_bufs->buf->buf_id;

                /*
                 * Make an iovec of the buffers in the list, reversing
                 * the buffers as we go as they are constructed on a
                 * stack
                 */
                tso_buf = tmp_pkt->tso_bufs;
                for (frag_i = tmp_pkt->tso_bufs_len - 1;
                     frag_i >= 0;
                     frag_i--) {
                        iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
                        iovecs[frag_i].iov_len = tso_buf->length;
                        tso_buf = tso_buf->next;
                }

                rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
                                     dma_id);
                /*
                 * We checked for space already, so it really should
                 * succeed
                 */
                BUG_ON(rc != 0);
        }

        /* Track number of tx fastpath stats */
        vnic->netdev_stats.fastpath_tx_bytes += skb->len;
        vnic->netdev_stats.fastpath_tx_pkts += state.packets;
#if NETFRONT_ACCEL_STATS
        {
                /* Record the high-water mark of in-flight TX packets. */
                unsigned n;
                n = vnic->netdev_stats.fastpath_tx_pkts -
                        vnic->stats.fastpath_tx_completions;
                if (n > vnic->stats.fastpath_tx_pending_max)
                        vnic->stats.fastpath_tx_pending_max = n;
        }
#endif

        return NETFRONT_ACCEL_STATUS_GOOD;
 
 unwind:
        /* Give back any buffers claimed so far; report busy to caller. */
        tso_unwind(vnic, &state);

        NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);

        return NETFRONT_ACCEL_STATUS_BUSY;
}
507
508
509