/* drivers/xen/sfc_netfront/accel_ssr.c */
/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 ****************************************************************************
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/list.h>
#include <net/ip.h>
#include <net/checksum.h>

#include "accel.h"
#include "accel_util.h"
#include "accel_bufs.h"

#include "accel_ssr.h"

static inline int list_valid(struct list_head *lh) {
        return lh->next != NULL;
}

static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic,
                                        struct netfront_accel_ssr_state *st,
                                        struct netfront_accel_ssr_conn *c);

/** Construct a netfront_accel_ssr_state.
 *
 * @v st     The SSR state (per channel per port)
 */
void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) {
        unsigned i;

        INIT_LIST_HEAD(&st->conns);
        INIT_LIST_HEAD(&st->free_conns);
        for (i = 0; i < 8; ++i) {
                struct netfront_accel_ssr_conn *c =
                        kmalloc(sizeof(*c), GFP_KERNEL);
                if (c == NULL)
                        break;
                c->n_in_order_pkts = 0;
                c->skb = NULL;
                list_add(&c->link, &st->free_conns);
        }
}


/** Destructor for a netfront_accel_ssr_state.
 *
 * @v vnic   The vnic
 * @v st     The SSR state (per channel per port)
 */
void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
                             struct netfront_accel_ssr_state *st) {
        struct netfront_accel_ssr_conn *c;

        /* Return cleanly if netfront_accel_ssr_init() was not previously
         * called. */
        BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
        if (! list_valid(&st->conns))
                return;

        while ( ! list_empty(&st->free_conns)) {
                c = list_entry(st->free_conns.prev,
                               struct netfront_accel_ssr_conn, link);
                list_del(&c->link);
                BUG_ON(c->skb != NULL);
                kfree(c);
        }
        while ( ! list_empty(&st->conns)) {
                c = list_entry(st->conns.prev,
                               struct netfront_accel_ssr_conn, link);
                list_del(&c->link);
                if (c->skb)
                        netfront_accel_ssr_deliver(vnic, st, c);
                kfree(c);
        }
}


/** Calculate the IP checksum and deliver to the OS.
 *
 * @v vnic   The vnic
 * @v st     The SSR state (per channel per port)
 * @v c      The SSR connection state
 */
static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
                                       struct netfront_accel_ssr_state *st,
                                       struct netfront_accel_ssr_conn *c) {
        BUG_ON(c->skb == NULL);

        /*
         * If we've chained packets together, recalculate the IP
         * checksum.
         */
        if (skb_shinfo(c->skb)->frag_list) {
                NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
                c->iph->check = 0;
                c->iph->check = ip_fast_csum((unsigned char *) c->iph,
                                             c->iph->ihl);
        }

        VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len);

        netif_receive_skb(c->skb);
        c->skb = NULL;
}


/** Push held skbs down into the network stack.
 *
 * @v vnic     The vnic
 * @v st       SSR state
 *
 * Only called if we are tracking one or more connections.
 */
void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
                                       struct netfront_accel_ssr_state *st) {
        struct netfront_accel_ssr_conn *c;

        BUG_ON(list_empty(&st->conns));

        list_for_each_entry(c, &st->conns, link)
                if (c->skb)
                        netfront_accel_ssr_deliver(vnic, st, c);

        /* Time-out connections that have received no traffic for 20ms. */
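        /* HZ / 50 is 20ms expressed in jiffies; the extra jiffy below is
         * presumably a rounding margin for coarse tick rates. */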
        c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
                       link);
        if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) {
                NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
                list_del(&c->link);
                list_add(&c->link, &st->free_conns);
        }
}


/** Process an SKB and decide whether to dispatch it to the stack now or
 * later.
 *
 * @v vnic       The vnic
 * @v st         SSR state
 * @v skb        SKB to examine
 * @ret rc       0 => deliver SKB to kernel now, otherwise the SKB belongs
 *               to us.
 */
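/*
 * Assumed calling convention (a sketch only; the vnic field name used in
 * the caller below is hypothetical and not defined in this file): the RX
 * completion path hands each received skb to netfront_accel_ssr_skb().
 * A return of 0 means SSR did not take the skb and the caller must pass
 * it to the stack itself; non-zero means SSR now owns it and will deliver
 * it later.  Once the burst has been processed, anything still held is
 * flushed, honouring the "only call while tracking connections" rule
 * stated above:
 *
 *      if (netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb) == 0)
 *              netif_receive_skb(skb);
 *      ...
 *      if ( ! list_empty(&vnic->ssr_state.conns))
 *              __netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);
 */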
int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
                           struct netfront_accel_ssr_state *st,
                           struct sk_buff *skb) {
        int data_length, dont_merge;
        struct netfront_accel_ssr_conn *c;
        struct iphdr *iph;
        struct tcphdr *th;
        unsigned th_seq;

        BUG_ON(skb_shinfo(skb)->frag_list != NULL);
        BUG_ON(skb->next != NULL);

        /* We're not interested if it isn't TCP over IPv4. */
        iph = (struct iphdr *) skb->data;
        if (skb->protocol != htons(ETH_P_IP) ||
            iph->protocol != IPPROTO_TCP) {
                return 0;
        }

        /* Ignore segments that fail csum or are fragmented. */
        if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
                     (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
                return 0;
        }

        th = (struct tcphdr *)(skb->data + iph->ihl * 4);
        data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
        th_seq = ntohl(th->seq);
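        /* Never merge zero-length segments (pure ACKs) or segments carrying
         * URG/SYN/RST.  Bitwise OR rather than || is used here, presumably
         * to avoid branches on the fast path. */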
        dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;

        list_for_each_entry(c, &st->conns, link) {
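                /* Branch-free 4-tuple match: the OR of the differences is
                 * non-zero if any of the address/port fields differ, in
                 * which case this is not our connection. */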
                if ((c->saddr  - iph->saddr) |
                    (c->daddr  - iph->daddr) |
                    (c->source - th->source) |
                    (c->dest   - th->dest  ))
                        continue;

                /* Re-insert at head of list to reduce lookup time. */
                list_del(&c->link);
                list_add(&c->link, &st->conns);
                c->last_pkt_jiffies = jiffies;

                if (unlikely(th_seq - c->next_seq)) {
                        /* Out-of-order, so start counting again. */
                        if (c->skb)
                                netfront_accel_ssr_deliver(vnic, st, c);
                        c->n_in_order_pkts = 0;
                        c->next_seq = th_seq + data_length;
                        NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
                        return 0;
                }
                c->next_seq = th_seq + data_length;

                if (++c->n_in_order_pkts < 300) {
                        /* May be in slow-start, so don't merge. */
                        NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
                        return 0;
                }

                if (unlikely(dont_merge)) {
                        if (c->skb)
                                netfront_accel_ssr_deliver(vnic, st, c);
                        return 0;
                }

                if (c->skb) {
                        c->iph->tot_len = ntohs(c->iph->tot_len);
                        c->iph->tot_len += data_length;
                        c->iph->tot_len = htons(c->iph->tot_len);
                        c->th->ack_seq = th->ack_seq;
                        c->th->fin |= th->fin;
                        c->th->psh |= th->psh;
                        c->th->window = th->window;

                        /* Remove the headers from this skb. */
                        skb_pull(skb, skb->len - data_length);

                        /*
                         * Tack the new skb onto the head skb's frag_list.
                         * This is exactly the format that fragmented IP
                         * datagrams are reassembled into.
                         */
                        BUG_ON(skb->next != NULL);
                        if ( ! skb_shinfo(c->skb)->frag_list)
                                skb_shinfo(c->skb)->frag_list = skb;
                        else
                                c->skb_tail->next = skb;
                        c->skb_tail = skb;
                        c->skb->len += skb->len;
                        c->skb->data_len += skb->len;
                        c->skb->truesize += skb->truesize;

                        NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);

                        /*
                         * If the next packet might push this super-packet
                         * over the limit for an IP packet, deliver it now.
                         * This is slightly conservative, but close enough.
                         */
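                        /*
                         * PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE is the
                         * size of one receive buffer, i.e. the most the
                         * next packet could add.  For illustration only:
                         * assuming 4096-byte pages split into two buffers,
                         * this delivers once the super-packet exceeds
                         * 16384 - 2048 = 14336 bytes.
                         */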
                        if (c->skb->len +
                            (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
                            > 16384)
                                netfront_accel_ssr_deliver(vnic, st, c);

                        return 1;
                } else {
                        c->iph = iph;
                        c->th = th;
                        c->skb = skb;
                        return 1;
                }
        }

        /* We're not yet tracking this connection. */

        if (dont_merge) {
                return 0;
        }

        if (list_empty(&st->free_conns)) {
                c = list_entry(st->conns.prev,
                               struct netfront_accel_ssr_conn,
                               link);
                if (c->skb) {
                        NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
                        return 0;
                }
        } else {
                c = list_entry(st->free_conns.next,
                               struct netfront_accel_ssr_conn,
                               link);
        }
        list_del(&c->link);
        list_add(&c->link, &st->conns);
        c->saddr = iph->saddr;
        c->daddr = iph->daddr;
        c->source = th->source;
        c->dest = th->dest;
        c->next_seq = th_seq + data_length;
        c->n_in_order_pkts = 0;
        BUG_ON(c->skb != NULL);
        NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);
        return 0;
}