/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 ****************************************************************************
 */
#include <linux/socket.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include "accel_util.h"
#include "accel_bufs.h"

#include "accel_ssr.h"
39 static inline int list_valid(struct list_head *lh) {
40 return(lh->next != NULL);
/*
 * Forward declaration: the definition appears further down, but
 * netfront_accel_ssr_fini() needs to call it first.
 */
static void netfront_accel_ssr_deliver(struct netfront_accel_vnic *vnic,
				       struct netfront_accel_ssr_state *st,
				       struct netfront_accel_ssr_conn *c);
47 /** Construct an efx_ssr_state.
49 * @v st The SSR state (per channel per port)
52 void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) {
55 INIT_LIST_HEAD(&st->conns);
56 INIT_LIST_HEAD(&st->free_conns);
57 for (i = 0; i < 8; ++i) {
58 struct netfront_accel_ssr_conn *c =
59 kmalloc(sizeof(*c), GFP_KERNEL);
61 c->n_in_order_pkts = 0;
63 list_add(&c->link, &st->free_conns);
69 /** Destructor for an efx_ssr_state.
71 * @v st The SSR state (per channel per port)
73 void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
74 struct netfront_accel_ssr_state *st) {
75 struct netfront_accel_ssr_conn *c;
77 /* Return cleanly if efx_ssr_init() not previously called */
78 BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
79 if (! list_valid(&st->conns))
82 while ( ! list_empty(&st->free_conns)) {
83 c = list_entry(st->free_conns.prev,
84 struct netfront_accel_ssr_conn, link);
86 BUG_ON(c->skb != NULL);
89 while ( ! list_empty(&st->conns)) {
90 c = list_entry(st->conns.prev,
91 struct netfront_accel_ssr_conn, link);
94 netfront_accel_ssr_deliver(vnic, st, c);
100 /** Calc IP checksum and deliver to the OS
102 * @v st The SSR state (per channel per port)
103 * @v c The SSR connection state
105 static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
106 struct netfront_accel_ssr_state *st,
107 struct netfront_accel_ssr_conn *c) {
108 BUG_ON(c->skb == NULL);
111 * If we've chained packets together, recalculate the IP
114 if (skb_shinfo(c->skb)->frag_list) {
115 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
117 c->iph->check = ip_fast_csum((unsigned char *) c->iph,
121 VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len);
123 netif_receive_skb(c->skb);
128 /** Push held skbs down into network stack.
132 * Only called if we are tracking one or more connections.
134 void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
135 struct netfront_accel_ssr_state *st) {
136 struct netfront_accel_ssr_conn *c;
138 BUG_ON(list_empty(&st->conns));
140 list_for_each_entry(c, &st->conns, link)
142 netfront_accel_ssr_deliver(vnic, st, c);
144 /* Time-out connections that have received no traffic for 20ms. */
145 c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
147 if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) {
148 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
150 list_add(&c->link, &st->free_conns);
155 /** Process SKB and decide whether to dispatch it to the stack now or
159 * @v skb SKB to exmaine
160 * @ret rc 0 => deliver SKB to kernel now, otherwise the SKB belongs
163 int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
164 struct netfront_accel_ssr_state *st,
165 struct sk_buff *skb) {
166 int data_length, dont_merge;
167 struct netfront_accel_ssr_conn *c;
172 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
173 BUG_ON(skb->next != NULL);
175 /* We're not interested if it isn't TCP over IPv4. */
176 iph = (struct iphdr *) skb->data;
177 if (skb->protocol != htons(ETH_P_IP) ||
178 iph->protocol != IPPROTO_TCP) {
182 /* Ignore segments that fail csum or are fragmented. */
183 if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
184 (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
188 th = (struct tcphdr*)(skb->data + iph->ihl * 4);
189 data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
190 th_seq = ntohl(th->seq);
191 dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;
193 list_for_each_entry(c, &st->conns, link) {
194 if ((c->saddr - iph->saddr) |
195 (c->daddr - iph->daddr) |
196 (c->source - th->source) |
197 (c->dest - th->dest ))
200 /* Re-insert at head of list to reduce lookup time. */
202 list_add(&c->link, &st->conns);
203 c->last_pkt_jiffies = jiffies;
205 if (unlikely(th_seq - c->next_seq)) {
206 /* Out-of-order, so start counting again. */
208 netfront_accel_ssr_deliver(vnic, st, c);
209 c->n_in_order_pkts = 0;
210 c->next_seq = th_seq + data_length;
211 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
214 c->next_seq = th_seq + data_length;
216 if (++c->n_in_order_pkts < 300) {
217 /* May be in slow-start, so don't merge. */
218 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
222 if (unlikely(dont_merge)) {
224 netfront_accel_ssr_deliver(vnic, st, c);
229 c->iph->tot_len = ntohs(c->iph->tot_len);
230 c->iph->tot_len += data_length;
231 c->iph->tot_len = htons(c->iph->tot_len);
232 c->th->ack_seq = th->ack_seq;
233 c->th->fin |= th->fin;
234 c->th->psh |= th->psh;
235 c->th->window = th->window;
237 /* Remove the headers from this skb. */
238 skb_pull(skb, skb->len - data_length);
241 * Tack the new skb onto the head skb's frag_list.
242 * This is exactly the format that fragmented IP
243 * datagrams are reassembled into.
245 BUG_ON(skb->next != 0);
246 if ( ! skb_shinfo(c->skb)->frag_list)
247 skb_shinfo(c->skb)->frag_list = skb;
249 c->skb_tail->next = skb;
251 c->skb->len += skb->len;
252 c->skb->data_len += skb->len;
253 c->skb->truesize += skb->truesize;
255 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);
258 * If the next packet might push this super-packet
259 * over the limit for an IP packet, deliver it now.
260 * This is slightly conservative, but close enough.
263 (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
265 netfront_accel_ssr_deliver(vnic, st, c);
277 /* We're not yet tracking this connection. */
283 if (list_empty(&st->free_conns)) {
284 c = list_entry(st->conns.prev,
285 struct netfront_accel_ssr_conn,
288 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
293 c = list_entry(st->free_conns.next,
294 struct netfront_accel_ssr_conn,
298 list_add(&c->link, &st->conns);
299 c->saddr = iph->saddr;
300 c->daddr = iph->daddr;
301 c->source = th->source;
303 c->next_seq = th_seq + data_length;
304 c->n_in_order_pkts = 0;
305 BUG_ON(c->skb != NULL);
306 NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);