/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 ****************************************************************************
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>

#include "accel.h"
#include "accel_util.h"
#include "accel_bufs.h"
#include "accel_tso.h"

#define ETH_HDR_LEN(skb)  skb_network_offset(skb)
#define SKB_TCP_OFF(skb)  skb_transport_offset(skb)
#define SKB_IP_OFF(skb)   skb_network_offset(skb)

/*
 * Set a maximum number of buffers in each output packet to make life
 * a little simpler - if this is reached it will just move on to
 * another packet
 */
#define ACCEL_TSO_MAX_BUFFERS (6)

/** TSO State.
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct netfront_accel_tso_state {
	/** bytes of data we've yet to segment */
	unsigned remaining_len;

	/** current sequence number */
	unsigned seqnum;

	/** remaining space in current packet */
	unsigned packet_space;

	/** List of packets to be output, containing the buffers and
	 *  iovecs to describe each packet
	 */
	struct netfront_accel_tso_output_packet *output_packets;

	/** Total number of buffers in output_packets */
	unsigned buffers;

	/** Total number of packets in output_packets */
	unsigned packets;

	/** Input Fragment Cursor.
	 *
	 * Where we are in the current fragment of the incoming SKB.  These
	 * values get updated in place when we split a fragment over
	 * multiple packets.
	 */
	struct {
		/** address of current position */
		void *addr;
		/** remaining length */
		unsigned int len;
	} ifc; /* == ifc Input Fragment Cursor */

	/** Parameters.
	 *
	 * These values are set once at the start of the TSO send and do
	 * not get changed as the routine progresses.
	 */
	struct {
		/* the number of bytes of header */
		unsigned int header_length;

		/* Total size (headers plus payload) of each full
		 * outgoing segment. */
		int full_packet_size;

		/* Current IP ID, host endian. */
		unsigned ip_id;

		/* Max size of each output packet payload */
		int gso_size;
	} p;
};

/**
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.
 *
 * @v skb	The sk_buff to check.
 */
static inline void tso_check_safe(struct sk_buff *skb) {
	EPRINTK_ON(skb->protocol != htons(ETH_P_IP));
	EPRINTK_ON(((struct ethhdr *)skb->data)->h_proto != htons(ETH_P_IP));
	EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
	EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb));
}
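
/*
 * Note: EPRINTK_ON() is assumed (from its use here and in the accel
 * headers) to log an error when its condition is true; these checks
 * are diagnostics only and do not prevent the send from proceeding.
 */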

/** Parse the SKB header and initialise state. */
static inline void tso_start(struct netfront_accel_tso_state *st,
			     struct sk_buff *skb) {
	/*
	 * All ethernet/IP/TCP headers combined size is TCP header size
	 * plus offset of TCP header relative to start of packet.
	 */
	st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb);
	st->p.full_packet_size = (st->p.header_length
				  + skb_shinfo(skb)->gso_size);
	st->p.gso_size = skb_shinfo(skb)->gso_size;

	st->p.ip_id = htons(ip_hdr(skb)->id);
	st->seqnum = ntohl(tcp_hdr(skb)->seq);

	EPRINTK_ON(tcp_hdr(skb)->urg);
	EPRINTK_ON(tcp_hdr(skb)->syn);
	EPRINTK_ON(tcp_hdr(skb)->rst);

	st->remaining_len = skb->len - st->p.header_length;

	st->output_packets = NULL;
	st->buffers = 0;
	st->packets = 0;

	VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
		st->p.header_length, st->p.full_packet_size, st->p.gso_size,
		st->seqnum, skb->len);
}
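
/*
 * Worked example (illustrative numbers, not from the driver): for a
 * standard Ethernet frame with no IP or TCP options, SKB_TCP_OFF() is
 * 14 + 20 = 34 and tcp_hdrlen() is 20, so header_length is 54.  With
 * gso_size = 1446 each full output packet is 54 + 1446 = 1500 bytes,
 * and a 10000-byte payload segments into ceil(10000 / 1446) = 7
 * packets, the last carrying the 1324-byte remainder.
 */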

/**
 * Add another NIC mapped buffer onto an output packet
 */
static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
				       struct netfront_accel_tso_state *st,
				       int first)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_pkt_desc *buf;

	/* Get a mapped packet buffer */
	buf = netfront_accel_buf_get(vnic->tx_bufs);
	if (buf == NULL) {
		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
		return -1;
	}

	/* Store a bit of meta-data at the end */
	tso_buf = (struct netfront_accel_tso_buffer *)
		(buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
		 + sizeof(struct netfront_accel_tso_output_packet));

	tso_buf->buf = buf;
	tso_buf->length = 0;

	if (first) {
		struct netfront_accel_tso_output_packet *output_packet
			= (struct netfront_accel_tso_output_packet *)
			(buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
		output_packet->next = st->output_packets;
		st->output_packets = output_packet;
		tso_buf->next = NULL;
		st->output_packets->tso_bufs = tso_buf;
		st->output_packets->tso_bufs_len = 1;
	} else {
		tso_buf->next = st->output_packets->tso_bufs;
		st->output_packets->tso_bufs = tso_buf;
		st->output_packets->tso_bufs_len++;
	}

	BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);

	st->buffers++;

	/*
	 * Store the context, set to NULL; the last packet buffer will
	 * get the real skb pointer once all buffers are in place
	 */
	tso_buf->buf->skb = NULL;

	return 0;
}
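
/*
 * A sketch of the buffer layout implied by the pointer arithmetic
 * above (assuming pkt_kva points at a NIC-mapped region big enough
 * for the payload area plus both meta-data structs):
 *
 *   pkt_kva
 *   |<- NETFRONT_ACCEL_TSO_BUF_LENGTH ->|<- output_packet ->|<- tso_buf ->|
 *        packet data                      per-packet list     per-buffer
 *
 * Every buffer reserves space for both structs, but only the "first"
 * buffer of each output packet initialises the output_packet slot.
 */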

/*
 * Generate a new header, and prepare for the new packet.
 *
 * @v vnic	VNIC
 * @v skb	Socket buffer
 * @v st	TSO state
 *
 * @ret rc	0 on success, or -1 if failed to alloc header
 */
static inline
int tso_start_new_packet(netfront_accel_vnic *vnic,
			 struct sk_buff *skb,
			 struct netfront_accel_tso_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct iphdr *tsoh_iph;
	struct tcphdr *tsoh_th;
	unsigned ip_length;

	if (tso_start_new_buffer(vnic, st, 1) < 0) {
		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
		return -1;
	}

	/* This has been set up by tso_start_new_buffer() */
	tso_buf = st->output_packets->tso_bufs;

	/* Copy in the header */
	memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length);
	tso_buf->length = st->p.header_length;

	tsoh_th = (struct tcphdr *)
		(tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
	tsoh_iph = (struct iphdr *)
		(tso_buf->buf->pkt_kva + SKB_IP_OFF(skb));

	/* Set to zero to encourage falcon to fill these in */
	tsoh_th->check = 0;
	tsoh_iph->check = 0;

	tsoh_th->seq = htonl(st->seqnum);
	st->seqnum += st->p.gso_size;

	if (st->remaining_len > st->p.gso_size) {
		/* This packet will not finish the TSO burst. */
		ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
		tsoh_th->fin = 0;
		tsoh_th->psh = 0;
	} else {
		/* This packet will be the last in the TSO burst. */
		ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
			     + st->remaining_len);
		tsoh_th->fin = tcp_hdr(skb)->fin;
		tsoh_th->psh = tcp_hdr(skb)->psh;
	}

	tsoh_iph->tot_len = htons(ip_length);

	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
	tsoh_iph->id = htons(st->p.ip_id++);

	st->packet_space = st->p.gso_size;
	st->packets++;

	return 0;
}
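
/*
 * Illustrative numbers (not from the driver): with header_length = 54,
 * gso_size = 1446 and ETH_HDR_LEN = 14, every non-final segment gets
 * tot_len = 1500 - 14 = 1486, and a final segment with 1324 payload
 * bytes remaining gets tot_len = 54 - 14 + 1324 = 1364.  Successive
 * segments take consecutive IP IDs, matching the gaps the Linux stack
 * leaves in the ID sequence when it emits a GSO packet.
 */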

static inline void tso_get_fragment(struct netfront_accel_tso_state *st,
				    int len, void *addr)
{
	st->ifc.len = len;
	st->ifc.addr = addr;
}

static inline void tso_unwind(netfront_accel_vnic *vnic,
			      struct netfront_accel_tso_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	struct netfront_accel_tso_output_packet *output_packet;

	DPRINTK("%s\n", __FUNCTION__);

	while (st->output_packets != NULL) {
		output_packet = st->output_packets;
		st->output_packets = output_packet->next;
		while (output_packet->tso_bufs != NULL) {
			tso_buf = output_packet->tso_bufs;
			output_packet->tso_bufs = tso_buf->next;

			st->buffers--;
			output_packet->tso_bufs_len--;

			netfront_accel_buf_put(vnic->tx_bufs,
					       tso_buf->buf->buf_id);
		}
	}
	BUG_ON(st->buffers != 0);
}

static inline
void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
				   struct netfront_accel_tso_state *st)
{
	struct netfront_accel_tso_buffer *tso_buf;
	int n, space;

	BUG_ON(st->output_packets == NULL);
	BUG_ON(st->output_packets->tso_bufs == NULL);

	tso_buf = st->output_packets->tso_bufs;

	if (st->ifc.len == 0) return;
	if (st->packet_space == 0) return;
	if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;

	/* Copy as much as fits in both the packet and the buffer */
	n = min(st->ifc.len, st->packet_space);

	space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
	n = min(n, space);

	st->packet_space -= n;
	st->remaining_len -= n;
	st->ifc.len -= n;

	memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);

	tso_buf->length += n;

	BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);

	/* Advance the input cursor past the bytes we consumed */
	st->ifc.addr += n;
}
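
/*
 * Illustrative trace (assuming NETFRONT_ACCEL_TSO_BUF_LENGTH does not
 * bind first): with packet_space = 1446 and a 4096-byte input
 * fragment, one call copies 1446 bytes and leaves ifc.len = 2650 with
 * ifc.addr advanced past the copied bytes; the caller starts a new
 * packet and calls again, so a single input fragment can span several
 * output packets with no extra bookkeeping.
 */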

int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
				   struct sk_buff *skb)
{
	struct netfront_accel_tso_state state;
	struct netfront_accel_tso_buffer *tso_buf = NULL;
	struct netfront_accel_tso_output_packet *reversed_list = NULL;
	struct netfront_accel_tso_output_packet *tmp_pkt;
	ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
	int frag_i, rc, dma_id;
	skb_frag_t *f;

	tso_check_safe(skb);

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		EPRINTK("Trying to TSO send a packet without HW checksum\n");

	tso_start(&state, skb);

	/*
	 * Setup the first payload fragment.  If the skb header area
	 * contains exactly the headers and all payload is in the frag
	 * list things are a little simpler
	 */
	if (skb_headlen(skb) == state.p.header_length) {
		/* Grab the first payload fragment. */
		BUG_ON(skb_shinfo(skb)->nr_frags < 1);
		frag_i = 0;
		f = &skb_shinfo(skb)->frags[frag_i];
		tso_get_fragment(&state, skb_frag_size(f),
				 page_address(skb_frag_page(f)) +
				 f->page_offset);
	} else {
		int hl = state.p.header_length;
		tso_get_fragment(&state, skb_headlen(skb) - hl,
				 skb->data + hl);
		frag_i = -1;
	}

	if (tso_start_new_packet(vnic, skb, &state) < 0) {
		DPRINTK("%s: out of first start-packet memory\n",
			__FUNCTION__);
		goto unwind;
	}

	while (1) {
		tso_fill_packet_with_fragment(vnic, &state);

		/* Move onto the next fragment? */
		if (state.ifc.len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			f = &skb_shinfo(skb)->frags[frag_i];
			tso_get_fragment(&state, skb_frag_size(f),
					 page_address(skb_frag_page(f)) +
					 f->page_offset);
		}

		/* Start a new buffer? */
		if ((state.output_packets->tso_bufs->length ==
		     NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
		    tso_start_new_buffer(vnic, &state, 0)) {
			DPRINTK("%s: out of start-buffer memory\n",
				__FUNCTION__);
			goto unwind;
		}

		/* Start a new packet? */
		if ((state.packet_space == 0 ||
		     ((state.output_packets->tso_bufs_len >=
		       ACCEL_TSO_MAX_BUFFERS) &&
		      (state.output_packets->tso_bufs->length >=
		       NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
		    tso_start_new_packet(vnic, skb, &state) < 0) {
			DPRINTK("%s: out of start-packet memory\n",
				__FUNCTION__);
			goto unwind;
		}
	}

	/* Check for space */
	if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
		DPRINTK("%s: Not enough TX space (%d)\n",
			__FUNCTION__, state.buffers);
		goto unwind;
	}

	/*
	 * Store the skb context in the most recent buffer (i.e. the
	 * last buffer that will be sent)
	 */
	state.output_packets->tso_bufs->buf->skb = skb;

	/* Reverse the list of packets as we construct it on a stack */
	while (state.output_packets != NULL) {
		tmp_pkt = state.output_packets;
		state.output_packets = tmp_pkt->next;
		tmp_pkt->next = reversed_list;
		reversed_list = tmp_pkt;
	}

	/* Pass off to hardware */
	while (reversed_list != NULL) {
		tmp_pkt = reversed_list;
		reversed_list = tmp_pkt->next;

		BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
		BUG_ON(tmp_pkt->tso_bufs_len == 0);

		dma_id = tmp_pkt->tso_bufs->buf->buf_id;

		/*
		 * Make an iovec of the buffers in the list, reversing
		 * the buffers as we go as they are constructed on a
		 * stack
		 */
		tso_buf = tmp_pkt->tso_bufs;
		for (frag_i = tmp_pkt->tso_bufs_len - 1;
		     frag_i >= 0;
		     frag_i--) {
			iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
			iovecs[frag_i].iov_len = tso_buf->length;
			tso_buf = tso_buf->next;
		}

		rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
				     dma_id);
		/*
		 * We checked for space already, so it really should
		 * succeed
		 */
		BUG_ON(rc != 0);
	}

	/* Track number of tx fastpath stats */
	vnic->netdev_stats.fastpath_tx_bytes += skb->len;
	vnic->netdev_stats.fastpath_tx_pkts += state.packets;
#if NETFRONT_ACCEL_STATS
	{
		unsigned n;
		n = vnic->netdev_stats.fastpath_tx_pkts -
			vnic->stats.fastpath_tx_completions;
		if (n > vnic->stats.fastpath_tx_pending_max)
			vnic->stats.fastpath_tx_pending_max = n;
	}
#endif

	return NETFRONT_ACCEL_STATUS_GOOD;

 unwind:
	tso_unwind(vnic, &state);

	NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);

	return NETFRONT_ACCEL_STATUS_BUSY;
}
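
/*
 * A minimal usage sketch (hypothetical caller; in this driver the real
 * call site lives in the fastpath transmit code, e.g. accel_vi.c):
 *
 *	if (skb_shinfo(skb)->gso_size)
 *		status = netfront_accel_enqueue_skb_tso(vnic, skb);
 *
 * NETFRONT_ACCEL_STATUS_BUSY asks the caller to back off and retry
 * later; the unwind path has already returned every buffer this call
 * claimed, so no cleanup is owed by the caller.
 */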