1 /******************************************************************************
2 * drivers/xen/netback/netback.c
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
10 * Copyright (c) 2002-2005, K A Fraser
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
38 #include <linux/if_vlan.h>
39 #include <linux/kthread.h>
40 #include <linux/vmalloc.h>
42 #include <xen/balloon.h>
43 #include <xen/evtchn.h>
44 #include <xen/gnttab.h>
45 #include <xen/interface/memory.h>
46 #include <xen/net-util.h>
48 /* #define NETBE_DEBUG_INTERRUPT */
50 struct xen_netbk *__read_mostly xen_netbk;
51 unsigned int __read_mostly netbk_nr_groups;
52 static bool __read_mostly use_kthreads = true;
53 static bool __initdata bind_threads;
55 #define GET_GROUP_INDEX(netif) ((netif)->group)
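/*
 * Every vif is assigned to exactly one netback group (a tasklet pair or a
 * kernel thread, depending on the 'tasklets' parameter).  An interface that
 * has not been assigned a group yet reports UINT_MAX here; callers such as
 * netif_be_start_xmit() and netif_be_int() check for that case.
 */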
57 static void netif_idx_release(struct xen_netbk *, u16 pending_idx);
58 static void make_tx_response(netif_t *netif,
59 netif_tx_request_t *txp,
61 static netif_rx_response_t *make_rx_response(netif_t *netif,
68 static void net_tx_action(unsigned long group);
69 static void net_rx_action(unsigned long group);
71 /* Discriminate from any valid pending_idx value. */
72 #define INVALID_PENDING_IDX 0xffff
74 static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, u16 idx)
76 return page_to_pfn(netbk->mmap_pages[idx]);
79 static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, u16 idx)
81 return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
84 /* Extra per-page state for netback, overlaid on struct page's mapping field. */
87 #if BITS_PER_LONG < 64
88 #define GROUP_WIDTH (BITS_PER_LONG - CONFIG_XEN_NETDEV_TX_SHIFT)
89 #define MAX_GROUPS ((1U << GROUP_WIDTH) - 1)
90 unsigned int grp:GROUP_WIDTH;
91 unsigned int idx:CONFIG_XEN_NETDEV_TX_SHIFT;
93 #define MAX_GROUPS UINT_MAX
94 unsigned int grp, idx;
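/*
 * The group is stored biased by one so that an untagged page (->mapping
 * cleared, i.e. grp == 0) can never be mistaken for a page belonging to
 * group 0; netif_page_group() removes the bias again.
 */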
100 static inline void netif_set_page_ext(struct page *pg, unsigned int group,
103 union page_ext ext = { .e = { .grp = group + 1, .idx = idx } };
105 BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
106 pg->mapping = ext.mapping;
109 static inline unsigned int netif_page_group(const struct page *pg)
111 union page_ext ext = { .mapping = pg->mapping };
113 return ext.e.grp - 1;
116 static inline unsigned int netif_page_index(const struct page *pg)
118 union page_ext ext = { .mapping = pg->mapping };
123 static u16 frag_get_pending_idx(const skb_frag_t *frag)
125 return (u16)frag->page_offset;
128 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
130 frag->page_offset = pending_idx;
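/*
 * While a guest TX page is only grant-mapped, the frag's page_offset field
 * is used to carry its pending_idx; netbk_fill_frags() later replaces it
 * with the real page, offset and size taken from the TX request.
 */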
134 * This is the amount of packet we copy rather than map, so that the
135 * guest can't fiddle with the contents of the headers while we do
136 * packet processing on them (netfilter, routing, etc).
138 #define PKT_PROT_LEN (ETH_HLEN + VLAN_HLEN + \
139 sizeof(struct iphdr) + MAX_IPOPTLEN + \
140 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
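/*
 * With a 14 byte Ethernet header, 4 byte VLAN tag, 20 byte IP and TCP
 * headers and 40 bytes of options for each, this works out to roughly the
 * first 138 bytes of every packet.
 */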
142 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
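/*
 * MASK_PEND_IDX relies on MAX_PENDING_REQS being a power of two.  The
 * pending ring holds the indices of free tx slots, so the number of
 * requests currently in flight is MAX_PENDING_REQS minus the number of
 * free entries (pending_prod - pending_cons), as computed below.
 */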
144 static inline pending_ring_idx_t nr_pending_reqs(const struct xen_netbk *netbk)
146 return MAX_PENDING_REQS -
147 netbk->pending_prod + netbk->pending_cons;
150 /* Setting this allows the safe use of this driver without netloop. */
151 static bool MODPARM_copy_skb = true;
152 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
153 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
154 static bool MODPARM_permute_returns;
155 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
156 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
157 module_param_named(groups, netbk_nr_groups, uint, 0);
158 MODULE_PARM_DESC(groups, "Specify the number of tasklet pairs/threads to use");
159 module_param_named(tasklets, use_kthreads, invbool, 0);
160 MODULE_PARM_DESC(tasklets, "Use tasklets instead of kernel threads");
161 module_param_named(bind, bind_threads, bool, 0);
162 MODULE_PARM_DESC(bind, "Bind kernel threads to (v)CPUs");
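/*
 * Note that 'tasklets' is declared invbool: loading with tasklets=1 clears
 * use_kthreads and runs the rx/tx work in tasklets, while the default
 * (use_kthreads == true) runs one netbk_action_thread per group, optionally
 * bound to a CPU via the 'bind' parameter.
 */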
164 int netbk_copy_skb_mode;
166 static inline unsigned long alloc_mfn(struct xen_netbk *netbk)
168 BUG_ON(netbk->alloc_index == 0);
169 return netbk->mfn_list[--netbk->alloc_index];
172 static int check_mfn(struct xen_netbk *netbk, unsigned int nr)
174 struct xen_memory_reservation reservation = {
180 if (likely(netbk->alloc_index >= nr))
183 set_xen_guest_handle(reservation.extent_start,
184 netbk->mfn_list + netbk->alloc_index);
185 reservation.nr_extents = MAX_MFN_ALLOC - netbk->alloc_index;
186 rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
188 netbk->alloc_index += rc;
190 return netbk->alloc_index >= nr ? 0 : -ENOMEM;
193 static void netbk_schedule(struct xen_netbk *netbk)
196 wake_up(&netbk->netbk_action_wq);
198 tasklet_schedule(&netbk->net_tx_tasklet);
201 static void netbk_schedule_group(unsigned long group)
203 netbk_schedule(&xen_netbk[group]);
206 static inline void maybe_schedule_tx_action(unsigned int group)
208 struct xen_netbk *netbk = &xen_netbk[group];
211 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
212 !list_empty(&netbk->schedule_list))
213 netbk_schedule(netbk);
216 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
218 struct skb_shared_info *ninfo;
219 struct sk_buff *nskb;
220 unsigned long offset;
225 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
227 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
231 skb_reserve(nskb, 16 + NET_IP_ALIGN);
232 headlen = skb_end_pointer(nskb) - nskb->data;
233 if (headlen > skb_headlen(skb))
234 headlen = skb_headlen(skb);
235 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
238 ninfo = skb_shinfo(nskb);
239 ninfo->gso_size = skb_shinfo(skb)->gso_size;
240 ninfo->gso_type = skb_shinfo(skb)->gso_type;
243 len = skb->len - headlen;
245 nskb->len = skb->len;
246 nskb->data_len = len;
247 nskb->truesize += len;
254 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
259 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
260 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
262 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
266 ret = skb_copy_bits(skb, offset, page_address(page), copy);
269 __skb_fill_page_desc(nskb, ninfo->nr_frags, page, 0, copy);
276 #ifdef NET_SKBUFF_DATA_USES_OFFSET
279 offset = nskb->data - skb->data;
282 nskb->transport_header = skb->transport_header + offset;
283 nskb->network_header = skb->network_header + offset;
284 nskb->mac_header = skb->mac_header + offset;
294 static inline int netbk_max_required_rx_slots(netif_t *netif)
296 if (netif->can_sg || netif->gso)
297 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
298 return 1; /* all in one */
301 static inline int netbk_queue_full(netif_t *netif)
303 RING_IDX peek = netif->rx_req_cons_peek;
304 RING_IDX needed = netbk_max_required_rx_slots(netif);
306 return ((netif->rx.sring->req_prod - peek) < needed) ||
307 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
310 static void tx_queue_callback(unsigned long data)
312 netif_t *netif = (netif_t *)data;
313 if (netif_schedulable(netif))
314 netif_wake_queue(netif->dev);
317 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
319 netif_t *netif = netdev_priv(dev);
320 unsigned int group = GET_GROUP_INDEX(netif);
321 struct xen_netbk *netbk;
323 BUG_ON(skb->dev != dev);
325 if (unlikely(group >= netbk_nr_groups)) {
326 BUG_ON(group != UINT_MAX);
330 /* Drop the packet if the target domain has no receive buffers. */
331 if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
335 * Copy the packet here if it's destined for a flipping interface
336 * but isn't flippable (e.g. extra references to data).
337 * XXX For now we also copy skbuffs whose head crosses a page
338 * boundary, because netbk_gop_skb can't handle them.
340 if (!netif->copying_receiver ||
341 ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
342 struct sk_buff *nskb = netbk_copy_skb(skb);
343 if (unlikely(nskb == NULL))
345 /* Copy only the header fields we use in this driver. */
346 nskb->dev = skb->dev;
347 nskb->ip_summed = skb->ip_summed;
352 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
353 !!skb_shinfo(skb)->gso_size;
356 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
357 netif->rx.sring->req_event = netif->rx_req_cons_peek +
358 netbk_max_required_rx_slots(netif);
359 mb(); /* request notification /then/ check & stop the queue */
360 if (netbk_queue_full(netif)) {
361 netif_stop_queue(dev);
363 * Schedule a 500ms timeout to restart the queue, thus
364 * ensuring that an inactive queue will be drained.
365 * Packets will be dropped immediately until more
366 * receive buffers become available (see
367 * netbk_queue_full() check above).
369 netif->tx_queue_timeout.data = (unsigned long)netif;
370 netif->tx_queue_timeout.function = tx_queue_callback;
371 mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
375 netbk = &xen_netbk[group];
376 skb_queue_tail(&netbk->rx_queue, skb);
377 netbk_schedule(netbk);
382 dev->stats.tx_dropped++;
388 static void xen_network_done_notify(void)
390 static struct net_device *eth0_dev = NULL;
391 if (unlikely(eth0_dev == NULL))
392 eth0_dev = __dev_get_by_name(&init_net, "eth0");
396 * Add the following to the poll() function of a NAPI driver (Tigon3 is an example):
397 * if ( xen_network_done() )
398 * tg3_enable_ints(tp);
400 int xen_network_done(void)
402 return skb_queue_empty(&rx_queue);
406 struct netrx_pending_operations {
407 unsigned trans_prod, trans_cons;
408 unsigned mmu_prod, mmu_mcl;
409 unsigned mcl_prod, mcl_cons;
410 unsigned copy_prod, copy_cons;
411 unsigned meta_prod, meta_cons;
413 gnttab_transfer_t *trans;
415 multicall_entry_t *mcl;
416 struct netbk_rx_meta *meta;
419 /* Set up the grant operations for this fragment. If it's a flipping
420 interface, we also set up the unmap request from here. */
421 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
422 int i, struct netrx_pending_operations *npo,
423 struct page *page, unsigned long size,
424 unsigned long offset)
427 gnttab_transfer_t *gop;
428 gnttab_copy_t *copy_gop;
429 multicall_entry_t *mcl;
430 netif_rx_request_t *req;
431 unsigned long old_mfn, new_mfn;
432 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
434 old_mfn = virt_to_mfn(page_address(page));
436 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
437 if (netif->copying_receiver) {
438 unsigned int group, idx;
440 /* The fragment needs to be copied rather than
443 copy_gop = npo->copy + npo->copy_prod++;
444 copy_gop->flags = GNTCOPY_dest_gref;
445 if (PageForeign(page) &&
446 page->mapping != NULL &&
447 (idx = netif_page_index(page)) < MAX_PENDING_REQS &&
448 (group = netif_page_group(page)) < netbk_nr_groups) {
449 struct pending_tx_info *src_pend;
452 netbk = &xen_netbk[group];
453 BUG_ON(netbk->mmap_pages[idx] != page);
454 src_pend = &netbk->pending_tx_info[idx];
455 grp = GET_GROUP_INDEX(src_pend->netif);
456 BUG_ON(group != grp && grp != UINT_MAX);
457 copy_gop->source.domid = src_pend->netif->domid;
458 copy_gop->source.u.ref = src_pend->req.gref;
459 copy_gop->flags |= GNTCOPY_source_gref;
461 copy_gop->source.domid = DOMID_SELF;
462 copy_gop->source.u.gmfn = old_mfn;
464 copy_gop->source.offset = offset;
465 copy_gop->dest.domid = netif->domid;
466 copy_gop->dest.offset = 0;
467 copy_gop->dest.u.ref = req->gref;
468 copy_gop->len = size;
471 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
472 new_mfn = alloc_mfn(netbk);
475 * Set the new P2M table entry before
476 * reassigning the old data page. Heed the
477 * comment in pgtable-2level.h:pte_page(). :-)
479 set_phys_to_machine(page_to_pfn(page), new_mfn);
481 mcl = npo->mcl + npo->mcl_prod++;
482 MULTI_update_va_mapping(mcl,
483 (unsigned long)page_address(page),
484 pfn_pte_ma(new_mfn, PAGE_KERNEL),
487 mmu = npo->mmu + npo->mmu_prod++;
488 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
490 mmu->val = page_to_pfn(page);
493 gop = npo->trans + npo->trans_prod++;
495 gop->domid = netif->domid;
496 gop->ref = req->gref;
501 static void netbk_gop_skb(struct sk_buff *skb,
502 struct netrx_pending_operations *npo)
504 netif_t *netif = netdev_priv(skb->dev);
505 int nr_frags = skb_shinfo(skb)->nr_frags;
508 struct netbk_rx_meta *head_meta, *meta;
510 head_meta = npo->meta + npo->meta_prod++;
511 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
512 head_meta->frag.size = skb_shinfo(skb)->gso_size;
513 extra = !!head_meta->frag.size + 1;
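/*
 * 'extra' counts the ring slots taken by the head: one for the first RX
 * request plus one more if a GSO extra-info segment will be generated, so
 * the fragment requests start at index 'extra' below.
 */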
515 for (i = 0; i < nr_frags; i++) {
516 meta = npo->meta + npo->meta_prod++;
517 meta->frag = skb_shinfo(skb)->frags[i];
518 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
519 skb_frag_page(&meta->frag),
520 skb_frag_size(&meta->frag),
521 meta->frag.page_offset);
525 * This must occur at the end to ensure that we don't trash skb_shinfo
526 * until we're done. We know that the head doesn't cross a page
527 * boundary because such packets get copied in netif_be_start_xmit.
529 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
530 virt_to_page(skb->data),
532 offset_in_page(skb->data));
534 netif->rx.req_cons += nr_frags + extra;
537 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
541 for (i = 0; i < nr_frags; i++)
542 put_page(skb_frag_page(&meta[i].frag));
545 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
546 used to set up the operations at the top of
547 netrx_pending_operations, which have since been done. Check that
548 they didn't give any errors and advance over them. */
549 static int netbk_check_gop(int nr_frags, domid_t domid, struct netrx_pending_operations *npo)
551 multicall_entry_t *mcl;
552 gnttab_transfer_t *gop;
553 gnttab_copy_t *copy_op;
554 int status = XEN_NETIF_RSP_OKAY;
557 for (i = 0; i <= nr_frags; i++) {
558 if (npo->meta[npo->meta_cons + i].copy) {
559 copy_op = npo->copy + npo->copy_cons++;
560 if (unlikely(copy_op->status == GNTST_eagain))
561 gnttab_check_GNTST_eagain_while(GNTTABOP_copy, copy_op);
562 if (unlikely(copy_op->status != GNTST_okay)) {
563 DPRINTK("Bad status %d from copy to DOM%d.\n",
564 copy_op->status, domid);
565 status = XEN_NETIF_RSP_ERROR;
568 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
569 mcl = npo->mcl + npo->mcl_cons++;
570 /* The update_va_mapping() must not fail. */
571 BUG_ON(mcl->result != 0);
574 gop = npo->trans + npo->trans_cons++;
575 /* Check the reassignment error code. */
576 if (unlikely(gop->status != GNTST_okay)) {
577 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
580 * Page no longer belongs to us unless
581 * GNTST_bad_page, but that should be
582 * a fatal error anyway.
584 BUG_ON(gop->status == GNTST_bad_page);
585 status = XEN_NETIF_RSP_ERROR;
593 static void netbk_add_frag_responses(netif_t *netif, int status,
594 struct netbk_rx_meta *meta, int nr_frags)
597 unsigned long offset;
599 for (i = 0; i < nr_frags; i++) {
601 int flags = (i == nr_frags - 1) ? 0 : XEN_NETRXF_more_data;
606 offset = meta[i].frag.page_offset;
607 make_rx_response(netif, id, status, offset,
608 meta[i].frag.size, flags);
612 static void net_rx_action(unsigned long group)
614 netif_t *netif = NULL;
617 netif_rx_response_t *resp;
618 multicall_entry_t *mcl;
619 struct sk_buff_head rxq;
625 unsigned long offset;
626 struct xen_netbk *netbk = &xen_netbk[group];
628 struct netrx_pending_operations npo = {
629 .mmu = netbk->rx_mmu,
630 .trans = netbk->grant_trans_op,
631 .copy = netbk->grant_copy_op,
632 .mcl = netbk->rx_mcl,
636 skb_queue_head_init(&rxq);
640 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
641 nr_frags = skb_shinfo(skb)->nr_frags;
642 *(int *)skb->cb = nr_frags;
644 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
645 !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
646 check_mfn(netbk, nr_frags + 1)) {
647 /* Memory squeeze? Back off for an arbitrary while. */
648 if (net_ratelimit())
649 WPRINTK("Memory squeeze in netback "
651 mod_timer(&netbk->net_timer, jiffies + HZ);
652 skb_queue_head(&netbk->rx_queue, skb);
656 netbk_gop_skb(skb, &npo);
658 count += nr_frags + 1;
660 __skb_queue_tail(&rxq, skb);
662 /* Filled the batch queue? */
663 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
667 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
669 npo.mmu_mcl = npo.mcl_prod;
671 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
672 BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
673 mcl = npo.mcl + npo.mcl_prod++;
675 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
676 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
678 mcl->op = __HYPERVISOR_mmu_update;
679 mcl->args[0] = (unsigned long)netbk->rx_mmu;
680 mcl->args[1] = npo.mmu_prod;
682 mcl->args[3] = DOMID_SELF;
685 if (npo.trans_prod) {
686 BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
687 mcl = npo.mcl + npo.mcl_prod++;
688 mcl->op = __HYPERVISOR_grant_table_op;
689 mcl->args[0] = GNTTABOP_transfer;
690 mcl->args[1] = (unsigned long)netbk->grant_trans_op;
691 mcl->args[2] = npo.trans_prod;
695 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
696 mcl = npo.mcl + npo.mcl_prod++;
697 mcl->op = __HYPERVISOR_grant_table_op;
698 mcl->args[0] = GNTTABOP_copy;
699 mcl->args[1] = (unsigned long)netbk->grant_copy_op;
700 mcl->args[2] = npo.copy_prod;
707 BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
709 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
711 /* The mmu_machphys_update() must not fail. */
712 BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
714 while ((skb = __skb_dequeue(&rxq)) != NULL) {
715 nr_frags = *(int *)skb->cb;
717 netif = netdev_priv(skb->dev);
719 status = netbk_check_gop(nr_frags, netif->domid, &npo);
721 /* We can't rely on skb_release_data to release the
722 pages used by fragments for us, since it tries to
723 touch the pages in the fraglist. If we're in
724 flipping mode, that doesn't work. In copying mode,
725 we still have access to all of the pages, and so
726 it's safe to let release_data deal with it. */
727 /* (Freeing the fragments is safe since we copy
728 non-linear skbs destined for flipping interfaces) */
729 if (!netif->copying_receiver) {
730 atomic_set(&(skb_shinfo(skb)->dataref), 1);
731 skb_shinfo(skb)->frag_list = NULL;
732 skb_shinfo(skb)->nr_frags = 0;
733 netbk_free_pages(nr_frags, netbk->meta + npo.meta_cons + 1);
736 skb->dev->stats.tx_bytes += skb->len;
737 skb->dev->stats.tx_packets++;
739 id = netbk->meta[npo.meta_cons].id;
740 flags = nr_frags ? XEN_NETRXF_more_data : 0;
742 switch (skb->ip_summed) {
743 case CHECKSUM_PARTIAL: /* local packet? */
744 flags |= XEN_NETRXF_csum_blank |
745 XEN_NETRXF_data_validated;
747 case CHECKSUM_UNNECESSARY: /* remote but checksummed? */
748 flags |= XEN_NETRXF_data_validated;
752 if (netbk->meta[npo.meta_cons].copy)
755 offset = offset_in_page(skb->data);
756 resp = make_rx_response(netif, id, status, offset,
757 skb_headlen(skb), flags);
759 if (netbk->meta[npo.meta_cons].frag.size) {
760 struct netif_extra_info *gso =
761 (struct netif_extra_info *)
762 RING_GET_RESPONSE(&netif->rx,
763 netif->rx.rsp_prod_pvt++);
765 resp->flags |= XEN_NETRXF_extra_info;
767 gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
768 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
770 gso->u.gso.features = 0;
772 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
776 netbk_add_frag_responses(netif, status,
777 netbk->meta + npo.meta_cons + 1,
780 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
781 irq = netif->irq - DYNIRQ_BASE;
782 if (ret && !__test_and_set_bit(irq, netbk->rx_notify))
783 netbk->notify_list[notify_nr++] = irq;
785 if (netif_queue_stopped(netif->dev) &&
786 netif_schedulable(netif) &&
787 !netbk_queue_full(netif))
788 netif_wake_queue(netif->dev);
793 npo.meta_cons += nr_frags + 1;
796 if (notify_nr == 1) {
797 irq = *netbk->notify_list;
798 __clear_bit(irq, netbk->rx_notify);
799 notify_remote_via_irq(irq + DYNIRQ_BASE);
801 for (count = ret = 0; ret < notify_nr; ++ret) {
802 irq = netbk->notify_list[ret];
803 __clear_bit(irq, netbk->rx_notify);
804 if (!multi_notify_remote_via_irq(netbk->rx_mcl + count,
808 if (HYPERVISOR_multicall(netbk->rx_mcl, count))
812 /* More work to do? */
813 if (!skb_queue_empty(&netbk->rx_queue) &&
814 !timer_pending(&netbk->net_timer))
815 netbk_schedule(netbk);
818 xen_network_done_notify();
822 static int __on_net_schedule_list(netif_t *netif)
824 return netif->list.next != NULL;
827 /* Must be called with netbk->schedule_list_lock held. */
828 static void remove_from_net_schedule_list(netif_t *netif)
830 if (likely(__on_net_schedule_list(netif))) {
831 list_del(&netif->list);
832 netif->list.next = NULL;
837 static netif_t *poll_net_schedule_list(struct xen_netbk *netbk)
839 netif_t *netif = NULL;
841 spin_lock_irq(&netbk->schedule_list_lock);
842 if (!list_empty(&netbk->schedule_list)) {
843 netif = list_first_entry(&netbk->schedule_list, netif_t, list);
845 remove_from_net_schedule_list(netif);
847 spin_unlock_irq(&netbk->schedule_list_lock);
851 static void add_to_net_schedule_list_tail(netif_t *netif)
853 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
856 if (__on_net_schedule_list(netif))
859 spin_lock_irqsave(&netbk->schedule_list_lock, flags);
860 if (!__on_net_schedule_list(netif) &&
861 likely(netif_schedulable(netif))) {
862 list_add_tail(&netif->list, &netbk->schedule_list);
865 spin_unlock_irqrestore(&netbk->schedule_list_lock, flags);
869 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
870 * If this driver is pipelining transmit requests then we can be very
871 * aggressive in avoiding new-packet notifications -- the frontend only needs to
872 * send a notification if there are no outstanding unreceived responses.
873 * If we may be buffering transmit requests for any reason then we must be rather
874 * more conservative and treat this as the final check for pending work.
876 void netif_schedule_work(netif_t *netif)
880 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
881 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
883 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
887 add_to_net_schedule_list_tail(netif);
888 maybe_schedule_tx_action(GET_GROUP_INDEX(netif));
892 void netif_deschedule_work(netif_t *netif)
894 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
896 spin_lock_irq(&netbk->schedule_list_lock);
897 remove_from_net_schedule_list(netif);
898 spin_unlock_irq(&netbk->schedule_list_lock);
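/*
 * Credit-based TX scheduling: each interface may consume up to credit_bytes
 * of transmit requests per credit_usec interval.  net_tx_action() charges
 * every request against remaining_credit and, once the allowance is used
 * up, arms credit_timeout so that the credit is topped up again and the
 * interface rescheduled (see tx_credit_callback() below).
 */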
902 static void tx_add_credit(netif_t *netif)
904 unsigned long max_burst, max_credit;
907 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
908 * Otherwise the interface can seize up due to insufficient credit.
910 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
911 max_burst = min(max_burst, 131072UL);
912 max_burst = max(max_burst, netif->credit_bytes);
914 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
915 max_credit = netif->remaining_credit + netif->credit_bytes;
916 if (max_credit < netif->remaining_credit)
917 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
919 netif->remaining_credit = min(max_credit, max_burst);
922 static void tx_credit_callback(unsigned long data)
924 netif_t *netif = (netif_t *)data;
925 tx_add_credit(netif);
926 netif_schedule_work(netif);
929 static inline int copy_pending_req(struct xen_netbk *netbk,
930 pending_ring_idx_t pending_idx)
932 return gnttab_copy_grant_page(netbk->grant_tx_handle[pending_idx],
933 &netbk->mmap_pages[pending_idx]);
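/*
 * Debug aid for the 'permute_returns' parameter (only honoured for a single
 * group, see net_tx_action_dealloc()): shuffle the queued deallocation
 * entries so that TX responses reach the frontend out of order, exercising
 * frontends that must not depend on in-order completions.
 */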
936 static void permute_dealloc_ring(u16 *dealloc_ring, pending_ring_idx_t dc,
937 pending_ring_idx_t dp)
939 static unsigned random_src = 0x12345678;
941 pending_ring_idx_t dest;
945 dst_offset = (random_src / 256) % (dp - dc);
946 dest = dc + dst_offset;
947 tmp = dealloc_ring[MASK_PEND_IDX(dest)];
948 dealloc_ring[MASK_PEND_IDX(dest)] =
949 dealloc_ring[MASK_PEND_IDX(dc)];
950 dealloc_ring[MASK_PEND_IDX(dc)] = tmp;
956 static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
958 struct netbk_tx_pending_inuse *inuse, *n;
959 gnttab_unmap_grant_ref_t *gop;
961 pending_ring_idx_t dc, dp;
965 dc = netbk->dealloc_cons;
966 gop = netbk->tx_unmap_ops;
969 * Free up any grants we have finished using
972 dp = netbk->dealloc_prod;
974 /* Ensure we see all indices enqueued by netif_idx_release(). */
977 if (MODPARM_permute_returns && netbk_nr_groups == 1)
978 permute_dealloc_ring(netbk->dealloc_ring, dc, dp);
982 struct netbk_tx_pending_inuse *pending_inuse =
983 netbk->pending_inuse;
985 pending_idx = netbk->dealloc_ring[MASK_PEND_IDX(dc++)];
986 list_move_tail(&pending_inuse[pending_idx].list, &list);
988 pfn = idx_to_pfn(netbk, pending_idx);
989 /* Already unmapped? */
990 if (!phys_to_machine_mapping_valid(pfn))
993 gnttab_set_unmap_op(gop, idx_to_kaddr(netbk, pending_idx),
995 netbk->grant_tx_handle[pending_idx]);
999 } while (dp != netbk->dealloc_prod);
1001 netbk->dealloc_cons = dc;
1003 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1004 netbk->tx_unmap_ops,
1005 gop - netbk->tx_unmap_ops))
1008 /* Copy any entries that have been pending for too long. */
1009 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1010 !list_empty(&netbk->pending_inuse_head)) {
1011 list_for_each_entry_safe(inuse, n, &netbk->pending_inuse_head, list) {
1012 struct pending_tx_info *pending_tx_info
1013 = netbk->pending_tx_info;
1015 if (time_after(inuse->alloc_time + HZ / 2, jiffies))
1018 pending_idx = inuse - netbk->pending_inuse;
1020 pending_tx_info[pending_idx].netif->nr_copied_skbs++;
1022 switch (copy_pending_req(netbk, pending_idx)) {
1024 list_move_tail(&inuse->list, &list);
1027 list_del_init(&inuse->list);
1037 list_for_each_entry_safe(inuse, n, &list, list) {
1038 struct pending_tx_info *pending_tx_info =
1039 netbk->pending_tx_info;
1041 pending_idx = inuse - netbk->pending_inuse;
1042 netif = pending_tx_info[pending_idx].netif;
1044 make_tx_response(netif, &pending_tx_info[pending_idx].req,
1045 XEN_NETIF_RSP_OKAY);
1047 /* Ready for next use. */
1048 gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
1050 netbk->pending_ring[MASK_PEND_IDX(netbk->pending_prod++)] =
1055 list_del_init(&inuse->list);
1059 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
1061 RING_IDX cons = netif->tx.req_cons;
1064 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1067 txp = RING_GET_REQUEST(&netif->tx, cons++);
1069 netif->tx.req_cons = cons;
1070 netif_schedule_work(netif);
1074 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
1075 netif_tx_request_t *txp, int work_to_do)
1077 RING_IDX cons = netif->tx.req_cons;
1080 if (!(first->flags & XEN_NETTXF_more_data))
1084 if (frags >= work_to_do) {
1085 DPRINTK("Need more frags\n");
1089 if (unlikely(frags >= MAX_SKB_FRAGS)) {
1090 DPRINTK("Too many frags\n");
1094 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
1096 if (txp->size > first->size) {
1097 DPRINTK("Frags galore\n");
1101 first->size -= txp->size;
1104 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
1105 DPRINTK("txp->offset: %x, size: %u\n",
1106 txp->offset, txp->size);
1109 } while ((txp++)->flags & XEN_NETTXF_more_data);
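/*
 * netbk_count_requests() returns the number of extra fragment slots the
 * packet occupies (0 if it fits in the first request alone), or a negative
 * value on a malformed request, which the caller reports via netbk_tx_err().
 */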
1114 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
1115 struct sk_buff *skb,
1116 netif_tx_request_t *txp,
1117 gnttab_map_grant_ref_t *mop)
1119 struct skb_shared_info *shinfo = skb_shinfo(skb);
1120 skb_frag_t *frags = shinfo->frags;
1121 u16 pending_idx = *(u16 *)skb->data;
1124 /* Skip the first skb fragment if it is on the same page as the header fragment. */
1125 start = (frag_get_pending_idx(frags) == pending_idx);
1127 for (i = start; i < shinfo->nr_frags; i++, txp++) {
1128 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
1129 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_cons++);
1130 struct pending_tx_info *pending_tx_info =
1131 netbk->pending_tx_info;
1133 pending_idx = netbk->pending_ring[index];
1135 gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
1136 GNTMAP_host_map | GNTMAP_readonly,
1137 txp->gref, netif->domid);
1139 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
1141 pending_tx_info[pending_idx].netif = netif;
1142 frag_set_pending_idx(&frags[i], pending_idx);
1148 static int netbk_tx_check_mop(struct xen_netbk *netbk, struct sk_buff *skb,
1149 gnttab_map_grant_ref_t **mopp)
1151 gnttab_map_grant_ref_t *mop = *mopp;
1152 u16 pending_idx = *(u16 *)skb->data;
1153 struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
1154 netif_t *netif = pending_tx_info[pending_idx].netif;
1155 netif_tx_request_t *txp;
1156 struct skb_shared_info *shinfo = skb_shinfo(skb);
1157 int nr_frags = shinfo->nr_frags;
1160 /* Check status of header. */
1162 if (unlikely(err != GNTST_okay)) {
1163 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_prod++);
1165 txp = &pending_tx_info[pending_idx].req;
1166 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1167 netbk->pending_ring[index] = pending_idx;
1170 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1171 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
1172 netbk->grant_tx_handle[pending_idx] = mop->handle;
1175 /* Skip the first skb fragment if it is on the same page as the header fragment. */
1176 start = (frag_get_pending_idx(shinfo->frags) == pending_idx);
1178 for (i = start; i < nr_frags; i++) {
1180 pending_ring_idx_t index;
1182 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
1184 /* Check error status: if okay then remember grant handle. */
1185 newerr = (++mop)->status;
1186 if (likely(newerr == GNTST_okay)) {
1187 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1188 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
1189 netbk->grant_tx_handle[pending_idx] = mop->handle;
1190 /* Had a previous error? Invalidate this fragment. */
1191 if (unlikely(err != GNTST_okay))
1192 netif_idx_release(netbk, pending_idx);
1196 /* Error on this fragment: respond to client with an error. */
1197 txp = &pending_tx_info[pending_idx].req;
1198 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1199 index = MASK_PEND_IDX(netbk->pending_prod++);
1200 netbk->pending_ring[index] = pending_idx;
1203 /* Not the first error? Preceding frags already invalidated. */
1204 if (err != GNTST_okay)
1207 /* First error: invalidate header and preceding fragments. */
1208 pending_idx = *((u16 *)skb->data);
1209 netif_idx_release(netbk, pending_idx);
1210 for (j = start; j < i; j++) {
1211 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1212 netif_idx_release(netbk, pending_idx);
1215 /* Remember the error: invalidate all subsequent fragments. */
1223 static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1225 struct skb_shared_info *shinfo = skb_shinfo(skb);
1226 int nr_frags = shinfo->nr_frags;
1229 for (i = 0; i < nr_frags; i++) {
1230 netif_tx_request_t *txp;
1231 u16 pending_idx = frag_get_pending_idx(shinfo->frags + i);
1233 netbk->pending_inuse[pending_idx].alloc_time = jiffies;
1234 list_add_tail(&netbk->pending_inuse[pending_idx].list,
1235 &netbk->pending_inuse_head);
1237 txp = &netbk->pending_tx_info[pending_idx].req;
1238 __skb_fill_page_desc(skb, i, netbk->mmap_pages[pending_idx],
1239 txp->offset, txp->size);
1241 skb->len += txp->size;
1242 skb->data_len += txp->size;
1243 skb->truesize += txp->size;
1247 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1250 struct netif_extra_info extra;
1251 RING_IDX cons = netif->tx.req_cons;
1254 if (unlikely(work_to_do-- <= 0)) {
1255 DPRINTK("Missing extra info\n");
1259 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1261 if (unlikely(!extra.type ||
1262 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1263 netif->tx.req_cons = ++cons;
1264 DPRINTK("Invalid extra type: %d\n", extra.type);
1268 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1269 netif->tx.req_cons = ++cons;
1270 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1275 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1277 if (!gso->u.gso.size) {
1278 DPRINTK("GSO size must not be zero.\n");
1282 /* Currently only TCPv4 segmentation offload is supported. */
1283 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1284 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1288 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1289 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1291 /* Header must be checked, and gso_segs computed. */
1292 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1293 skb_shinfo(skb)->gso_segs = 0;
1298 /* Called after netfront has transmitted */
1299 static void net_tx_action(unsigned long group)
1301 struct xen_netbk *netbk = &xen_netbk[group];
1302 struct sk_buff *skb;
1304 netif_tx_request_t txreq;
1305 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1306 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1309 gnttab_map_grant_ref_t *mop;
1310 unsigned int data_len;
1311 int ret, work_to_do;
1313 net_tx_action_dealloc(netbk);
1315 mop = netbk->tx_map_ops;
1316 BUILD_BUG_ON(MAX_SKB_FRAGS >= MAX_PENDING_REQS);
1317 while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1318 !list_empty(&netbk->schedule_list)) {
1319 /* Get a netif from the list with work to do. */
1320 netif = poll_net_schedule_list(netbk);
1324 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1330 i = netif->tx.req_cons;
1331 rmb(); /* Ensure that we see the request before we copy it. */
1332 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1334 /* Credit-based scheduling. */
1335 if (txreq.size > netif->remaining_credit) {
1336 unsigned long now = jiffies;
1337 unsigned long next_credit =
1338 netif->credit_timeout.expires +
1339 msecs_to_jiffies(netif->credit_usec / 1000);
1341 /* Timer could already be pending in rare cases. */
1342 if (timer_pending(&netif->credit_timeout)) {
1347 /* Passed the point where we can replenish credit? */
1348 if (time_after_eq(now, next_credit)) {
1349 netif->credit_timeout.expires = now;
1350 tx_add_credit(netif);
1353 /* Still too big to send right now? Set a callback. */
1354 if (txreq.size > netif->remaining_credit) {
1355 netif->credit_timeout.data =
1356 (unsigned long)netif;
1357 netif->credit_timeout.function =
1359 mod_timer(&netif->credit_timeout, next_credit);
1364 netif->remaining_credit -= txreq.size;
1367 netif->tx.req_cons = ++i;
1369 memset(extras, 0, sizeof(extras));
1370 if (txreq.flags & XEN_NETTXF_extra_info) {
1371 work_to_do = netbk_get_extras(netif, extras,
1373 i = netif->tx.req_cons;
1374 if (unlikely(work_to_do < 0)) {
1375 netbk_tx_err(netif, &txreq, i);
1380 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1381 if (unlikely(ret < 0)) {
1382 netbk_tx_err(netif, &txreq, i - ret);
1387 if (unlikely(txreq.size < ETH_HLEN)) {
1388 DPRINTK("Bad packet size: %d\n", txreq.size);
1389 netbk_tx_err(netif, &txreq, i);
1393 /* The payload must not cross a page boundary, as it cannot be fragmented. */
1394 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1395 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1396 txreq.offset, txreq.size,
1397 (txreq.offset & ~PAGE_MASK) + txreq.size);
1398 netbk_tx_err(netif, &txreq, i);
1402 pending_idx = netbk->pending_ring[MASK_PEND_IDX(netbk->pending_cons)];
1404 data_len = (txreq.size > PKT_PROT_LEN &&
1405 ret < MAX_SKB_FRAGS) ?
1406 PKT_PROT_LEN : txreq.size;
1408 skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
1409 GFP_ATOMIC | __GFP_NOWARN);
1410 if (unlikely(skb == NULL)) {
1411 DPRINTK("Can't allocate a skb in start_xmit.\n");
1412 netbk_tx_err(netif, &txreq, i);
1416 /* Packets passed to netif_rx() must have some headroom. */
1417 skb_reserve(skb, 16 + NET_IP_ALIGN);
1419 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1420 struct netif_extra_info *gso;
1421 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1423 if (netbk_set_skb_gso(skb, gso)) {
1425 netbk_tx_err(netif, &txreq, i);
1430 gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
1431 GNTMAP_host_map | GNTMAP_readonly,
1432 txreq.gref, netif->domid);
1435 memcpy(&netbk->pending_tx_info[pending_idx].req,
1436 &txreq, sizeof(txreq));
1437 netbk->pending_tx_info[pending_idx].netif = netif;
1438 *((u16 *)skb->data) = pending_idx;
1440 __skb_put(skb, data_len);
1442 skb_shinfo(skb)->nr_frags = ret;
1443 if (data_len < txreq.size)
1444 skb_shinfo(skb)->nr_frags++;
1446 pending_idx = INVALID_PENDING_IDX;
1447 frag_set_pending_idx(skb_shinfo(skb)->frags, pending_idx);
1449 __skb_queue_tail(&netbk->tx_queue, skb);
1451 netbk->pending_cons++;
1453 mop = netbk_get_requests(netif, skb, txfrags, mop);
1455 netif->tx.req_cons = i;
1456 netif_schedule_work(netif);
1458 if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
1462 if (mop == netbk->tx_map_ops)
1465 /* NOTE: some maps may fail with GNTST_eagain, which could be successfully
1466 * retried in the backend after a delay. However, we can also fail the tx
1467 * req and let the frontend resend the relevant packet again. This is fine
1468 * because it is unlikely that a network buffer will be paged out or shared,
1469 * and therefore it is unlikely to fail with GNTST_eagain. */
1470 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
1472 mop - netbk->tx_map_ops);
1475 mop = netbk->tx_map_ops;
1476 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1477 struct net_device *dev;
1478 netif_tx_request_t *txp;
1480 pending_idx = *((u16 *)skb->data);
1481 netif = netbk->pending_tx_info[pending_idx].netif;
1483 txp = &netbk->pending_tx_info[pending_idx].req;
1485 /* Check the remap error code. */
1486 if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
1487 DPRINTK("netback grant failed.\n");
1488 skb_shinfo(skb)->nr_frags = 0;
1490 dev->stats.rx_dropped++;
1494 data_len = skb->len;
1496 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1498 if (data_len < txp->size) {
1499 /* Append the packet payload as a fragment. */
1500 txp->offset += data_len;
1501 txp->size -= data_len;
1503 /* Schedule a response immediately. */
1504 netif_idx_release(netbk, pending_idx);
1507 if (txp->flags & XEN_NETTXF_csum_blank)
1508 skb->ip_summed = CHECKSUM_PARTIAL;
1509 else if (txp->flags & XEN_NETTXF_data_validated)
1510 skb->ip_summed = CHECKSUM_UNNECESSARY;
1512 skb->ip_summed = CHECKSUM_NONE;
1514 netbk_fill_frags(netbk, skb);
1517 * If the initial fragment was < PKT_PROT_LEN then
1518 * pull through some bytes from the other fragments to
1519 * increase the linear region to PKT_PROT_LEN bytes.
1521 if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1522 int target = min_t(int, skb->len, PKT_PROT_LEN);
1523 __pskb_pull_tail(skb, target - skb_headlen(skb));
1526 skb->protocol = eth_type_trans(skb, dev);
1528 if (skb_checksum_setup(skb, &netif->rx_gso_csum_fixups)) {
1529 DPRINTK("Can't setup checksum in net_tx_action\n");
1534 if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
1535 unlikely(skb_linearize(skb))) {
1536 DPRINTK("Can't linearize skb in net_tx_action.\n");
1538 dev->stats.rx_errors++;
1542 dev->stats.rx_bytes += skb->len;
1543 dev->stats.rx_packets++;
1552 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1553 !list_empty(&netbk->pending_inuse_head)) {
1554 struct netbk_tx_pending_inuse *oldest;
1556 oldest = list_entry(netbk->pending_inuse_head.next,
1557 struct netbk_tx_pending_inuse, list);
1558 mod_timer(&netbk->tx_pending_timer, oldest->alloc_time + HZ);
1562 static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1564 unsigned long flags;
1566 spin_lock_irqsave(&netbk->release_lock, flags);
1567 netbk->dealloc_ring[MASK_PEND_IDX(netbk->dealloc_prod)] = pending_idx;
1568 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1570 netbk->dealloc_prod++;
1571 spin_unlock_irqrestore(&netbk->release_lock, flags);
1573 netbk_schedule(netbk);
1576 static void netif_page_release(struct page *page, unsigned int order)
1578 unsigned int idx = netif_page_index(page);
1579 unsigned int group = netif_page_group(page);
1580 struct xen_netbk *netbk = &xen_netbk[group];
1583 BUG_ON(group >= netbk_nr_groups || idx >= MAX_PENDING_REQS);
1584 BUG_ON(netbk->mmap_pages[idx] != page);
1585 netif_idx_release(netbk, idx);
1588 irqreturn_t netif_be_int(int irq, void *dev_id)
1590 netif_t *netif = dev_id;
1591 unsigned int group = GET_GROUP_INDEX(netif);
1593 if (unlikely(group >= netbk_nr_groups)) {
1595 * Short of having a way to bind the IRQ in disabled mode
1596 * (IRQ_NOAUTOEN), we have to ignore the first invocation(s)
1597 * (before we got assigned to a group).
1599 BUG_ON(group != UINT_MAX);
1603 add_to_net_schedule_list_tail(netif);
1604 maybe_schedule_tx_action(group);
1606 if (netif_schedulable(netif) && !netbk_queue_full(netif))
1607 netif_wake_queue(netif->dev);
1612 static void make_tx_response(netif_t *netif,
1613 netif_tx_request_t *txp,
1616 RING_IDX i = netif->tx.rsp_prod_pvt;
1617 netif_tx_response_t *resp;
1620 resp = RING_GET_RESPONSE(&netif->tx, i);
1624 if (txp->flags & XEN_NETTXF_extra_info)
1625 RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1627 netif->tx.rsp_prod_pvt = ++i;
1628 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1630 notify_remote_via_irq(netif->irq);
1632 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1633 if (i == netif->tx.req_cons) {
1635 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1637 add_to_net_schedule_list_tail(netif);
1642 static netif_rx_response_t *make_rx_response(netif_t *netif,
1649 RING_IDX i = netif->rx.rsp_prod_pvt;
1650 netif_rx_response_t *resp;
1652 resp = RING_GET_RESPONSE(&netif->rx, i);
1653 resp->offset = offset;
1654 resp->flags = flags;
1656 resp->status = (s16)size;
1658 resp->status = (s16)st;
1660 netif->rx.rsp_prod_pvt = ++i;
1665 #ifdef NETBE_DEBUG_INTERRUPT
1666 static irqreturn_t netif_be_dbg(int irq, void *dev_id)
1669 unsigned int i = 0, group;
1671 pr_alert("netif_schedule_list:\n");
1673 for (group = 0; group < netbk_nr_groups; ++group) {
1674 struct xen_netbk *netbk = &xen_netbk[group];
1676 spin_lock_irq(&netbk->schedule_list_lock);
1678 list_for_each_entry(netif, &netbk->schedule_list, list) {
1679 pr_alert(" %d: private(rx_req_cons=%08x "
1680 "rx_resp_prod=%08x\n", i,
1681 netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1682 pr_alert(" tx_req_cons=%08x tx_resp_prod=%08x)\n",
1683 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1684 pr_alert(" shared(rx_req_prod=%08x "
1685 "rx_resp_prod=%08x\n",
1686 netif->rx.sring->req_prod,
1687 netif->rx.sring->rsp_prod);
1688 pr_alert(" rx_event=%08x tx_req_prod=%08x\n",
1689 netif->rx.sring->rsp_event,
1690 netif->tx.sring->req_prod);
1691 pr_alert(" tx_resp_prod=%08x, tx_event=%08x)\n",
1692 netif->tx.sring->rsp_prod,
1693 netif->tx.sring->rsp_event);
1697 spin_unlock_irq(&netbk->schedule_list_lock);
1700 pr_alert(" ** End of netif_schedule_list **\n");
1705 static struct irqaction netif_be_dbg_action = {
1706 .handler = netif_be_dbg,
1707 .flags = IRQF_SHARED,
1708 .name = "net-be-dbg"
1712 static inline int rx_work_todo(struct xen_netbk *netbk)
1714 return !skb_queue_empty(&netbk->rx_queue);
1717 static inline int tx_work_todo(struct xen_netbk *netbk)
1719 if (netbk->dealloc_cons != netbk->dealloc_prod)
1722 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1723 !list_empty(&netbk->pending_inuse_head))
1726 if (nr_pending_reqs(netbk) + MAX_SKB_FRAGS < MAX_PENDING_REQS &&
1727 !list_empty(&netbk->schedule_list))
1733 static int netbk_action_thread(void *index)
1735 unsigned long group = (unsigned long)index;
1736 struct xen_netbk *netbk = &xen_netbk[group];
1738 while (!kthread_should_stop()) {
1739 wait_event_interruptible(netbk->netbk_action_wq,
1740 rx_work_todo(netbk) ||
1741 tx_work_todo(netbk) ||
1742 kthread_should_stop());
1745 if (rx_work_todo(netbk))
1746 net_rx_action(group);
1748 if (tx_work_todo(netbk))
1749 net_tx_action(group);
1756 static int __init netback_init(void)
1758 unsigned int i, group;
1762 if (!is_running_on_xen())
1765 group = netbk_nr_groups;
1766 if (!netbk_nr_groups)
1767 netbk_nr_groups = (num_online_cpus() + 1) / 2;
1768 if (netbk_nr_groups > MAX_GROUPS)
1769 netbk_nr_groups = MAX_GROUPS;
1772 xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
1773 } while (!xen_netbk && (netbk_nr_groups >>= 1));
1776 if (group && netbk_nr_groups != group)
1777 pr_warn("netback: only using %u (instead of %u) groups\n",
1778 netbk_nr_groups, group);
1780 /* We can increase reservation by this much in net_rx_action(). */
1781 balloon_update_driver_allowance(netbk_nr_groups * NET_RX_RING_SIZE);
1783 for (group = 0; group < netbk_nr_groups; group++) {
1784 struct xen_netbk *netbk = &xen_netbk[group];
1786 skb_queue_head_init(&netbk->rx_queue);
1787 skb_queue_head_init(&netbk->tx_queue);
1789 init_timer(&netbk->net_timer);
1790 netbk->net_timer.data = group;
1791 netbk->net_timer.function = netbk_schedule_group;
1793 init_timer(&netbk->tx_pending_timer);
1794 netbk->tx_pending_timer.data = group;
1795 netbk->tx_pending_timer.function = netbk_schedule_group;
1797 netbk->pending_prod = MAX_PENDING_REQS;
1799 INIT_LIST_HEAD(&netbk->pending_inuse_head);
1800 INIT_LIST_HEAD(&netbk->schedule_list);
1802 spin_lock_init(&netbk->schedule_list_lock);
1803 spin_lock_init(&netbk->release_lock);
1806 alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1807 if (netbk->mmap_pages == NULL) {
1808 pr_err("%s: out of memory\n", __func__);
1813 for (i = 0; i < MAX_PENDING_REQS; i++) {
1814 page = netbk->mmap_pages[i];
1815 SetPageForeign(page, netif_page_release);
1816 netif_set_page_ext(page, group, i);
1817 netbk->pending_ring[i] = i;
1818 INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
1822 init_waitqueue_head(&netbk->netbk_action_wq);
1823 netbk->task = kthread_create(netbk_action_thread,
1824 (void *)(long)group,
1825 "netback/%u", group);
1827 if (IS_ERR(netbk->task)) {
1828 pr_err("netback: kthread_create() failed\n");
1829 rc = PTR_ERR(netbk->task);
1833 kthread_bind(netbk->task,
1834 group % num_online_cpus());
1835 wake_up_process(netbk->task);
1837 tasklet_init(&netbk->net_tx_tasklet, net_tx_action, group);
1838 tasklet_init(&netbk->net_rx_tasklet, net_rx_action, group);
1842 netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
1843 if (MODPARM_copy_skb) {
1844 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
1846 netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
1848 netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
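/*
 * Resulting behaviour: NETBK_ALWAYS_COPY_SKB linearises every skb built in
 * net_tx_action() because GNTTABOP_unmap_and_replace is unavailable;
 * NETBK_DELAYED_COPY_SKB copies a grant-mapped page only if it is still in
 * use more than half a second after completion (see net_tx_action_dealloc());
 * NETBK_DONT_COPY_SKB does neither.
 */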
1853 netif_xenbus_init();
1855 #ifdef NETBE_DEBUG_INTERRUPT
1856 (void)bind_virq_to_irqaction(VIRQ_DEBUG,
1858 &netif_be_dbg_action);
1865 struct xen_netbk *netbk = &xen_netbk[group];
1867 if (use_kthreads && netbk->task && !IS_ERR(netbk->task))
1868 kthread_stop(netbk->task);
1869 if (netbk->mmap_pages)
1870 free_empty_pages_and_pagevec(netbk->mmap_pages,
1874 balloon_update_driver_allowance(-(long)netbk_nr_groups
1875 * NET_RX_RING_SIZE);
1880 module_init(netback_init);
1882 MODULE_LICENSE("Dual BSD/GPL");
1883 MODULE_ALIAS("xen-backend:vif");