1 /******************************************************************************
2 * drivers/xen/netback/netback.c
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
10 * Copyright (c) 2002-2005, K A Fraser
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
38 #include <linux/kthread.h>
39 #include <linux/vmalloc.h>
40 #include <xen/balloon.h>
41 #include <xen/interface/memory.h>
43 /*#define NETBE_DEBUG_INTERRUPT*/
45 struct xen_netbk *__read_mostly xen_netbk;
46 unsigned int __read_mostly netbk_nr_groups;
47 static bool __read_mostly use_kthreads = true;
48 static bool __initdata bind_threads;
50 #define GET_GROUP_INDEX(netif) ((netif)->group)
52 static void netif_idx_release(struct xen_netbk *, u16 pending_idx);
53 static void make_tx_response(netif_t *netif,
54 netif_tx_request_t *txp,
56 static netif_rx_response_t *make_rx_response(netif_t *netif,
63 static void net_tx_action(unsigned long group);
64 static void net_rx_action(unsigned long group);
66 static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, unsigned int idx)
68 return page_to_pfn(netbk->mmap_pages[idx]);
71 static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, unsigned int idx)
73 return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
76 /* extra field used in struct page */
79 #if BITS_PER_LONG < 64
80 #define GROUP_WIDTH (BITS_PER_LONG - CONFIG_XEN_NETDEV_TX_SHIFT)
81 #define MAX_GROUPS ((1U << GROUP_WIDTH) - 1)
82 unsigned int grp:GROUP_WIDTH;
83 unsigned int idx:CONFIG_XEN_NETDEV_TX_SHIFT;
85 #define MAX_GROUPS UINT_MAX
86 unsigned int grp, idx;
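/*
 * The group number is stored biased by one: netif_set_page_ext() writes
 * grp = group + 1 and netif_page_group() subtracts it again, so a page
 * whose ->mapping was never tagged by netback decodes to an out-of-range
 * group (the subtraction wraps) instead of aliasing group 0.
 */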
92 static inline void netif_set_page_ext(struct page *pg, unsigned int group,
95 union page_ext ext = { .e = { .grp = group + 1, .idx = idx } };
97 BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
98 pg->mapping = ext.mapping;
101 static inline unsigned int netif_page_group(const struct page *pg)
103 union page_ext ext = { .mapping = pg->mapping };
105 return ext.e.grp - 1;
108 static inline unsigned int netif_page_index(const struct page *pg)
110 union page_ext ext = { .mapping = pg->mapping };
115 #define PKT_PROT_LEN 64
117 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
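/*
 * pending_prod and pending_cons are free-running counters; MASK_PEND_IDX()
 * reduces an index modulo the (power-of-two) ring size when it is used to
 * address pending_ring[] or dealloc_ring[].  nr_pending_reqs() below gives
 * the number of slots in use: e.g. with MAX_PENDING_REQS at 256, a producer
 * of 260 and a consumer of 8 leave 256 - 260 + 8 == 4 requests in flight,
 * and index 260 aliases ring entry 4.
 */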
119 static inline pending_ring_idx_t nr_pending_reqs(const struct xen_netbk *netbk)
121 return MAX_PENDING_REQS -
122 netbk->pending_prod + netbk->pending_cons;
125 /* Setting this allows the safe use of this driver without netloop. */
126 static int MODPARM_copy_skb = 1;
127 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
128 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
129 static int MODPARM_permute_returns = 0;
130 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
131 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
132 module_param_named(groups, netbk_nr_groups, uint, 0);
133 MODULE_PARM_DESC(groups, "Specify the number of tasklet pairs/threads to use");
134 module_param_named(tasklets, use_kthreads, invbool, 0);
135 MODULE_PARM_DESC(tasklets, "Use tasklets instead of kernel threads");
136 module_param_named(bind, bind_threads, bool, 0);
137 MODULE_PARM_DESC(bind, "Bind kernel threads to (v)CPUs");
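/*
 * Note on the knobs above: "groups" selects how many tasklet pairs or kernel
 * threads to run (netback_init() defaults it to roughly half the online
 * CPUs); "tasklets" is an inverted bool bound to use_kthreads, so tasklets=1
 * switches from kernel threads to tasklets; "bind" asks netback_init() to
 * bind each kernel thread to the CPU matching its group (see kthread_bind()).
 */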
139 int netbk_copy_skb_mode;
141 static inline unsigned long alloc_mfn(struct xen_netbk *netbk)
143 BUG_ON(netbk->alloc_index == 0);
144 return netbk->mfn_list[--netbk->alloc_index];
147 static int check_mfn(struct xen_netbk *netbk, unsigned int nr)
149 struct xen_memory_reservation reservation = {
155 if (likely(netbk->alloc_index >= nr))
158 set_xen_guest_handle(reservation.extent_start,
159 netbk->mfn_list + netbk->alloc_index);
160 reservation.nr_extents = MAX_MFN_ALLOC - netbk->alloc_index;
161 rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
163 netbk->alloc_index += rc;
165 return netbk->alloc_index >= nr ? 0 : -ENOMEM;
168 static inline void maybe_schedule_tx_action(unsigned int group)
170 struct xen_netbk *netbk = &xen_netbk[group];
173 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
174 !list_empty(&netbk->net_schedule_list)) {
176 wake_up(&netbk->netbk_action_wq);
178 tasklet_schedule(&netbk->net_tx_tasklet);
182 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
184 struct skb_shared_info *ninfo;
185 struct sk_buff *nskb;
186 unsigned long offset;
191 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
193 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
197 skb_reserve(nskb, 16 + NET_IP_ALIGN);
198 headlen = skb_end_pointer(nskb) - nskb->data;
199 if (headlen > skb_headlen(skb))
200 headlen = skb_headlen(skb);
201 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
204 ninfo = skb_shinfo(nskb);
205 ninfo->gso_size = skb_shinfo(skb)->gso_size;
206 ninfo->gso_type = skb_shinfo(skb)->gso_type;
209 len = skb->len - headlen;
211 nskb->len = skb->len;
212 nskb->data_len = len;
213 nskb->truesize += len;
220 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
225 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
226 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
228 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
232 ret = skb_copy_bits(skb, offset, page_address(page), copy);
235 ninfo->frags[ninfo->nr_frags].page = page;
236 ninfo->frags[ninfo->nr_frags].page_offset = 0;
237 ninfo->frags[ninfo->nr_frags].size = copy;
244 #ifdef NET_SKBUFF_DATA_USES_OFFSET
247 offset = nskb->data - skb->data;
250 nskb->transport_header = skb->transport_header + offset;
251 nskb->network_header = skb->network_header + offset;
252 nskb->mac_header = skb->mac_header + offset;
262 static inline int netbk_max_required_rx_slots(netif_t *netif)
264 if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
265 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
266 return 1; /* all in one */
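/*
 * The RX ring counts as full when either the frontend has not posted enough
 * request slots (req_prod - peek < needed) or producing the responses would
 * overrun our own response ring (rsp_prod_pvt + NET_RX_RING_SIZE - peek <
 * needed).  rx_req_cons_peek counts slots already committed to skbs queued
 * in netif_be_start_xmit() but not yet consumed by net_rx_action().
 */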
269 static inline int netbk_queue_full(netif_t *netif)
271 RING_IDX peek = netif->rx_req_cons_peek;
272 RING_IDX needed = netbk_max_required_rx_slots(netif);
274 return ((netif->rx.sring->req_prod - peek) < needed) ||
275 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
278 static void tx_queue_callback(unsigned long data)
280 netif_t *netif = (netif_t *)data;
281 if (netif_schedulable(netif))
282 netif_wake_queue(netif->dev);
285 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
287 netif_t *netif = netdev_priv(dev);
288 struct xen_netbk *netbk;
290 BUG_ON(skb->dev != dev);
292 /* Drop the packet if the target domain has no receive buffers. */
293 if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
297 * Copy the packet here if it's destined for a flipping interface
298 * but isn't flippable (e.g. extra references to data).
299 * XXX For now we also copy skbuffs whose head crosses a page
300 * boundary, because netbk_gop_skb can't handle them.
302 if (!netif->copying_receiver ||
303 ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
304 struct sk_buff *nskb = netbk_copy_skb(skb);
305 if (unlikely(nskb == NULL))
307 /* Copy only the header fields we use in this driver. */
308 nskb->dev = skb->dev;
309 nskb->ip_summed = skb->ip_summed;
310 nskb->proto_data_valid = skb->proto_data_valid;
315 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
316 !!skb_shinfo(skb)->gso_size;
319 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
320 netif->rx.sring->req_event = netif->rx_req_cons_peek +
321 netbk_max_required_rx_slots(netif);
322 mb(); /* request notification /then/ check & stop the queue */
323 if (netbk_queue_full(netif)) {
324 netif_stop_queue(dev);
326 * Schedule 500ms timeout to restart the queue, thus
327 * ensuring that an inactive queue will be drained.
328 * Packets will immediately be dropped until more
329 * receive buffers become available (see
330 * netbk_queue_full() check above).
332 netif->tx_queue_timeout.data = (unsigned long)netif;
333 netif->tx_queue_timeout.function = tx_queue_callback;
334 mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
338 netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
339 skb_queue_tail(&netbk->rx_queue, skb);
341 wake_up(&netbk->netbk_action_wq);
343 tasklet_schedule(&netbk->net_rx_tasklet);
348 netif->stats.tx_dropped++;
354 static void xen_network_done_notify(void)
356 static struct net_device *eth0_dev = NULL;
357 if (unlikely(eth0_dev == NULL))
358 eth0_dev = __dev_get_by_name(&init_net, "eth0");
362 * Add following to poll() function in NAPI driver (Tigon3 is example):
363 * if ( xen_network_done() )
364 * tg3_enable_ints(tp);
366 int xen_network_done(void)
368 return skb_queue_empty(&rx_queue);
372 struct netrx_pending_operations {
373 unsigned trans_prod, trans_cons;
374 unsigned mmu_prod, mmu_mcl;
375 unsigned mcl_prod, mcl_cons;
376 unsigned copy_prod, copy_cons;
377 unsigned meta_prod, meta_cons;
379 gnttab_transfer_t *trans;
381 multicall_entry_t *mcl;
382 struct netbk_rx_meta *meta;
385 /* Set up the grant operations for this fragment. If it's a flipping
386 interface, we also set up the unmap request from here. */
387 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
388 int i, struct netrx_pending_operations *npo,
389 struct page *page, unsigned long size,
390 unsigned long offset)
393 gnttab_transfer_t *gop;
394 gnttab_copy_t *copy_gop;
395 multicall_entry_t *mcl;
396 netif_rx_request_t *req;
397 unsigned long old_mfn, new_mfn;
398 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
400 old_mfn = virt_to_mfn(page_address(page));
402 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
403 if (netif->copying_receiver) {
404 unsigned int group, idx;
406 /* The fragment needs to be copied rather than
409 copy_gop = npo->copy + npo->copy_prod++;
410 copy_gop->flags = GNTCOPY_dest_gref;
411 if (PageForeign(page) &&
412 page->mapping != NULL &&
413 (idx = netif_page_index(page)) < MAX_PENDING_REQS &&
414 (group = netif_page_group(page)) < netbk_nr_groups) {
415 struct pending_tx_info *src_pend;
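/*
 * The source page is still foreign (a guest TX buffer mapped for an
 * in-flight transmit), so copy straight from that guest's grant reference
 * rather than from a local frame.
 */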
417 netbk = &xen_netbk[group];
418 BUG_ON(netbk->mmap_pages[idx] != page);
419 src_pend = &netbk->pending_tx_info[idx];
420 BUG_ON(group != GET_GROUP_INDEX(src_pend->netif));
421 copy_gop->source.domid = src_pend->netif->domid;
422 copy_gop->source.u.ref = src_pend->req.gref;
423 copy_gop->flags |= GNTCOPY_source_gref;
425 copy_gop->source.domid = DOMID_SELF;
426 copy_gop->source.u.gmfn = old_mfn;
428 copy_gop->source.offset = offset;
429 copy_gop->dest.domid = netif->domid;
430 copy_gop->dest.offset = 0;
431 copy_gop->dest.u.ref = req->gref;
432 copy_gop->len = size;
435 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
436 new_mfn = alloc_mfn(netbk);
439 * Set the new P2M table entry before
440 * reassigning the old data page. Heed the
441 * comment in pgtable-2level.h:pte_page(). :-)
443 set_phys_to_machine(page_to_pfn(page), new_mfn);
445 mcl = npo->mcl + npo->mcl_prod++;
446 MULTI_update_va_mapping(mcl,
447 (unsigned long)page_address(page),
448 pfn_pte_ma(new_mfn, PAGE_KERNEL),
451 mmu = npo->mmu + npo->mmu_prod++;
452 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
454 mmu->val = page_to_pfn(page);
457 gop = npo->trans + npo->trans_prod++;
459 gop->domid = netif->domid;
460 gop->ref = req->gref;
465 static void netbk_gop_skb(struct sk_buff *skb,
466 struct netrx_pending_operations *npo)
468 netif_t *netif = netdev_priv(skb->dev);
469 int nr_frags = skb_shinfo(skb)->nr_frags;
472 struct netbk_rx_meta *head_meta, *meta;
474 head_meta = npo->meta + npo->meta_prod++;
475 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
476 head_meta->frag.size = skb_shinfo(skb)->gso_size;
477 extra = !!head_meta->frag.size + 1;
479 for (i = 0; i < nr_frags; i++) {
480 meta = npo->meta + npo->meta_prod++;
481 meta->frag = skb_shinfo(skb)->frags[i];
482 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
485 meta->frag.page_offset);
489 * This must occur at the end to ensure that we don't trash skb_shinfo
490 * until we're done. We know that the head doesn't cross a page
491 * boundary because such packets get copied in netif_be_start_xmit.
493 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
494 virt_to_page(skb->data),
496 offset_in_page(skb->data));
498 netif->rx.req_cons += nr_frags + extra;
501 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
505 for (i = 0; i < nr_frags; i++)
506 put_page(meta[i].frag.page);
509 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
510 used to set up the operations on the top of
511 netrx_pending_operations, which have since been done. Check that
512 they didn't give any errors and advance over them. */
513 static int netbk_check_gop(int nr_frags, domid_t domid,
514 struct netrx_pending_operations *npo, int *eagain)
516 multicall_entry_t *mcl;
517 gnttab_transfer_t *gop;
518 gnttab_copy_t *copy_op;
519 int status = NETIF_RSP_OKAY;
524 for (i = 0; i <= nr_frags; i++) {
525 if (npo->meta[npo->meta_cons + i].copy) {
526 copy_op = npo->copy + npo->copy_cons++;
527 if (copy_op->status != GNTST_okay) {
528 DPRINTK("Bad status %d from copy to DOM%d.\n",
529 copy_op->status, domid);
530 status = NETIF_RSP_ERROR;
531 if (copy_op->status == GNTST_eagain)
535 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
536 mcl = npo->mcl + npo->mcl_cons++;
537 /* The update_va_mapping() must not fail. */
538 BUG_ON(mcl->result != 0);
541 gop = npo->trans + npo->trans_cons++;
542 /* Check the reassignment error code. */
543 if (gop->status != 0) {
544 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
547 * Page no longer belongs to us unless
548 * GNTST_bad_page, but that should be
549 * a fatal error anyway.
551 BUG_ON(gop->status == GNTST_bad_page);
552 if (gop->status == GNTST_eagain)
554 status = NETIF_RSP_ERROR;
562 static void netbk_add_frag_responses(netif_t *netif, int status,
563 struct netbk_rx_meta *meta, int nr_frags)
566 unsigned long offset;
568 for (i = 0; i < nr_frags; i++) {
570 int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
575 offset = meta[i].frag.page_offset;
576 make_rx_response(netif, id, status, offset,
577 meta[i].frag.size, flags);
581 static void net_rx_action(unsigned long group)
583 netif_t *netif = NULL;
586 netif_rx_response_t *resp;
587 multicall_entry_t *mcl;
588 struct sk_buff_head rxq;
594 unsigned long offset;
595 struct xen_netbk *netbk = &xen_netbk[group];
597 struct netrx_pending_operations npo = {
598 .mmu = netbk->rx_mmu,
599 .trans = netbk->grant_trans_op,
600 .copy = netbk->grant_copy_op,
601 .mcl = netbk->rx_mcl,
605 skb_queue_head_init(&rxq);
609 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
610 nr_frags = skb_shinfo(skb)->nr_frags;
611 *(int *)skb->cb = nr_frags;
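/*
 * Stash the original frag count in skb->cb: for flipping interfaces
 * skb_shinfo(skb)->nr_frags is cleared below before the responses are
 * generated, so the second pass over rxq reads the count back from cb.
 */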
613 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
614 !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
615 check_mfn(netbk, nr_frags + 1)) {
616 /* Memory squeeze? Back off for an arbitrary while. */
617 if (net_ratelimit())
618 WPRINTK("Memory squeeze in netback "
620 mod_timer(&netbk->net_timer, jiffies + HZ);
621 skb_queue_head(&netbk->rx_queue, skb);
625 netbk_gop_skb(skb, &npo);
627 count += nr_frags + 1;
629 __skb_queue_tail(&rxq, skb);
631 /* Filled the batch queue? */
632 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
636 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
638 npo.mmu_mcl = npo.mcl_prod;
640 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
641 BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
642 mcl = npo.mcl + npo.mcl_prod++;
644 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
645 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
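/*
 * Piggy-back a single global TLB flush on the last update_va_mapping entry
 * rather than flushing once per remapped page.
 */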
647 mcl->op = __HYPERVISOR_mmu_update;
648 mcl->args[0] = (unsigned long)netbk->rx_mmu;
649 mcl->args[1] = npo.mmu_prod;
651 mcl->args[3] = DOMID_SELF;
654 if (npo.trans_prod) {
655 BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
656 mcl = npo.mcl + npo.mcl_prod++;
657 mcl->op = __HYPERVISOR_grant_table_op;
658 mcl->args[0] = GNTTABOP_transfer;
659 mcl->args[1] = (unsigned long)netbk->grant_trans_op;
660 mcl->args[2] = npo.trans_prod;
664 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
665 mcl = npo.mcl + npo.mcl_prod++;
666 mcl->op = __HYPERVISOR_grant_table_op;
667 mcl->args[0] = GNTTABOP_copy;
668 mcl->args[1] = (unsigned long)netbk->grant_copy_op;
669 mcl->args[2] = npo.copy_prod;
676 BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
678 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
680 /* The mmu_machphys_update() must not fail. */
681 BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
683 while ((skb = __skb_dequeue(&rxq)) != NULL) {
684 nr_frags = *(int *)skb->cb;
686 netif = netdev_priv(skb->dev);
688 status = netbk_check_gop(nr_frags, netif->domid, &npo, &eagain);
690 /* We can't rely on skb_release_data to release the
691 pages used by fragments for us, since it tries to
692 touch the pages in the fraglist. If we're in
693 flipping mode, that doesn't work. In copying mode,
694 we still have access to all of the pages, and so
695 it's safe to let release_data deal with it. */
696 /* (Freeing the fragments is safe since we copy
697 non-linear skbs destined for flipping interfaces) */
698 if (!netif->copying_receiver) {
700 * Cannot handle failed grant transfers at the moment (because
701 * mmu_updates likely completed)
704 atomic_set(&(skb_shinfo(skb)->dataref), 1);
705 skb_shinfo(skb)->frag_list = NULL;
706 skb_shinfo(skb)->nr_frags = 0;
707 netbk_free_pages(nr_frags, netbk->meta + npo.meta_cons + 1);
712 netif->stats.tx_bytes += skb->len;
713 netif->stats.tx_packets++;
716 id = netbk->meta[npo.meta_cons].id;
717 flags = nr_frags ? NETRXF_more_data : 0;
719 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
720 flags |= NETRXF_csum_blank | NETRXF_data_validated;
721 else if (skb->proto_data_valid) /* remote but checksummed? */
722 flags |= NETRXF_data_validated;
724 if (netbk->meta[npo.meta_cons].copy)
727 offset = offset_in_page(skb->data);
728 resp = make_rx_response(netif, id, status, offset,
729 skb_headlen(skb), flags);
731 if (netbk->meta[npo.meta_cons].frag.size) {
732 struct netif_extra_info *gso =
733 (struct netif_extra_info *)
734 RING_GET_RESPONSE(&netif->rx,
735 netif->rx.rsp_prod_pvt++);
737 resp->flags |= NETRXF_extra_info;
739 gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
740 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
742 gso->u.gso.features = 0;
744 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
748 netbk_add_frag_responses(netif, status,
749 netbk->meta + npo.meta_cons + 1,
752 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
753 irq = netif->irq - DYNIRQ_BASE;
754 if (ret && !__test_and_set_bit(irq, netbk->rx_notify))
755 netbk->notify_list[notify_nr++] = irq;
757 if (netif_queue_stopped(netif->dev) &&
758 netif_schedulable(netif) &&
759 !netbk_queue_full(netif))
760 netif_wake_queue(netif->dev);
762 if (!eagain || netbk_queue_full(netif))
766 netif->stats.tx_dropped += !!eagain;
770 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
771 !!skb_shinfo(skb)->gso_size;
772 skb_queue_head(&netbk->rx_queue, skb);
775 npo.meta_cons += nr_frags + 1;
778 if (notify_nr == 1) {
779 irq = *netbk->notify_list;
780 __clear_bit(irq, netbk->rx_notify);
781 notify_remote_via_irq(irq + DYNIRQ_BASE);
783 for (count = ret = 0; ret < notify_nr; ++ret) {
784 irq = netbk->notify_list[ret];
785 __clear_bit(irq, netbk->rx_notify);
786 if (!multi_notify_remote_via_irq(netbk->rx_mcl + count,
790 if (HYPERVISOR_multicall(netbk->rx_mcl, count))
794 /* More work to do? */
795 if (!skb_queue_empty(&netbk->rx_queue) &&
796 !timer_pending(&netbk->net_timer)) {
798 wake_up(&netbk->netbk_action_wq);
800 tasklet_schedule(&netbk->net_rx_tasklet);
804 xen_network_done_notify();
808 static void net_alarm(unsigned long group)
811 wake_up(&xen_netbk[group].netbk_action_wq);
813 tasklet_schedule(&xen_netbk[group].net_rx_tasklet);
816 static void netbk_tx_pending_timeout(unsigned long group)
819 wake_up(&xen_netbk[group].netbk_action_wq);
821 tasklet_schedule(&xen_netbk[group].net_tx_tasklet);
824 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
826 netif_t *netif = netdev_priv(dev);
827 return &netif->stats;
830 static int __on_net_schedule_list(netif_t *netif)
832 return netif->list.next != NULL;
835 static void remove_from_net_schedule_list(netif_t *netif)
837 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
839 spin_lock_irq(&netbk->net_schedule_list_lock);
840 if (likely(__on_net_schedule_list(netif))) {
841 list_del(&netif->list);
842 netif->list.next = NULL;
845 spin_unlock_irq(&netbk->net_schedule_list_lock);
848 static void add_to_net_schedule_list_tail(netif_t *netif)
850 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
852 if (__on_net_schedule_list(netif))
855 spin_lock_irq(&netbk->net_schedule_list_lock);
856 if (!__on_net_schedule_list(netif) &&
857 likely(netif_schedulable(netif))) {
858 list_add_tail(&netif->list, &netbk->net_schedule_list);
861 spin_unlock_irq(&netbk->net_schedule_list_lock);
865 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
866 * If this driver is pipelining transmit requests then we can be very
867 * aggressive in avoiding new-packet notifications -- frontend only needs to
868 * send a notification if there are no outstanding unreceived responses.
869 * If we may buffer transmit buffers for any reason then we must be rather
870 * more conservative and treat this as the final check for pending work.
872 void netif_schedule_work(netif_t *netif)
876 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
877 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
879 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
883 add_to_net_schedule_list_tail(netif);
884 maybe_schedule_tx_action(GET_GROUP_INDEX(netif));
888 void netif_deschedule_work(netif_t *netif)
890 remove_from_net_schedule_list(netif);
894 static void tx_add_credit(netif_t *netif)
896 unsigned long max_burst, max_credit;
899 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
900 * Otherwise the interface can seize up due to insufficient credit.
902 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
903 max_burst = min(max_burst, 131072UL);
904 max_burst = max(max_burst, netif->credit_bytes);
906 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
907 max_credit = netif->remaining_credit + netif->credit_bytes;
908 if (max_credit < netif->remaining_credit)
909 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
911 netif->remaining_credit = min(max_credit, max_burst);
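/*
 * E.g. with credit_bytes == 100000, remaining_credit == 30000 and a
 * 60000-byte request at the ring head: max_burst = max(min(60000, 131072),
 * 100000) = 100000 and max_credit = 130000, so remaining_credit becomes
 * min(130000, 100000) = 100000.
 */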
914 static void tx_credit_callback(unsigned long data)
916 netif_t *netif = (netif_t *)data;
917 tx_add_credit(netif);
918 netif_schedule_work(netif);
921 static inline int copy_pending_req(struct xen_netbk *netbk,
922 pending_ring_idx_t pending_idx)
924 return gnttab_copy_grant_page(netbk->grant_tx_handle[pending_idx],
925 &netbk->mmap_pages[pending_idx]);
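/*
 * Debug aid, enabled via the "permute_returns" module parameter: shuffle the
 * queued dealloc entries so that TX responses reach the frontend out of
 * order, exercising its handling of reordered completions.
 */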
928 static void permute_dealloc_ring(u16 *dealloc_ring, pending_ring_idx_t dc,
929 pending_ring_idx_t dp)
931 static unsigned random_src = 0x12345678;
933 pending_ring_idx_t dest;
937 dst_offset = (random_src / 256) % (dp - dc);
938 dest = dc + dst_offset;
939 tmp = dealloc_ring[MASK_PEND_IDX(dest)];
940 dealloc_ring[MASK_PEND_IDX(dest)] =
941 dealloc_ring[MASK_PEND_IDX(dc)];
942 dealloc_ring[MASK_PEND_IDX(dc)] = tmp;
948 static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
950 struct netbk_tx_pending_inuse *inuse, *n;
951 gnttab_unmap_grant_ref_t *gop;
953 pending_ring_idx_t dc, dp;
958 dc = netbk->dealloc_cons;
959 gop = netbk->tx_unmap_ops;
962 * Free up any grants we have finished using
965 dp = netbk->dealloc_prod;
967 /* Ensure we see all indices enqueued by netif_idx_release(). */
970 if (MODPARM_permute_returns)
971 permute_dealloc_ring(netbk->dealloc_ring, dc, dp);
975 struct netbk_tx_pending_inuse *pending_inuse =
976 netbk->pending_inuse;
978 pending_idx = netbk->dealloc_ring[MASK_PEND_IDX(dc++)];
979 list_move_tail(&pending_inuse[pending_idx].list, &list);
981 pfn = idx_to_pfn(netbk, pending_idx);
982 /* Already unmapped? */
983 if (!phys_to_machine_mapping_valid(pfn))
986 gnttab_set_unmap_op(gop, idx_to_kaddr(netbk, pending_idx),
988 netbk->grant_tx_handle[pending_idx]);
992 if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
993 list_empty(&netbk->pending_inuse_head))
996 /* Copy any entries that have been pending for too long. */
997 list_for_each_entry_safe(inuse, n, &netbk->pending_inuse_head, list) {
998 struct pending_tx_info *pending_tx_info
999 = netbk->pending_tx_info;
1001 if (time_after(inuse->alloc_time + HZ / 2, jiffies))
1004 pending_idx = inuse - netbk->pending_inuse;
1006 pending_tx_info[pending_idx].netif->nr_copied_skbs++;
1008 switch (copy_pending_req(netbk, pending_idx)) {
1010 list_move_tail(&inuse->list, &list);
1013 list_del_init(&inuse->list);
1021 } while (dp != netbk->dealloc_prod);
1023 netbk->dealloc_cons = dc;
1025 ret = HYPERVISOR_grant_table_op(
1026 GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
1027 gop - netbk->tx_unmap_ops);
1030 list_for_each_entry_safe(inuse, n, &list, list) {
1031 struct pending_tx_info *pending_tx_info =
1032 netbk->pending_tx_info;
1034 pending_idx = inuse - netbk->pending_inuse;
1035 netif = pending_tx_info[pending_idx].netif;
1037 make_tx_response(netif, &pending_tx_info[pending_idx].req,
1040 /* Ready for next use. */
1041 gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
1043 netbk->pending_ring[MASK_PEND_IDX(netbk->pending_prod++)] =
1048 list_del_init(&inuse->list);
1052 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
1054 RING_IDX cons = netif->tx.req_cons;
1057 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1060 txp = RING_GET_REQUEST(&netif->tx, cons++);
1062 netif->tx.req_cons = cons;
1063 netif_schedule_work(netif);
1067 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
1068 netif_tx_request_t *txp, int work_to_do)
1070 RING_IDX cons = netif->tx.req_cons;
1073 if (!(first->flags & NETTXF_more_data))
1077 if (frags >= work_to_do) {
1078 DPRINTK("Need more frags\n");
1082 if (unlikely(frags >= MAX_SKB_FRAGS)) {
1083 DPRINTK("Too many frags\n");
1087 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
1089 if (txp->size > first->size) {
1090 DPRINTK("Frags galore\n");
1094 first->size -= txp->size;
1097 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
1098 DPRINTK("txp->offset: %x, size: %u\n",
1099 txp->offset, txp->size);
1102 } while ((txp++)->flags & NETTXF_more_data);
1107 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
1108 struct sk_buff *skb,
1109 netif_tx_request_t *txp,
1110 gnttab_map_grant_ref_t *mop)
1112 struct skb_shared_info *shinfo = skb_shinfo(skb);
1113 skb_frag_t *frags = shinfo->frags;
1114 unsigned long pending_idx = *((u16 *)skb->data);
1117 /* Skip first skb fragment if it is on same page as header fragment. */
1118 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
1120 for (i = start; i < shinfo->nr_frags; i++, txp++) {
1121 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
1122 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_cons++);
1123 struct pending_tx_info *pending_tx_info =
1124 netbk->pending_tx_info;
1126 pending_idx = netbk->pending_ring[index];
1128 gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
1129 GNTMAP_host_map | GNTMAP_readonly,
1130 txp->gref, netif->domid);
1132 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
1134 pending_tx_info[pending_idx].netif = netif;
1135 frags[i].page = (void *)pending_idx;
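/*
 * The frag's page pointer temporarily carries the pending ring index;
 * netbk_fill_frags() replaces it with the real struct page * once the grant
 * map operations have completed.
 */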
1141 static int netbk_tx_check_mop(struct xen_netbk *netbk, struct sk_buff *skb,
1142 gnttab_map_grant_ref_t **mopp)
1144 gnttab_map_grant_ref_t *mop = *mopp;
1145 int pending_idx = *((u16 *)skb->data);
1146 struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
1147 netif_t *netif = pending_tx_info[pending_idx].netif;
1148 netif_tx_request_t *txp;
1149 struct skb_shared_info *shinfo = skb_shinfo(skb);
1150 int nr_frags = shinfo->nr_frags;
1153 /* Check status of header. */
1155 if (unlikely(err)) {
1156 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_prod++);
1158 txp = &pending_tx_info[pending_idx].req;
1159 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1160 netbk->pending_ring[index] = pending_idx;
1163 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1164 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
1165 netbk->grant_tx_handle[pending_idx] = mop->handle;
1168 /* Skip first skb fragment if it is on same page as header fragment. */
1169 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
1171 for (i = start; i < nr_frags; i++) {
1173 pending_ring_idx_t index;
1175 pending_idx = (unsigned long)shinfo->frags[i].page;
1177 /* Check error status: if okay then remember grant handle. */
1178 newerr = (++mop)->status;
1179 if (likely(!newerr)) {
1180 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1181 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
1182 netbk->grant_tx_handle[pending_idx] = mop->handle;
1183 /* Had a previous error? Invalidate this fragment. */
1185 netif_idx_release(netbk, pending_idx);
1189 /* Error on this fragment: respond to client with an error. */
1190 txp = &pending_tx_info[pending_idx].req;
1191 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1192 index = MASK_PEND_IDX(netbk->pending_prod++);
1193 netbk->pending_ring[index] = pending_idx;
1196 /* Not the first error? Preceding frags already invalidated. */
1200 /* First error: invalidate header and preceding fragments. */
1201 pending_idx = *((u16 *)skb->data);
1202 netif_idx_release(netbk, pending_idx);
1203 for (j = start; j < i; j++) {
1204 pending_idx = (unsigned long)shinfo->frags[j].page;
1205 netif_idx_release(netbk, pending_idx);
1208 /* Remember the error: invalidate all subsequent fragments. */
1216 static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1218 struct skb_shared_info *shinfo = skb_shinfo(skb);
1219 int nr_frags = shinfo->nr_frags;
1222 for (i = 0; i < nr_frags; i++) {
1223 skb_frag_t *frag = shinfo->frags + i;
1224 netif_tx_request_t *txp;
1225 unsigned long pending_idx;
1227 pending_idx = (unsigned long)frag->page;
1229 netbk->pending_inuse[pending_idx].alloc_time = jiffies;
1230 list_add_tail(&netbk->pending_inuse[pending_idx].list,
1231 &netbk->pending_inuse_head);
1233 txp = &netbk->pending_tx_info[pending_idx].req;
1234 frag->page = netbk->mmap_pages[pending_idx];
1235 frag->size = txp->size;
1236 frag->page_offset = txp->offset;
1238 skb->len += txp->size;
1239 skb->data_len += txp->size;
1240 skb->truesize += txp->size;
1244 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1247 struct netif_extra_info extra;
1248 RING_IDX cons = netif->tx.req_cons;
1251 if (unlikely(work_to_do-- <= 0)) {
1252 DPRINTK("Missing extra info\n");
1256 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1258 if (unlikely(!extra.type ||
1259 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1260 netif->tx.req_cons = ++cons;
1261 DPRINTK("Invalid extra type: %d\n", extra.type);
1265 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1266 netif->tx.req_cons = ++cons;
1267 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1272 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1274 if (!gso->u.gso.size) {
1275 DPRINTK("GSO size must not be zero.\n");
1279 /* Currently only TCPv4 S.O. is supported. */
1280 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1281 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1285 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1286 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1288 /* Header must be checked, and gso_segs computed. */
1289 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1290 skb_shinfo(skb)->gso_segs = 0;
1295 /* Called after netfront has transmitted */
1296 static void net_tx_action(unsigned long group)
1298 struct xen_netbk *netbk = &xen_netbk[group];
1299 struct sk_buff *skb;
1301 netif_tx_request_t txreq;
1302 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1303 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1306 gnttab_map_grant_ref_t *mop;
1307 unsigned int data_len;
1308 int ret, work_to_do;
1310 net_tx_action_dealloc(netbk);
1312 mop = netbk->tx_map_ops;
1313 BUILD_BUG_ON(MAX_SKB_FRAGS >= MAX_PENDING_REQS);
1314 while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1315 !list_empty(&netbk->net_schedule_list)) {
1316 /* Get a netif from the list with work to do. */
1317 netif = list_first_entry(&netbk->net_schedule_list,
1320 remove_from_net_schedule_list(netif);
1322 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1328 i = netif->tx.req_cons;
1329 rmb(); /* Ensure that we see the request before we copy it. */
1330 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1332 /* Credit-based scheduling. */
1333 if (txreq.size > netif->remaining_credit) {
1334 unsigned long now = jiffies;
1335 unsigned long next_credit =
1336 netif->credit_timeout.expires +
1337 msecs_to_jiffies(netif->credit_usec / 1000);
1339 /* Timer could already be pending in rare cases. */
1340 if (timer_pending(&netif->credit_timeout)) {
1345 /* Passed the point where we can replenish credit? */
1346 if (time_after_eq(now, next_credit)) {
1347 netif->credit_timeout.expires = now;
1348 tx_add_credit(netif);
1351 /* Still too big to send right now? Set a callback. */
1352 if (txreq.size > netif->remaining_credit) {
1353 netif->credit_timeout.data =
1354 (unsigned long)netif;
1355 netif->credit_timeout.function =
1357 mod_timer(&netif->credit_timeout, next_credit);
1362 netif->remaining_credit -= txreq.size;
1365 netif->tx.req_cons = ++i;
1367 memset(extras, 0, sizeof(extras));
1368 if (txreq.flags & NETTXF_extra_info) {
1369 work_to_do = netbk_get_extras(netif, extras,
1371 i = netif->tx.req_cons;
1372 if (unlikely(work_to_do < 0)) {
1373 netbk_tx_err(netif, &txreq, i);
1378 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1379 if (unlikely(ret < 0)) {
1380 netbk_tx_err(netif, &txreq, i - ret);
1385 if (unlikely(txreq.size < ETH_HLEN)) {
1386 DPRINTK("Bad packet size: %d\n", txreq.size);
1387 netbk_tx_err(netif, &txreq, i);
1391 /* No crossing a page as the payload mustn't fragment. */
1392 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1393 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1394 txreq.offset, txreq.size,
1395 (txreq.offset &~PAGE_MASK) + txreq.size);
1396 netbk_tx_err(netif, &txreq, i);
1400 pending_idx = netbk->pending_ring[MASK_PEND_IDX(netbk->pending_cons)];
1402 data_len = (txreq.size > PKT_PROT_LEN &&
1403 ret < MAX_SKB_FRAGS) ?
1404 PKT_PROT_LEN : txreq.size;
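/*
 * If the packet is larger than PKT_PROT_LEN and a frag slot is free, only
 * the first PKT_PROT_LEN bytes are copied into the linear area (enough for
 * the protocol headers); the rest is appended below as a fragment mapped
 * from the guest.
 */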
1406 skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
1407 GFP_ATOMIC | __GFP_NOWARN);
1408 if (unlikely(skb == NULL)) {
1409 DPRINTK("Can't allocate a skb in start_xmit.\n");
1410 netbk_tx_err(netif, &txreq, i);
1414 /* Packets passed to netif_rx() must have some headroom. */
1415 skb_reserve(skb, 16 + NET_IP_ALIGN);
1417 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1418 struct netif_extra_info *gso;
1419 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1421 if (netbk_set_skb_gso(skb, gso)) {
1423 netbk_tx_err(netif, &txreq, i);
1428 gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
1429 GNTMAP_host_map | GNTMAP_readonly,
1430 txreq.gref, netif->domid);
1433 memcpy(&netbk->pending_tx_info[pending_idx].req,
1434 &txreq, sizeof(txreq));
1435 netbk->pending_tx_info[pending_idx].netif = netif;
1436 *((u16 *)skb->data) = pending_idx;
1438 __skb_put(skb, data_len);
1440 skb_shinfo(skb)->nr_frags = ret;
1441 if (data_len < txreq.size) {
1442 skb_shinfo(skb)->nr_frags++;
1443 skb_shinfo(skb)->frags[0].page =
1444 (void *)(unsigned long)pending_idx;
1446 /* Discriminate from any valid pending_idx value. */
1447 skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1450 __skb_queue_tail(&netbk->tx_queue, skb);
1452 netbk->pending_cons++;
1454 mop = netbk_get_requests(netif, skb, txfrags, mop);
1456 netif->tx.req_cons = i;
1457 netif_schedule_work(netif);
1459 if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
1463 if (mop == netbk->tx_map_ops)
1466 /* NOTE: some maps may fail with GNTST_eagain, which could be successfully
1467 * retried in the backend after a delay. However, we can also fail the tx
1468 * req and let the frontend resend the relevant packet again. This is fine
1469 * because it is unlikely that a network buffer will be paged out or shared,
1470 * and therefore it is unlikely to fail with GNTST_eagain. */
1471 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
1473 mop - netbk->tx_map_ops);
1476 mop = netbk->tx_map_ops;
1477 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1478 netif_tx_request_t *txp;
1480 pending_idx = *((u16 *)skb->data);
1481 netif = netbk->pending_tx_info[pending_idx].netif;
1482 txp = &netbk->pending_tx_info[pending_idx].req;
1484 /* Check the remap error code. */
1485 if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
1486 DPRINTK("netback grant failed.\n");
1487 skb_shinfo(skb)->nr_frags = 0;
1492 data_len = skb->len;
1494 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1496 if (data_len < txp->size) {
1497 /* Append the packet payload as a fragment. */
1498 txp->offset += data_len;
1499 txp->size -= data_len;
1501 /* Schedule a response immediately. */
1502 netif_idx_release(netbk, pending_idx);
1506 * Old frontends do not assert data_validated but we
1507 * can infer it from csum_blank so test both flags.
1509 if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
1510 skb->ip_summed = CHECKSUM_UNNECESSARY;
1511 skb->proto_data_valid = 1;
1513 skb->ip_summed = CHECKSUM_NONE;
1514 skb->proto_data_valid = 0;
1516 skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
1518 netbk_fill_frags(netbk, skb);
1520 skb->dev = netif->dev;
1521 skb->protocol = eth_type_trans(skb, skb->dev);
1523 netif->stats.rx_bytes += skb->len;
1524 netif->stats.rx_packets++;
1526 if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
1527 unlikely(skb_linearize(skb))) {
1528 DPRINTK("Can't linearize skb in net_tx_action.\n");
1537 netif->dev->last_rx = jiffies;
1541 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1542 !list_empty(&netbk->pending_inuse_head)) {
1543 struct netbk_tx_pending_inuse *oldest;
1545 oldest = list_entry(netbk->pending_inuse_head.next,
1546 struct netbk_tx_pending_inuse, list);
1547 mod_timer(&netbk->tx_pending_timer, oldest->alloc_time + HZ);
1551 static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1553 unsigned long flags;
1555 spin_lock_irqsave(&netbk->release_lock, flags);
1556 netbk->dealloc_ring[MASK_PEND_IDX(netbk->dealloc_prod)] = pending_idx;
1557 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1559 netbk->dealloc_prod++;
1560 spin_unlock_irqrestore(&netbk->release_lock, flags);
1563 wake_up(&netbk->netbk_action_wq);
1565 tasklet_schedule(&netbk->net_tx_tasklet);
1568 static void netif_page_release(struct page *page, unsigned int order)
1570 unsigned int idx = netif_page_index(page);
1571 unsigned int group = netif_page_group(page);
1572 struct xen_netbk *netbk = &xen_netbk[group];
1575 BUG_ON(group >= netbk_nr_groups || idx >= MAX_PENDING_REQS);
1576 BUG_ON(netbk->mmap_pages[idx] != page);
1577 netif_idx_release(netbk, idx);
1580 irqreturn_t netif_be_int(int irq, void *dev_id)
1582 netif_t *netif = dev_id;
1583 unsigned int group = GET_GROUP_INDEX(netif);
1585 if (unlikely(group >= netbk_nr_groups)) {
1587 * Short of having a way to bind the IRQ in disabled mode
1588 * (IRQ_NOAUTOEN), we have to ignore the first invocation(s)
1589 * (before we got assigned to a group).
1591 BUG_ON(group != UINT_MAX);
1595 add_to_net_schedule_list_tail(netif);
1596 maybe_schedule_tx_action(group);
1598 if (netif_schedulable(netif) && !netbk_queue_full(netif))
1599 netif_wake_queue(netif->dev);
1604 static void make_tx_response(netif_t *netif,
1605 netif_tx_request_t *txp,
1608 RING_IDX i = netif->tx.rsp_prod_pvt;
1609 netif_tx_response_t *resp;
1612 resp = RING_GET_RESPONSE(&netif->tx, i);
1616 if (txp->flags & NETTXF_extra_info)
1617 RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
1619 netif->tx.rsp_prod_pvt = ++i;
1620 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1622 notify_remote_via_irq(netif->irq);
1624 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1625 if (i == netif->tx.req_cons) {
1627 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1629 add_to_net_schedule_list_tail(netif);
1634 static netif_rx_response_t *make_rx_response(netif_t *netif,
1641 RING_IDX i = netif->rx.rsp_prod_pvt;
1642 netif_rx_response_t *resp;
1644 resp = RING_GET_RESPONSE(&netif->rx, i);
1645 resp->offset = offset;
1646 resp->flags = flags;
1648 resp->status = (s16)size;
1650 resp->status = (s16)st;
1652 netif->rx.rsp_prod_pvt = ++i;
1657 #ifdef NETBE_DEBUG_INTERRUPT
1658 static irqreturn_t netif_be_dbg(int irq, void *dev_id)
1660 struct list_head *ent;
1662 unsigned int i = 0, group;
1664 printk(KERN_ALERT "netif_schedule_list:\n");
1666 for (group = 0; group < netbk_nr_groups; ++group) {
1667 struct xen_netbk *netbk = &xen_netbk[group];
1669 spin_lock_irq(&netbk->net_schedule_list_lock);
1671 list_for_each(ent, &netbk->net_schedule_list) {
1672 netif = list_entry(ent, netif_t, list);
1673 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
1674 "rx_resp_prod=%08x\n",
1675 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1676 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
1677 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1678 printk(KERN_ALERT " shared(rx_req_prod=%08x "
1679 "rx_resp_prod=%08x\n",
1680 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
1681 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
1682 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
1683 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
1684 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
1688 spin_unlock_irq(&netbk->net_schedule_list_lock);
1691 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
1696 static struct irqaction netif_be_dbg_action = {
1697 .handler = netif_be_dbg,
1698 .flags = IRQF_SHARED,
1699 .name = "net-be-dbg"
1703 static inline int rx_work_todo(struct xen_netbk *netbk)
1705 return !skb_queue_empty(&netbk->rx_queue);
1708 static inline int tx_work_todo(struct xen_netbk *netbk)
1710 if (netbk->dealloc_cons != netbk->dealloc_prod)
1713 if (nr_pending_reqs(netbk) + MAX_SKB_FRAGS < MAX_PENDING_REQS &&
1714 !list_empty(&netbk->net_schedule_list))
1720 static int netbk_action_thread(void *index)
1722 unsigned long group = (unsigned long)index;
1723 struct xen_netbk *netbk = &xen_netbk[group];
1725 while (!kthread_should_stop()) {
1726 wait_event_interruptible(netbk->netbk_action_wq,
1727 rx_work_todo(netbk) ||
1728 tx_work_todo(netbk) ||
1729 kthread_should_stop());
1732 if (rx_work_todo(netbk))
1733 net_rx_action(group);
1735 if (tx_work_todo(netbk))
1736 net_tx_action(group);
1743 static int __init netback_init(void)
1745 unsigned int i, group;
1749 if (!is_running_on_xen())
1752 if (!netbk_nr_groups)
1753 netbk_nr_groups = (num_online_cpus() + 1) / 2;
1754 if (netbk_nr_groups > MAX_GROUPS)
1755 netbk_nr_groups = MAX_GROUPS;
1757 /* We can increase reservation by this much in net_rx_action(). */
1758 balloon_update_driver_allowance(netbk_nr_groups * NET_RX_RING_SIZE);
1760 xen_netbk = __vmalloc(netbk_nr_groups * sizeof(*xen_netbk),
1761 GFP_KERNEL|__GFP_HIGHMEM|__GFP_ZERO, PAGE_KERNEL);
1763 printk(KERN_ALERT "%s: out of memory\n", __func__);
1767 for (group = 0; group < netbk_nr_groups; group++) {
1768 struct xen_netbk *netbk = &xen_netbk[group];
1771 init_waitqueue_head(&netbk->netbk_action_wq);
1772 netbk->task = kthread_create(netbk_action_thread,
1773 (void *)(long)group,
1774 "netback/%u", group);
1776 if (!IS_ERR(netbk->task)) {
1778 kthread_bind(netbk->task, group);
1779 wake_up_process(netbk->task);
1782 "kthread_create() fails at netback\n");
1783 rc = PTR_ERR(netbk->task);
1787 tasklet_init(&netbk->net_tx_tasklet, net_tx_action, group);
1788 tasklet_init(&netbk->net_rx_tasklet, net_rx_action, group);
1791 skb_queue_head_init(&netbk->rx_queue);
1792 skb_queue_head_init(&netbk->tx_queue);
1795 alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1796 if (netbk->mmap_pages == NULL) {
1797 printk(KERN_ALERT "%s: out of memory\n", __func__);
1802 init_timer(&netbk->net_timer);
1803 netbk->net_timer.data = group;
1804 netbk->net_timer.function = net_alarm;
1806 init_timer(&netbk->tx_pending_timer);
1807 netbk->tx_pending_timer.data = group;
1808 netbk->tx_pending_timer.function =
1809 netbk_tx_pending_timeout;
1811 netbk->pending_prod = MAX_PENDING_REQS;
1813 INIT_LIST_HEAD(&netbk->pending_inuse_head);
1814 INIT_LIST_HEAD(&netbk->net_schedule_list);
1816 spin_lock_init(&netbk->net_schedule_list_lock);
1817 spin_lock_init(&netbk->release_lock);
1819 for (i = 0; i < MAX_PENDING_REQS; i++) {
1820 page = netbk->mmap_pages[i];
1821 SetPageForeign(page, netif_page_release);
1822 netif_set_page_ext(page, group, i);
1823 netbk->pending_ring[i] = i;
1824 INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
1828 netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
1829 if (MODPARM_copy_skb) {
1830 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
1832 netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
1834 netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
1839 netif_xenbus_init();
1841 #ifdef NETBE_DEBUG_INTERRUPT
1842 (void)bind_virq_to_irqaction(VIRQ_DEBUG,
1844 &netif_be_dbg_action);
1850 while (group-- > 0) {
1851 struct xen_netbk *netbk = &xen_netbk[group];
1853 if (use_kthreads && netbk->task && !IS_ERR(netbk->task))
1854 kthread_stop(netbk->task);
1855 if (netbk->mmap_pages)
1856 free_empty_pages_and_pagevec(netbk->mmap_pages,
1858 del_timer(&netbk->tx_pending_timer);
1859 del_timer(&netbk->net_timer);
1866 module_init(netback_init);
1868 MODULE_LICENSE("Dual BSD/GPL");