1 /******************************************************************************
2 * drivers/xen/netback/netback.c
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
10 * Copyright (c) 2002-2005, K A Fraser
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
38 #include <linux/if_vlan.h>
39 #include <linux/kthread.h>
40 #include <linux/vmalloc.h>
42 #include <xen/balloon.h>
43 #include <xen/evtchn.h>
44 #include <xen/gnttab.h>
45 #include <xen/interface/memory.h>
46 #include <xen/net-util.h>
48 /* #define NETBE_DEBUG_INTERRUPT */
50 struct xen_netbk *__read_mostly xen_netbk;
51 unsigned int __read_mostly netbk_nr_groups;
52 static bool __read_mostly use_kthreads = true;
53 static bool __initdata bind_threads;
55 #define GET_GROUP_INDEX(netif) ((netif)->group)
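/*
 * Every vif is assigned to exactly one netback group (a tasklet pair or a
 * kernel thread, depending on the 'tasklets' parameter).  An interface that
 * has not been assigned a group yet reports UINT_MAX here; callers such as
 * netif_be_start_xmit() and netif_be_int() check for that case.
 */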
57 static void netif_idx_release(struct xen_netbk *, u16 pending_idx);
58 static void make_tx_response(netif_t *netif,
59 netif_tx_request_t *txp,
61 static netif_rx_response_t *make_rx_response(netif_t *netif,
68 static void net_tx_action(unsigned long group);
69 static void net_rx_action(unsigned long group);
71 /* Discriminate from any valid pending_idx value. */
72 #define INVALID_PENDING_IDX 0xffff
74 static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, u16 idx)
76 return page_to_pfn(netbk->mmap_pages[idx]);
79 static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, u16 idx)
81 return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
84 /* Extra per-page state for netback, overlaid on struct page's mapping field. */
87 #if BITS_PER_LONG < 64
88 #define GROUP_WIDTH (BITS_PER_LONG - CONFIG_XEN_NETDEV_TX_SHIFT)
89 #define MAX_GROUPS ((1U << GROUP_WIDTH) - 1)
90 unsigned int grp:GROUP_WIDTH;
91 unsigned int idx:CONFIG_XEN_NETDEV_TX_SHIFT;
93 #define MAX_GROUPS UINT_MAX
94 unsigned int grp, idx;
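/*
 * The group is stored biased by one so that an untagged page (->mapping
 * cleared, i.e. grp == 0) can never be mistaken for a page belonging to
 * group 0; netif_page_group() removes the bias again.
 */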
100 static inline void netif_set_page_ext(struct page *pg, unsigned int group,
103 union page_ext ext = { .e = { .grp = group + 1, .idx = idx } };
105 BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
106 pg->mapping = ext.mapping;
109 static inline unsigned int netif_page_group(const struct page *pg)
111 union page_ext ext = { .mapping = pg->mapping };
113 return ext.e.grp - 1;
116 static inline unsigned int netif_page_index(const struct page *pg)
118 union page_ext ext = { .mapping = pg->mapping };
123 static u16 frag_get_pending_idx(const skb_frag_t *frag)
125 return (u16)frag->page_offset;
128 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
130 frag->page_offset = pending_idx;
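/*
 * While a guest TX page is only grant-mapped, the frag's page_offset field
 * is used to carry its pending_idx; netbk_fill_frags() later replaces it
 * with the real page, offset and size taken from the TX request.
 */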
134 * This is the amount of packet we copy rather than map, so that the
135 * guest can't fiddle with the contents of the headers while we do
136 * packet processing on them (netfilter, routing, etc).
138 #define PKT_PROT_LEN (ETH_HLEN + VLAN_HLEN + \
139 sizeof(struct iphdr) + MAX_IPOPTLEN + \
140 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
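/*
 * With a 14 byte Ethernet header, 4 byte VLAN tag, 20 byte IP and TCP
 * headers and 40 bytes of options for each, this works out to roughly the
 * first 138 bytes of every packet.
 */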
142 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
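/*
 * MASK_PEND_IDX relies on MAX_PENDING_REQS being a power of two.  The
 * pending ring holds the indices of free tx slots, so the number of
 * requests currently in flight is MAX_PENDING_REQS minus the number of
 * free entries (pending_prod - pending_cons), as computed below.
 */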
144 static inline pending_ring_idx_t nr_pending_reqs(const struct xen_netbk *netbk)
146 return MAX_PENDING_REQS -
147 netbk->pending_prod + netbk->pending_cons;
150 /* Setting this allows the safe use of this driver without netloop. */
151 static bool MODPARM_copy_skb = true;
152 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
153 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
154 static bool MODPARM_permute_returns;
155 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
156 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
157 module_param_named(groups, netbk_nr_groups, uint, 0);
158 MODULE_PARM_DESC(groups, "Specify the number of tasklet pairs/threads to use");
159 module_param_named(tasklets, use_kthreads, invbool, 0);
160 MODULE_PARM_DESC(tasklets, "Use tasklets instead of kernel threads");
161 module_param_named(bind, bind_threads, bool, 0);
162 MODULE_PARM_DESC(bind, "Bind kernel threads to (v)CPUs");
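/*
 * Note that 'tasklets' is declared invbool: loading with tasklets=1 clears
 * use_kthreads and runs the rx/tx work in tasklets, while the default
 * (use_kthreads == true) runs one netbk_action_thread per group, optionally
 * bound to a CPU via the 'bind' parameter.
 */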
164 int netbk_copy_skb_mode;
166 static inline unsigned long alloc_mfn(struct xen_netbk *netbk)
168 BUG_ON(netbk->alloc_index == 0);
169 return netbk->mfn_list[--netbk->alloc_index];
172 static int check_mfn(struct xen_netbk *netbk, unsigned int nr)
174 struct xen_memory_reservation reservation = {
180 if (likely(netbk->alloc_index >= nr))
183 set_xen_guest_handle(reservation.extent_start,
184 netbk->mfn_list + netbk->alloc_index);
185 reservation.nr_extents = MAX_MFN_ALLOC - netbk->alloc_index;
186 rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
188 netbk->alloc_index += rc;
190 return netbk->alloc_index >= nr ? 0 : -ENOMEM;
193 static void netbk_schedule(struct xen_netbk *netbk)
196 wake_up(&netbk->netbk_action_wq);
198 tasklet_schedule(&netbk->net_tx_tasklet);
201 static void netbk_schedule_group(unsigned long group)
203 netbk_schedule(&xen_netbk[group]);
206 static inline void maybe_schedule_tx_action(unsigned int group)
208 struct xen_netbk *netbk = &xen_netbk[group];
211 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
212 !list_empty(&netbk->schedule_list))
213 netbk_schedule(netbk);
216 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
218 struct skb_shared_info *ninfo;
219 struct sk_buff *nskb;
220 unsigned long offset;
225 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
227 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
231 skb_reserve(nskb, 16 + NET_IP_ALIGN);
232 headlen = skb_end_pointer(nskb) - nskb->data;
233 if (headlen > skb_headlen(skb))
234 headlen = skb_headlen(skb);
235 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
238 ninfo = skb_shinfo(nskb);
239 ninfo->gso_size = skb_shinfo(skb)->gso_size;
240 ninfo->gso_type = skb_shinfo(skb)->gso_type;
243 len = skb->len - headlen;
245 nskb->len = skb->len;
246 nskb->data_len = len;
247 nskb->truesize += len;
254 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
259 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
260 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
262 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
266 ret = skb_copy_bits(skb, offset, page_address(page), copy);
269 __skb_fill_page_desc(nskb, ninfo->nr_frags, page, 0, copy);
276 #ifdef NET_SKBUFF_DATA_USES_OFFSET
279 offset = nskb->data - skb->data;
282 nskb->transport_header = skb->transport_header + offset;
283 nskb->network_header = skb->network_header + offset;
284 nskb->mac_header = skb->mac_header + offset;
294 static inline int netbk_max_required_rx_slots(netif_t *netif)
296 if (netif->can_sg || netif->gso)
297 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
298 return 1; /* all in one */
301 static inline int netbk_queue_full(netif_t *netif)
303 RING_IDX peek = netif->rx_req_cons_peek;
304 RING_IDX needed = netbk_max_required_rx_slots(netif);
306 return ((netif->rx.sring->req_prod - peek) < needed) ||
307 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
310 static void tx_queue_callback(unsigned long data)
312 netif_t *netif = (netif_t *)data;
313 if (netif_schedulable(netif))
314 netif_wake_queue(netif->dev);
317 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
319 netif_t *netif = netdev_priv(dev);
320 unsigned int group = GET_GROUP_INDEX(netif);
321 struct xen_netbk *netbk;
323 BUG_ON(skb->dev != dev);
325 if (unlikely(group >= netbk_nr_groups)) {
326 BUG_ON(group != UINT_MAX);
330 /* Drop the packet if the target domain has no receive buffers. */
331 if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
335 * Copy the packet here if it's destined for a flipping interface
336 * but isn't flippable (e.g. extra references to data).
337 * XXX For now we also copy skbuffs whose head crosses a page
338 * boundary, because netbk_gop_skb can't handle them.
340 if (!netif->copying_receiver ||
341 ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
342 struct sk_buff *nskb = netbk_copy_skb(skb);
343 if (unlikely(nskb == NULL))
345 /* Copy only the header fields we use in this driver. */
346 nskb->dev = skb->dev;
347 nskb->ip_summed = skb->ip_summed;
352 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
353 !!skb_shinfo(skb)->gso_size;
356 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
357 netif->rx.sring->req_event = netif->rx_req_cons_peek +
358 netbk_max_required_rx_slots(netif);
359 mb(); /* request notification /then/ check & stop the queue */
360 if (netbk_queue_full(netif)) {
361 netif_stop_queue(dev);
363 * Schedule a 500ms timeout to restart the queue, thus
364 * ensuring that an inactive queue will be drained.
365 * Packets will be dropped immediately until more
366 * receive buffers become available (see
367 * netbk_queue_full() check above).
369 netif->tx_queue_timeout.data = (unsigned long)netif;
370 netif->tx_queue_timeout.function = tx_queue_callback;
371 mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
375 netbk = &xen_netbk[group];
376 skb_queue_tail(&netbk->rx_queue, skb);
377 netbk_schedule(netbk);
382 dev->stats.tx_dropped++;
388 static void xen_network_done_notify(void)
390 static struct net_device *eth0_dev = NULL;
391 if (unlikely(eth0_dev == NULL))
392 eth0_dev = __dev_get_by_name(&init_net, "eth0");
396 * Add the following to the poll() function of a NAPI driver (Tigon3 is an example):
397 * if ( xen_network_done() )
398 * tg3_enable_ints(tp);
400 int xen_network_done(void)
402 return skb_queue_empty(&rx_queue);
406 struct netrx_pending_operations {
407 unsigned trans_prod, trans_cons;
408 unsigned mmu_prod, mmu_mcl;
409 unsigned mcl_prod, mcl_cons;
410 unsigned copy_prod, copy_cons;
411 unsigned meta_prod, meta_cons;
413 gnttab_transfer_t *trans;
415 multicall_entry_t *mcl;
416 struct netbk_rx_meta *meta;
419 /* Set up the grant operations for this fragment. If it's a flipping
420 interface, we also set up the unmap request from here. */
421 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
422 int i, struct netrx_pending_operations *npo,
423 struct page *page, unsigned long size,
424 unsigned long offset)
427 gnttab_transfer_t *gop;
428 gnttab_copy_t *copy_gop;
429 multicall_entry_t *mcl;
430 netif_rx_request_t *req;
431 unsigned long old_mfn, new_mfn;
432 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
434 old_mfn = virt_to_mfn(page_address(page));
436 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
437 if (netif->copying_receiver) {
438 unsigned int group, idx;
440 /* The fragment needs to be copied rather than
443 copy_gop = npo->copy + npo->copy_prod++;
444 copy_gop->flags = GNTCOPY_dest_gref;
445 if (PageForeign(page) &&
446 page->mapping != NULL &&
447 (idx = netif_page_index(page)) < MAX_PENDING_REQS &&
448 (group = netif_page_group(page)) < netbk_nr_groups) {
449 struct pending_tx_info *src_pend;
452 netbk = &xen_netbk[group];
453 BUG_ON(netbk->mmap_pages[idx] != page);
454 src_pend = &netbk->pending_tx_info[idx];
455 grp = GET_GROUP_INDEX(src_pend->netif);
456 BUG_ON(group != grp && grp != UINT_MAX);
457 copy_gop->source.domid = src_pend->netif->domid;
458 copy_gop->source.u.ref = src_pend->req.gref;
459 copy_gop->flags |= GNTCOPY_source_gref;
461 copy_gop->source.domid = DOMID_SELF;
462 copy_gop->source.u.gmfn = old_mfn;
464 copy_gop->source.offset = offset;
465 copy_gop->dest.domid = netif->domid;
466 copy_gop->dest.offset = 0;
467 copy_gop->dest.u.ref = req->gref;
468 copy_gop->len = size;
471 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
472 new_mfn = alloc_mfn(netbk);
475 * Set the new P2M table entry before
476 * reassigning the old data page. Heed the
477 * comment in pgtable-2level.h:pte_page(). :-)
479 set_phys_to_machine(page_to_pfn(page), new_mfn);
481 mcl = npo->mcl + npo->mcl_prod++;
482 MULTI_update_va_mapping(mcl,
483 (unsigned long)page_address(page),
484 pfn_pte_ma(new_mfn, PAGE_KERNEL),
487 mmu = npo->mmu + npo->mmu_prod++;
488 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
490 mmu->val = page_to_pfn(page);
493 gop = npo->trans + npo->trans_prod++;
495 gop->domid = netif->domid;
496 gop->ref = req->gref;
501 static void netbk_gop_skb(struct sk_buff *skb,
502 struct netrx_pending_operations *npo)
504 netif_t *netif = netdev_priv(skb->dev);
505 int nr_frags = skb_shinfo(skb)->nr_frags;
508 struct netbk_rx_meta *head_meta, *meta;
510 head_meta = npo->meta + npo->meta_prod++;
511 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
512 head_meta->frag.size = skb_shinfo(skb)->gso_size;
513 extra = !!head_meta->frag.size + 1;
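/*
 * 'extra' counts the ring slots taken by the head: one for the first RX
 * request plus one more if a GSO extra-info segment will be generated, so
 * the fragment requests start at index 'extra' below.
 */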
515 for (i = 0; i < nr_frags; i++) {
516 meta = npo->meta + npo->meta_prod++;
517 meta->frag = skb_shinfo(skb)->frags[i];
518 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
519 skb_frag_page(&meta->frag),
520 skb_frag_size(&meta->frag),
521 meta->frag.page_offset);
525 * This must occur at the end to ensure that we don't trash skb_shinfo
526 * until we're done. We know that the head doesn't cross a page
527 * boundary because such packets get copied in netif_be_start_xmit.
529 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
530 virt_to_page(skb->data),
532 offset_in_page(skb->data));
534 netif->rx.req_cons += nr_frags + extra;
537 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
541 for (i = 0; i < nr_frags; i++)
542 put_page(skb_frag_page(&meta[i].frag));
545 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
546 used to set up the operations at the top of
547 netrx_pending_operations, which have since been done. Check that
548 they didn't give any errors and advance over them. */
549 static int netbk_check_gop(int nr_frags, domid_t domid, struct netrx_pending_operations *npo)
551 multicall_entry_t *mcl;
552 gnttab_transfer_t *gop;
553 gnttab_copy_t *copy_op;
554 int status = XEN_NETIF_RSP_OKAY;
557 for (i = 0; i <= nr_frags; i++) {
558 if (npo->meta[npo->meta_cons + i].copy) {
559 copy_op = npo->copy + npo->copy_cons++;
560 if (unlikely(copy_op->status == GNTST_eagain))
561 gnttab_check_GNTST_eagain_while(GNTTABOP_copy, copy_op);
562 if (unlikely(copy_op->status != GNTST_okay)) {
563 DPRINTK("Bad status %d from copy to DOM%d.\n",
564 copy_op->status, domid);
565 status = XEN_NETIF_RSP_ERROR;
568 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
569 mcl = npo->mcl + npo->mcl_cons++;
570 /* The update_va_mapping() must not fail. */
571 BUG_ON(mcl->result != 0);
574 gop = npo->trans + npo->trans_cons++;
575 /* Check the reassignment error code. */
576 if (unlikely(gop->status != GNTST_okay)) {
577 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
580 * Page no longer belongs to us unless
581 * GNTST_bad_page, but that should be
582 * a fatal error anyway.
584 BUG_ON(gop->status == GNTST_bad_page);
585 status = XEN_NETIF_RSP_ERROR;
593 static void netbk_add_frag_responses(netif_t *netif, int status,
594 struct netbk_rx_meta *meta, int nr_frags)
597 unsigned long offset;
599 for (i = 0; i < nr_frags; i++) {
601 int flags = (i == nr_frags - 1) ? 0 : XEN_NETRXF_more_data;
606 offset = meta[i].frag.page_offset;
607 make_rx_response(netif, id, status, offset,
608 meta[i].frag.size, flags);
612 static void net_rx_action(unsigned long group)
614 netif_t *netif = NULL;
617 netif_rx_response_t *resp;
618 multicall_entry_t *mcl;
619 struct sk_buff_head rxq;
625 unsigned long offset;
626 struct xen_netbk *netbk = &xen_netbk[group];
628 struct netrx_pending_operations npo = {
629 .mmu = netbk->rx_mmu,
630 .trans = netbk->grant_trans_op,
631 .copy = netbk->grant_copy_op,
632 .mcl = netbk->rx_mcl,
636 skb_queue_head_init(&rxq);
640 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
641 nr_frags = skb_shinfo(skb)->nr_frags;
642 *(int *)skb->cb = nr_frags;
644 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
645 !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
646 check_mfn(netbk, nr_frags + 1)) {
647 /* Memory squeeze? Back off for an arbitrary while. */
648 if (net_ratelimit())
649 WPRINTK("Memory squeeze in netback "
651 mod_timer(&netbk->net_timer, jiffies + HZ);
652 skb_queue_head(&netbk->rx_queue, skb);
656 netbk_gop_skb(skb, &npo);
658 count += nr_frags + 1;
660 __skb_queue_tail(&rxq, skb);
662 /* Filled the batch queue? */
663 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
667 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
669 npo.mmu_mcl = npo.mcl_prod;
671 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
672 BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
673 mcl = npo.mcl + npo.mcl_prod++;
675 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
676 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
678 mcl->op = __HYPERVISOR_mmu_update;
679 mcl->args[0] = (unsigned long)netbk->rx_mmu;
680 mcl->args[1] = npo.mmu_prod;
682 mcl->args[3] = DOMID_SELF;
685 if (npo.trans_prod) {
686 BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
687 mcl = npo.mcl + npo.mcl_prod++;
688 mcl->op = __HYPERVISOR_grant_table_op;
689 mcl->args[0] = GNTTABOP_transfer;
690 mcl->args[1] = (unsigned long)netbk->grant_trans_op;
691 mcl->args[2] = npo.trans_prod;
695 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
696 mcl = npo.mcl + npo.mcl_prod++;
697 mcl->op = __HYPERVISOR_grant_table_op;
698 mcl->args[0] = GNTTABOP_copy;
699 mcl->args[1] = (unsigned long)netbk->grant_copy_op;
700 mcl->args[2] = npo.copy_prod;
707 BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
709 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
711 /* The mmu_machphys_update() must not fail. */
712 BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
714 while ((skb = __skb_dequeue(&rxq)) != NULL) {
715 nr_frags = *(int *)skb->cb;
717 netif = netdev_priv(skb->dev);
719 status = netbk_check_gop(nr_frags, netif->domid, &npo);
721 /* We can't rely on skb_release_data to release the
722 pages used by fragments for us, since it tries to
723 touch the pages in the fraglist. If we're in
724 flipping mode, that doesn't work. In copying mode,
725 we still have access to all of the pages, and so
726 it's safe to let release_data deal with it. */
727 /* (Freeing the fragments is safe since we copy
728 non-linear skbs destined for flipping interfaces) */
729 if (!netif->copying_receiver) {
730 atomic_set(&(skb_shinfo(skb)->dataref), 1);
731 skb_shinfo(skb)->frag_list = NULL;
732 skb_shinfo(skb)->nr_frags = 0;
733 netbk_free_pages(nr_frags, netbk->meta + npo.meta_cons + 1);
736 skb->dev->stats.tx_bytes += skb->len;
737 skb->dev->stats.tx_packets++;
739 id = netbk->meta[npo.meta_cons].id;
740 flags = nr_frags ? XEN_NETRXF_more_data : 0;
742 switch (skb->ip_summed) {
743 case CHECKSUM_PARTIAL: /* local packet? */
744 flags |= XEN_NETRXF_csum_blank |
745 XEN_NETRXF_data_validated;
747 case CHECKSUM_UNNECESSARY: /* remote but checksummed? */
748 flags |= XEN_NETRXF_data_validated;
752 if (netbk->meta[npo.meta_cons].copy)
755 offset = offset_in_page(skb->data);
756 resp = make_rx_response(netif, id, status, offset,
757 skb_headlen(skb), flags);
759 if (netbk->meta[npo.meta_cons].frag.size) {
760 struct netif_extra_info *gso =
761 (struct netif_extra_info *)
762 RING_GET_RESPONSE(&netif->rx,
763 netif->rx.rsp_prod_pvt++);
765 resp->flags |= XEN_NETRXF_extra_info;
767 gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
768 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
770 gso->u.gso.features = 0;
772 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
776 netbk_add_frag_responses(netif, status,
777 netbk->meta + npo.meta_cons + 1,
780 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
781 irq = netif->irq - DYNIRQ_BASE;
782 if (ret && !__test_and_set_bit(irq, netbk->rx_notify))
783 netbk->notify_list[notify_nr++] = irq;
785 if (netif_queue_stopped(netif->dev) &&
786 netif_schedulable(netif) &&
787 !netbk_queue_full(netif))
788 netif_wake_queue(netif->dev);
793 npo.meta_cons += nr_frags + 1;
796 if (notify_nr == 1) {
797 irq = *netbk->notify_list;
798 __clear_bit(irq, netbk->rx_notify);
799 notify_remote_via_irq(irq + DYNIRQ_BASE);
801 for (count = ret = 0; ret < notify_nr; ++ret) {
802 irq = netbk->notify_list[ret];
803 __clear_bit(irq, netbk->rx_notify);
804 if (!multi_notify_remote_via_irq(netbk->rx_mcl + count,
808 if (HYPERVISOR_multicall(netbk->rx_mcl, count))
812 /* More work to do? */
813 if (!skb_queue_empty(&netbk->rx_queue) &&
814 !timer_pending(&netbk->net_timer))
815 netbk_schedule(netbk);
818 xen_network_done_notify();
822 static int __on_net_schedule_list(netif_t *netif)
824 return netif->list.next != NULL;
827 /* Must be called with netbk->schedule_list_lock held. */
828 static void remove_from_net_schedule_list(netif_t *netif)
830 if (likely(__on_net_schedule_list(netif))) {
831 list_del(&netif->list);
832 netif->list.next = NULL;
837 static netif_t *poll_net_schedule_list(struct xen_netbk *netbk)
839 netif_t *netif = NULL;
841 spin_lock_irq(&netbk->schedule_list_lock);
842 if (!list_empty(&netbk->schedule_list)) {
843 netif = list_first_entry(&netbk->schedule_list, netif_t, list);
845 remove_from_net_schedule_list(netif);
847 spin_unlock_irq(&netbk->schedule_list_lock);
851 static void add_to_net_schedule_list_tail(netif_t *netif)
853 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
856 if (__on_net_schedule_list(netif))
859 spin_lock_irqsave(&netbk->schedule_list_lock, flags);
860 if (!__on_net_schedule_list(netif) &&
861 likely(netif_schedulable(netif))) {
862 list_add_tail(&netif->list, &netbk->schedule_list);
865 spin_unlock_irqrestore(&netbk->schedule_list_lock, flags);
869 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
870 * If this driver is pipelining transmit requests then we can be very
871 * aggressive in avoiding new-packet notifications -- the frontend only needs to
872 * send a notification if there are no outstanding unreceived responses.
873 * If we may be buffering transmit requests for any reason then we must be rather
874 * more conservative and treat this as the final check for pending work.
876 void netif_schedule_work(netif_t *netif)
880 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
881 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
883 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
887 add_to_net_schedule_list_tail(netif);
888 maybe_schedule_tx_action(GET_GROUP_INDEX(netif));
892 void netif_deschedule_work(netif_t *netif)
894 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
896 spin_lock_irq(&netbk->schedule_list_lock);
897 remove_from_net_schedule_list(netif);
898 spin_unlock_irq(&netbk->schedule_list_lock);
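/*
 * Credit-based TX scheduling: each interface may consume up to credit_bytes
 * of transmit requests per credit_usec interval.  net_tx_action() charges
 * every request against remaining_credit and, once the allowance is used
 * up, arms credit_timeout so that the credit is topped up again and the
 * interface rescheduled (see tx_credit_callback() below).
 */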
902 static void tx_add_credit(netif_t *netif)
904 unsigned long max_burst, max_credit;
907 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
908 * Otherwise the interface can seize up due to insufficient credit.
910 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
911 max_burst = min(max_burst, 131072UL);
912 max_burst = max(max_burst, netif->credit_bytes);
914 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
915 max_credit = netif->remaining_credit + netif->credit_bytes;
916 if (max_credit < netif->remaining_credit)
917 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
919 netif->remaining_credit = min(max_credit, max_burst);
922 static void tx_credit_callback(unsigned long data)
924 netif_t *netif = (netif_t *)data;
925 tx_add_credit(netif);
926 netif_schedule_work(netif);
929 static inline int copy_pending_req(struct xen_netbk *netbk,
930 pending_ring_idx_t pending_idx)
932 return gnttab_copy_grant_page(netbk->grant_tx_handle[pending_idx],
933 &netbk->mmap_pages[pending_idx]);
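/*
 * Debug aid for the 'permute_returns' parameter (only honoured for a single
 * group, see net_tx_action_dealloc()): shuffle the queued deallocation
 * entries so that TX responses reach the frontend out of order, exercising
 * frontends that must not depend on in-order completions.
 */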
936 static void permute_dealloc_ring(u16 *dealloc_ring, pending_ring_idx_t dc,
937 pending_ring_idx_t dp)
939 static unsigned random_src = 0x12345678;
941 pending_ring_idx_t dest;
945 dst_offset = (random_src / 256) % (dp - dc);
946 dest = dc + dst_offset;
947 tmp = dealloc_ring[MASK_PEND_IDX(dest)];
948 dealloc_ring[MASK_PEND_IDX(dest)] =
949 dealloc_ring[MASK_PEND_IDX(dc)];
950 dealloc_ring[MASK_PEND_IDX(dc)] = tmp;
956 static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
958 struct netbk_tx_pending_inuse *inuse, *n;
959 gnttab_unmap_grant_ref_t *gop;
961 pending_ring_idx_t dc, dp;
965 dc = netbk->dealloc_cons;
966 gop = netbk->tx_unmap_ops;
969 * Free up any grants we have finished using
972 dp = netbk->dealloc_prod;
974 /* Ensure we see all indices enqueued by netif_idx_release(). */
977 if (MODPARM_permute_returns && netbk_nr_groups == 1)
978 permute_dealloc_ring(netbk->dealloc_ring, dc, dp);
982 struct netbk_tx_pending_inuse *pending_inuse =
983 netbk->pending_inuse;
985 pending_idx = netbk->dealloc_ring[MASK_PEND_IDX(dc++)];
986 list_move_tail(&pending_inuse[pending_idx].list, &list);
988 pfn = idx_to_pfn(netbk, pending_idx);
989 /* Already unmapped? */
990 if (!phys_to_machine_mapping_valid(pfn))
993 gnttab_set_unmap_op(gop, idx_to_kaddr(netbk, pending_idx),
995 netbk->grant_tx_handle[pending_idx]);
999 } while (dp != netbk->dealloc_prod);
1001 netbk->dealloc_cons = dc;
1003 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1004 netbk->tx_unmap_ops,
1005 gop - netbk->tx_unmap_ops))
1008 /* Copy any entries that have been pending for too long. */
1009 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1010 !list_empty(&netbk->pending_inuse_head)) {
1011 list_for_each_entry_safe(inuse, n, &netbk->pending_inuse_head, list) {
1012 struct pending_tx_info *pending_tx_info
1013 = netbk->pending_tx_info;
1015 if (time_after(inuse->alloc_time + HZ / 2, jiffies))
1018 pending_idx = inuse - netbk->pending_inuse;
1020 pending_tx_info[pending_idx].netif->nr_copied_skbs++;
1022 switch (copy_pending_req(netbk, pending_idx)) {
1024 list_move_tail(&inuse->list, &list);
1027 list_del_init(&inuse->list);
1037 list_for_each_entry_safe(inuse, n, &list, list) {
1038 struct pending_tx_info *pending_tx_info =
1039 netbk->pending_tx_info;
1041 pending_idx = inuse - netbk->pending_inuse;
1042 netif = pending_tx_info[pending_idx].netif;
1044 make_tx_response(netif, &pending_tx_info[pending_idx].req,
1045 XEN_NETIF_RSP_OKAY);
1047 /* Ready for next use. */
1048 gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
1050 netbk->pending_ring[MASK_PEND_IDX(netbk->pending_prod++)] =
1055 list_del_init(&inuse->list);
1059 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
1061 RING_IDX cons = netif->tx.req_cons;
1064 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1067 txp = RING_GET_REQUEST(&netif->tx, cons++);
1069 netif->tx.req_cons = cons;
1070 netif_schedule_work(netif);
1074 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
1075 netif_tx_request_t *txp, int work_to_do)
1077 RING_IDX cons = netif->tx.req_cons;
1080 if (!(first->flags & XEN_NETTXF_more_data))
1084 if (frags >= work_to_do) {
1085 DPRINTK("Need more frags\n");
1089 if (unlikely(frags >= MAX_SKB_FRAGS)) {
1090 DPRINTK("Too many frags\n");
1094 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
1096 if (txp->size > first->size) {
1097 DPRINTK("Frags galore\n");
1101 first->size -= txp->size;
1104 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
1105 DPRINTK("txp->offset: %x, size: %u\n",
1106 txp->offset, txp->size);
1109 } while ((txp++)->flags & XEN_NETTXF_more_data);
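/*
 * netbk_count_requests() returns the number of extra fragment slots the
 * packet occupies (0 if it fits in the first request alone), or a negative
 * value on a malformed request, which the caller reports via netbk_tx_err().
 */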
1114 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
1115 struct sk_buff *skb,
1116 netif_tx_request_t *txp,
1117 gnttab_map_grant_ref_t *mop)
1119 struct skb_shared_info *shinfo = skb_shinfo(skb);
1120 skb_frag_t *frags = shinfo->frags;
1121 u16 pending_idx = *(u16 *)skb->data;
1124 /* Skip the first skb fragment if it is on the same page as the header fragment. */
1125 start = (frag_get_pending_idx(frags) == pending_idx);
1127 for (i = start; i < shinfo->nr_frags; i++, txp++) {
1128 struct xen_netbk *netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
1129 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_cons++);
1130 struct pending_tx_info *pending_tx_info =
1131 netbk->pending_tx_info;
1133 pending_idx = netbk->pending_ring[index];
1135 gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
1136 GNTMAP_host_map | GNTMAP_readonly,
1137 txp->gref, netif->domid);
1139 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
1141 pending_tx_info[pending_idx].netif = netif;
1142 frag_set_pending_idx(&frags[i], pending_idx);
1148 static int netbk_tx_check_mop(struct xen_netbk *netbk, struct sk_buff *skb,
1149 gnttab_map_grant_ref_t **mopp)
1151 gnttab_map_grant_ref_t *mop = *mopp;
1152 u16 pending_idx = *(u16 *)skb->data;
1153 struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
1154 netif_t *netif = pending_tx_info[pending_idx].netif;
1155 netif_tx_request_t *txp;
1156 struct skb_shared_info *shinfo = skb_shinfo(skb);
1157 int nr_frags = shinfo->nr_frags;
1160 /* Check status of header. */
1162 if (unlikely(err != GNTST_okay)) {
1163 pending_ring_idx_t index = MASK_PEND_IDX(netbk->pending_prod++);
1165 txp = &pending_tx_info[pending_idx].req;
1166 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1167 netbk->pending_ring[index] = pending_idx;
1170 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1171 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
1172 netbk->grant_tx_handle[pending_idx] = mop->handle;
1175 /* Skip the first skb fragment if it is on the same page as the header fragment. */
1176 start = (frag_get_pending_idx(shinfo->frags) == pending_idx);
1178 for (i = start; i < nr_frags; i++) {
1180 pending_ring_idx_t index;
1182 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
1184 /* Check error status: if okay then remember grant handle. */
1185 newerr = (++mop)->status;
1186 if (likely(newerr == GNTST_okay)) {
1187 set_phys_to_machine(idx_to_pfn(netbk, pending_idx),
1188 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
1189 netbk->grant_tx_handle[pending_idx] = mop->handle;
1190 /* Had a previous error? Invalidate this fragment. */
1191 if (unlikely(err != GNTST_okay))
1192 netif_idx_release(netbk, pending_idx);
1196 /* Error on this fragment: respond to client with an error. */
1197 txp = &pending_tx_info[pending_idx].req;
1198 make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
1199 index = MASK_PEND_IDX(netbk->pending_prod++);
1200 netbk->pending_ring[index] = pending_idx;
1203 /* Not the first error? Preceding frags already invalidated. */
1204 if (err != GNTST_okay)
1207 /* First error: invalidate header and preceding fragments. */
1208 pending_idx = *((u16 *)skb->data);
1209 netif_idx_release(netbk, pending_idx);
1210 for (j = start; j < i; j++) {
1211 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1212 netif_idx_release(netbk, pending_idx);
1215 /* Remember the error: invalidate all subsequent fragments. */
1223 static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1225 struct skb_shared_info *shinfo = skb_shinfo(skb);
1226 int nr_frags = shinfo->nr_frags;
1229 for (i = 0; i < nr_frags; i++) {
1230 netif_tx_request_t *txp;
1231 u16 pending_idx = frag_get_pending_idx(shinfo->frags + i);
1233 netbk->pending_inuse[pending_idx].alloc_time = jiffies;
1234 list_add_tail(&netbk->pending_inuse[pending_idx].list,
1235 &netbk->pending_inuse_head);
1237 txp = &netbk->pending_tx_info[pending_idx].req;
1238 __skb_fill_page_desc(skb, i, netbk->mmap_pages[pending_idx],
1239 txp->offset, txp->size);
1241 skb->len += txp->size;
1242 skb->data_len += txp->size;
1243 skb->truesize += txp->size;
1247 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1250 struct netif_extra_info extra;
1251 RING_IDX cons = netif->tx.req_cons;
1254 if (unlikely(work_to_do-- <= 0)) {
1255 DPRINTK("Missing extra info\n");
1259 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1261 if (unlikely(!extra.type ||
1262 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1263 netif->tx.req_cons = ++cons;
1264 DPRINTK("Invalid extra type: %d\n", extra.type);
1268 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1269 netif->tx.req_cons = ++cons;
1270 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1275 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1277 if (!gso->u.gso.size) {
1278 DPRINTK("GSO size must not be zero.\n");
1282 /* Currently only TCPv4 segmentation offload is supported. */
1283 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1284 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1288 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1289 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1291 /* Header must be checked, and gso_segs computed. */
1292 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1293 skb_shinfo(skb)->gso_segs = 0;
1298 /* Called after netfront has transmitted */
1299 static void net_tx_action(unsigned long group)
1301 struct xen_netbk *netbk = &xen_netbk[group];
1302 struct sk_buff *skb;
1304 netif_tx_request_t txreq;
1305 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1306 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1309 gnttab_map_grant_ref_t *mop;
1310 unsigned int data_len;
1311 int ret, work_to_do;
1313 net_tx_action_dealloc(netbk);
1315 mop = netbk->tx_map_ops;
1316 BUILD_BUG_ON(MAX_SKB_FRAGS >= MAX_PENDING_REQS);
1317 while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1318 !list_empty(&netbk->schedule_list)) {
1319 /* Get a netif from the list with work to do. */
1320 netif = poll_net_schedule_list(netbk);
1324 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1330 i = netif->tx.req_cons;
1331 rmb(); /* Ensure that we see the request before we copy it. */
1332 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1334 /* Credit-based scheduling. */
1335 if (txreq.size > netif->remaining_credit) {
1336 unsigned long now = jiffies;
1337 unsigned long next_credit =
1338 netif->credit_timeout.expires +
1339 msecs_to_jiffies(netif->credit_usec / 1000);
1341 /* Timer could already be pending in rare cases. */
1342 if (timer_pending(&netif->credit_timeout)) {
1347 /* Passed the point where we can replenish credit? */
1348 if (time_after_eq(now, next_credit)) {
1349 netif->credit_timeout.expires = now;
1350 tx_add_credit(netif);
1353 /* Still too big to send right now? Set a callback. */
1354 if (txreq.size > netif->remaining_credit) {
1355 netif->credit_timeout.data =
1356 (unsigned long)netif;
1357 netif->credit_timeout.function =
1359 mod_timer(&netif->credit_timeout, next_credit);
1364 netif->remaining_credit -= txreq.size;
1367 netif->tx.req_cons = ++i;
1369 memset(extras, 0, sizeof(extras));
1370 if (txreq.flags & XEN_NETTXF_extra_info) {
1371 work_to_do = netbk_get_extras(netif, extras,
1373 i = netif->tx.req_cons;
1374 if (unlikely(work_to_do < 0)) {
1375 netbk_tx_err(netif, &txreq, i);
1380 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1381 if (unlikely(ret < 0)) {
1382 netbk_tx_err(netif, &txreq, i - ret);
1387 if (unlikely(txreq.size < ETH_HLEN)) {
1388 DPRINTK("Bad packet size: %d\n", txreq.size);
1389 netbk_tx_err(netif, &txreq, i);
1393 /* The payload must not cross a page boundary, as it cannot be fragmented. */
1394 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1395 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1396 txreq.offset, txreq.size,
1397 (txreq.offset & ~PAGE_MASK) + txreq.size);
1398 netbk_tx_err(netif, &txreq, i);
1402 pending_idx = netbk->pending_ring[MASK_PEND_IDX(netbk->pending_cons)];
1404 data_len = (txreq.size > PKT_PROT_LEN &&
1405 ret < MAX_SKB_FRAGS) ?
1406 PKT_PROT_LEN : txreq.size;
1408 skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
1409 GFP_ATOMIC | __GFP_NOWARN);
1410 if (unlikely(skb == NULL)) {
1411 DPRINTK("Can't allocate a skb in start_xmit.\n");
1412 netbk_tx_err(netif, &txreq, i);
1416 /* Packets passed to netif_rx() must have some headroom. */
1417 skb_reserve(skb, 16 + NET_IP_ALIGN);
1419 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1420 struct netif_extra_info *gso;
1421 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1423 if (netbk_set_skb_gso(skb, gso)) {
1425 netbk_tx_err(netif, &txreq, i);
1430 gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
1431 GNTMAP_host_map | GNTMAP_readonly,
1432 txreq.gref, netif->domid);
1435 memcpy(&netbk->pending_tx_info[pending_idx].req,
1436 &txreq, sizeof(txreq));
1437 netbk->pending_tx_info[pending_idx].netif = netif;
1438 *((u16 *)skb->data) = pending_idx;
1440 __skb_put(skb, data_len);
1442 skb_shinfo(skb)->nr_frags = ret;
1443 if (data_len < txreq.size)
1444 skb_shinfo(skb)->nr_frags++;
1446 pending_idx = INVALID_PENDING_IDX;
1447 frag_set_pending_idx(skb_shinfo(skb)->frags, pending_idx);
1449 __skb_queue_tail(&netbk->tx_queue, skb);
1451 netbk->pending_cons++;
1453 mop = netbk_get_requests(netif, skb, txfrags, mop);
1455 netif->tx.req_cons = i;
1456 netif_schedule_work(netif);
1458 if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
1462 if (mop == netbk->tx_map_ops)
1465 /* NOTE: some maps may fail with GNTST_eagain, which could be successfully
1466 * retried in the backend after a delay. However, we can also fail the tx
1467 * req and let the frontend resend the relevant packet again. This is fine
1468 * because it is unlikely that a network buffer will be paged out or shared,
1469 * and therefore it is unlikely to fail with GNTST_eagain. */
1470 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
1472 mop - netbk->tx_map_ops);
1475 mop = netbk->tx_map_ops;
1476 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1477 struct net_device *dev;
1478 netif_tx_request_t *txp;
1480 pending_idx = *((u16 *)skb->data);
1481 netif = netbk->pending_tx_info[pending_idx].netif;
1483 txp = &netbk->pending_tx_info[pending_idx].req;
1485 /* Check the remap error code. */
1486 if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
1487 DPRINTK("netback grant failed.\n");
1488 skb_shinfo(skb)->nr_frags = 0;
1490 dev->stats.rx_dropped++;
1494 data_len = skb->len;
1496 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1498 if (data_len < txp->size) {
1499 /* Append the packet payload as a fragment. */
1500 txp->offset += data_len;
1501 txp->size -= data_len;
1503 /* Schedule a response immediately. */
1504 netif_idx_release(netbk, pending_idx);
1507 if (txp->flags & XEN_NETTXF_csum_blank)
1508 skb->ip_summed = CHECKSUM_PARTIAL;
1509 else if (txp->flags & XEN_NETTXF_data_validated)
1510 skb->ip_summed = CHECKSUM_UNNECESSARY;
1512 skb->ip_summed = CHECKSUM_NONE;
1514 netbk_fill_frags(netbk, skb);
1517 * If the initial fragment was < PKT_PROT_LEN then
1518 * pull through some bytes from the other fragments to
1519 * increase the linear region to PKT_PROT_LEN bytes.
1521 if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1522 int target = min_t(int, skb->len, PKT_PROT_LEN);
1523 __pskb_pull_tail(skb, target - skb_headlen(skb));
1526 skb->protocol = eth_type_trans(skb, dev);
1528 if (skb_checksum_setup(skb, &netif->rx_gso_csum_fixups)) {
1529 DPRINTK("Can't setup checksum in net_tx_action\n");
1534 if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
1535 unlikely(skb_linearize(skb))) {
1536 DPRINTK("Can't linearize skb in net_tx_action.\n");
1538 dev->stats.rx_errors++;
1542 dev->stats.rx_bytes += skb->len;
1543 dev->stats.rx_packets++;
1552 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1553 !list_empty(&netbk->pending_inuse_head)) {
1554 struct netbk_tx_pending_inuse *oldest;
1556 oldest = list_entry(netbk->pending_inuse_head.next,
1557 struct netbk_tx_pending_inuse, list);
1558 mod_timer(&netbk->tx_pending_timer, oldest->alloc_time + HZ);
1562 static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1564 unsigned long flags;
1566 spin_lock_irqsave(&netbk->release_lock, flags);
1567 netbk->dealloc_ring[MASK_PEND_IDX(netbk->dealloc_prod)] = pending_idx;
1568 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1570 netbk->dealloc_prod++;
1571 spin_unlock_irqrestore(&netbk->release_lock, flags);
1573 netbk_schedule(netbk);
1576 static void netif_page_release(struct page *page, unsigned int order)
1578 unsigned int idx = netif_page_index(page);
1579 unsigned int group = netif_page_group(page);
1580 struct xen_netbk *netbk = &xen_netbk[group];
1583 BUG_ON(group >= netbk_nr_groups || idx >= MAX_PENDING_REQS);
1584 BUG_ON(netbk->mmap_pages[idx] != page);
1585 netif_idx_release(netbk, idx);
1588 irqreturn_t netif_be_int(int irq, void *dev_id)
1590 netif_t *netif = dev_id;
1591 unsigned int group = GET_GROUP_INDEX(netif);
1593 if (unlikely(group >= netbk_nr_groups)) {
1595 * Short of having a way to bind the IRQ in disabled mode
1596 * (IRQ_NOAUTOEN), we have to ignore the first invocation(s)
1597 * (before we got assigned to a group).
1599 BUG_ON(group != UINT_MAX);
1603 add_to_net_schedule_list_tail(netif);
1604 maybe_schedule_tx_action(group);
1606 if (netif_schedulable(netif) && !netbk_queue_full(netif))
1607 netif_wake_queue(netif->dev);
1612 static void make_tx_response(netif_t *netif,
1613 netif_tx_request_t *txp,
1616 RING_IDX i = netif->tx.rsp_prod_pvt;
1617 netif_tx_response_t *resp;
1620 resp = RING_GET_RESPONSE(&netif->tx, i);
1624 if (txp->flags & XEN_NETTXF_extra_info)
1625 RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1627 netif->tx.rsp_prod_pvt = ++i;
1628 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1630 notify_remote_via_irq(netif->irq);
1632 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1633 if (i == netif->tx.req_cons) {
1635 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1637 add_to_net_schedule_list_tail(netif);
1642 static netif_rx_response_t *make_rx_response(netif_t *netif,
1649 RING_IDX i = netif->rx.rsp_prod_pvt;
1650 netif_rx_response_t *resp;
1652 resp = RING_GET_RESPONSE(&netif->rx, i);
1653 resp->offset = offset;
1654 resp->flags = flags;
1656 resp->status = (s16)size;
1658 resp->status = (s16)st;
1660 netif->rx.rsp_prod_pvt = ++i;
1665 #ifdef NETBE_DEBUG_INTERRUPT
1666 static irqreturn_t netif_be_dbg(int irq, void *dev_id)
1669 unsigned int i = 0, group;
1671 pr_alert("netif_schedule_list:\n");
1673 for (group = 0; group < netbk_nr_groups; ++group) {
1674 struct xen_netbk *netbk = &xen_netbk[group];
1676 spin_lock_irq(&netbk->schedule_list_lock);
1678 list_for_each_entry(netif, &netbk->schedule_list, list) {
1679 pr_alert(" %d: private(rx_req_cons=%08x "
1680 "rx_resp_prod=%08x\n", i,
1681 netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1682 pr_alert(" tx_req_cons=%08x tx_resp_prod=%08x)\n",
1683 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1684 pr_alert(" shared(rx_req_prod=%08x "
1685 "rx_resp_prod=%08x\n",
1686 netif->rx.sring->req_prod,
1687 netif->rx.sring->rsp_prod);
1688 pr_alert(" rx_event=%08x tx_req_prod=%08x\n",
1689 netif->rx.sring->rsp_event,
1690 netif->tx.sring->req_prod);
1691 pr_alert(" tx_resp_prod=%08x, tx_event=%08x)\n",
1692 netif->tx.sring->rsp_prod,
1693 netif->tx.sring->rsp_event);
1697 spin_unlock_irq(&netbk->schedule_list_lock);
1700 pr_alert(" ** End of netif_schedule_list **\n");
1705 static struct irqaction netif_be_dbg_action = {
1706 .handler = netif_be_dbg,
1707 .flags = IRQF_SHARED,
1708 .name = "net-be-dbg"
1712 static inline int rx_work_todo(struct xen_netbk *netbk)
1714 return !skb_queue_empty(&netbk->rx_queue);
1717 static inline int tx_work_todo(struct xen_netbk *netbk)
1719 if (netbk->dealloc_cons != netbk->dealloc_prod)
1722 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1723 !list_empty(&netbk->pending_inuse_head))
1726 if (nr_pending_reqs(netbk) + MAX_SKB_FRAGS < MAX_PENDING_REQS &&
1727 !list_empty(&netbk->schedule_list))
1733 static int netbk_action_thread(void *index)
1735 unsigned long group = (unsigned long)index;
1736 struct xen_netbk *netbk = &xen_netbk[group];
1738 while (!kthread_should_stop()) {
1739 wait_event_interruptible(netbk->netbk_action_wq,
1740 rx_work_todo(netbk) ||
1741 tx_work_todo(netbk) ||
1742 kthread_should_stop());
1745 if (rx_work_todo(netbk))
1746 net_rx_action(group);
1748 if (tx_work_todo(netbk))
1749 net_tx_action(group);
1756 static int __init netback_init(void)
1758 unsigned int i, group;
1762 if (!is_running_on_xen())
1765 group = netbk_nr_groups;
1766 if (!netbk_nr_groups)
1767 netbk_nr_groups = (num_online_cpus() + 1) / 2;
1768 if (netbk_nr_groups > MAX_GROUPS)
1769 netbk_nr_groups = MAX_GROUPS;
1772 xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
1773 } while (!xen_netbk && (netbk_nr_groups >>= 1));
1776 if (group && netbk_nr_groups != group)
1777 pr_warn("netback: only using %u (instead of %u) groups\n",
1778 netbk_nr_groups, group);
1780 /* We can increase reservation by this much in net_rx_action(). */
1781 balloon_update_driver_allowance(netbk_nr_groups * NET_RX_RING_SIZE);
1783 for (group = 0; group < netbk_nr_groups; group++) {
1784 struct xen_netbk *netbk = &xen_netbk[group];
1786 skb_queue_head_init(&netbk->rx_queue);
1787 skb_queue_head_init(&netbk->tx_queue);
1789 init_timer(&netbk->net_timer);
1790 netbk->net_timer.data = group;
1791 netbk->net_timer.function = netbk_schedule_group;
1793 init_timer(&netbk->tx_pending_timer);
1794 netbk->tx_pending_timer.data = group;
1795 netbk->tx_pending_timer.function = netbk_schedule_group;
1797 netbk->pending_prod = MAX_PENDING_REQS;
1799 INIT_LIST_HEAD(&netbk->pending_inuse_head);
1800 INIT_LIST_HEAD(&netbk->schedule_list);
1802 spin_lock_init(&netbk->schedule_list_lock);
1803 spin_lock_init(&netbk->release_lock);
1806 alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1807 if (netbk->mmap_pages == NULL) {
1808 pr_err("%s: out of memory\n", __func__);
1813 for (i = 0; i < MAX_PENDING_REQS; i++) {
1814 page = netbk->mmap_pages[i];
1815 SetPageForeign(page, netif_page_release);
1816 netif_set_page_ext(page, group, i);
1817 netbk->pending_ring[i] = i;
1818 INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
1822 init_waitqueue_head(&netbk->netbk_action_wq);
1823 netbk->task = kthread_create(netbk_action_thread,
1824 (void *)(long)group,
1825 "netback/%u", group);
1827 if (IS_ERR(netbk->task)) {
1828 pr_err("netback: kthread_create() failed\n");
1829 rc = PTR_ERR(netbk->task);
1833 kthread_bind(netbk->task,
1834 group % num_online_cpus());
1835 wake_up_process(netbk->task);
1837 tasklet_init(&netbk->net_tx_tasklet, net_tx_action, group);
1838 tasklet_init(&netbk->net_rx_tasklet, net_rx_action, group);
1842 netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
1843 if (MODPARM_copy_skb) {
1844 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
1846 netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
1848 netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
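/*
 * Resulting behaviour: NETBK_ALWAYS_COPY_SKB linearises every skb built in
 * net_tx_action() because GNTTABOP_unmap_and_replace is unavailable;
 * NETBK_DELAYED_COPY_SKB copies a grant-mapped page only if it is still in
 * use more than half a second after completion (see net_tx_action_dealloc());
 * NETBK_DONT_COPY_SKB does neither.
 */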
1853 netif_xenbus_init();
1855 #ifdef NETBE_DEBUG_INTERRUPT
1856 (void)bind_virq_to_irqaction(VIRQ_DEBUG,
1858 &netif_be_dbg_action);
1865 struct xen_netbk *netbk = &xen_netbk[group];
1867 if (use_kthreads && netbk->task && !IS_ERR(netbk->task))
1868 kthread_stop(netbk->task);
1869 if (netbk->mmap_pages)
1870 free_empty_pages_and_pagevec(netbk->mmap_pages,
1874 balloon_update_driver_allowance(-(long)netbk_nr_groups
1875 * NET_RX_RING_SIZE);
1880 module_init(netback_init);
1882 MODULE_LICENSE("Dual BSD/GPL");
1883 MODULE_ALIAS("xen-backend:vif");