1 /******************************************************************************
2  * Virtual network driver for conversing with remote driver backends.
3  *
4  * Copyright (c) 2002-2005, K A Fraser
5  * Copyright (c) 2005, XenSource Ltd
6  * Copyright (C) 2007 Solarflare Communications, Inc.
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32
33 #include <linux/module.h>
34 #include <linux/version.h>
35 #include <linux/kernel.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/string.h>
39 #include <linux/errno.h>
40 #include <linux/netdevice.h>
41 #include <linux/inetdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/bitops.h>
46 #include <linux/ethtool.h>
47 #include <linux/in.h>
48 #include <linux/if_ether.h>
49 #include <linux/io.h>
50 #include <linux/moduleparam.h>
51 #include <net/sock.h>
52 #include <net/pkt_sched.h>
53 #include <net/route.h>
54 #include <asm/uaccess.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/interface/io/netif.h>
58 #include <xen/interface/memory.h>
59 #include <xen/balloon.h>
60 #include <asm/page.h>
61 #include <asm/maddr.h>
63 #include <xen/interface/grant_table.h>
64 #include <xen/gnttab.h>
65 #include <xen/net-util.h>
66
67 struct netfront_cb {
68         struct page *page;
69         unsigned offset;
70 };
71
72 #define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
73
74 #include "netfront.h"
75
76 /*
77  * Mutually-exclusive module options to select receive data path:
78  *  rx_copy : Packets are copied by network backend into local memory
79  *  rx_flip : Page containing packet data is transferred to our ownership
80  * For fully-virtualised guests there is no option - copying must be used.
81  * For paravirtualised guests, flipping is the default.
82  */
83 #ifdef CONFIG_XEN
84 static bool MODPARM_rx_copy;
85 module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
86 MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
87 static bool MODPARM_rx_flip;
88 module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
89 MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
90 #else
91 # define MODPARM_rx_copy true
92 # define MODPARM_rx_flip false
93 #endif
94
95 #define RX_COPY_THRESHOLD 256
96
97 /* If we don't have GSO, fake things up so that we never try to use it. */
98 #if defined(NETIF_F_GSO)
99 #define HAVE_GSO                        1
100 #define HAVE_TSO                        1 /* TSO is a subset of GSO */
101 #define HAVE_CSUM_OFFLOAD               1
102 static inline void dev_disable_gso_features(struct net_device *dev)
103 {
104         /* Turn off all GSO bits except ROBUST. */
105         dev->features &= ~NETIF_F_GSO_MASK;
106         dev->features |= NETIF_F_GSO_ROBUST;
107 }
108 #elif defined(NETIF_F_TSO)
109 #define HAVE_GSO                       0
110 #define HAVE_TSO                       1
111
112 /* Some older kernels cannot cope with incorrect checksums,
113  * particularly in netfilter. I'm not sure there is 100% correlation
114  * with the presence of NETIF_F_TSO but it appears to be a good first
115  * approximation.
116  */
117 #define HAVE_CSUM_OFFLOAD              0
118
119 #define gso_size tso_size
120 #define gso_segs tso_segs
121 static inline void dev_disable_gso_features(struct net_device *dev)
122 {
123        /* Turn off all TSO bits. */
124        dev->features &= ~NETIF_F_TSO;
125 }
126 static inline int skb_is_gso(const struct sk_buff *skb)
127 {
128         return skb_shinfo(skb)->tso_size;
129 }
130 static inline int skb_gso_ok(struct sk_buff *skb, int features)
131 {
132         return (features & NETIF_F_TSO);
133 }
134
135 #define netif_skb_features(skb) ((skb)->dev->features)
136 static inline int netif_needs_gso(struct sk_buff *skb, int features)
137 {
138         return skb_is_gso(skb) &&
139                (!skb_gso_ok(skb, features) ||
140                 unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
141 }
142 #else
143 #define HAVE_GSO                        0
144 #define HAVE_TSO                        0
145 #define HAVE_CSUM_OFFLOAD               0
146 #define netif_needs_gso(skb, feat)      0
147 #define dev_disable_gso_features(dev)   ((void)0)
148 #define ethtool_op_set_tso(dev, data)   (-ENOSYS)
149 #endif
150
151 struct netfront_rx_info {
152         struct netif_rx_response rx;
153         struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
154 };
155
156 /*
157  * Implement our own carrier flag: the network stack's version causes delays
158  * when the carrier is re-enabled (in particular, dev_activate() may not
159  * immediately be called, which can cause packet loss).
160  */
161 #define netfront_carrier_on(netif)      ((netif)->carrier = 1)
162 #define netfront_carrier_off(netif)     ((netif)->carrier = 0)
163 #define netfront_carrier_ok(netif)      ((netif)->carrier)
164
165 /*
166  * Access macros for acquiring and freeing slots in tx_skbs[].
167  */
168
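/*
 * tx_skbs[] doubles as a free list: entry 0 holds the index of the first
 * free slot (cast to a pointer), and each free slot stores the index of
 * the next free one in the same way.  A slot therefore contains either a
 * real skb pointer or a small integer, which is how netif_release_tx_bufs()
 * distinguishes the two.
 */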
169 static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
170 {
171         list[id] = list[0];
172         list[0]  = (void *)(unsigned long)id;
173 }
174
175 static inline unsigned short get_id_from_freelist(struct sk_buff **list)
176 {
177         unsigned int id = (unsigned int)(unsigned long)list[0];
178         list[0] = list[id];
179         return id;
180 }
181
182 static inline int xennet_rxidx(RING_IDX idx)
183 {
184         return idx & (NET_RX_RING_SIZE - 1);
185 }
186
187 static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
188                                                 RING_IDX ri)
189 {
190         int i = xennet_rxidx(ri);
191         struct sk_buff *skb = np->rx_skbs[i];
192         np->rx_skbs[i] = NULL;
193         return skb;
194 }
195
196 static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
197                                             RING_IDX ri)
198 {
199         int i = xennet_rxidx(ri);
200         grant_ref_t ref = np->grant_rx_ref[i];
201         np->grant_rx_ref[i] = GRANT_INVALID_REF;
202         return ref;
203 }
204
205 #define DPRINTK(fmt, args...)                           \
206         pr_debug("netfront (%s:%d) " fmt,               \
207                  __FUNCTION__, __LINE__, ##args)
208 #define IPRINTK(fmt, args...) pr_info("netfront: " fmt, ##args)
209 #define WPRINTK(fmt, args...) pr_warning("netfront: " fmt, ##args)
210
211 static int setup_device(struct xenbus_device *, struct netfront_info *);
212 static struct net_device *create_netdev(struct xenbus_device *);
213
214 static void end_access(int, void *);
215 static void netif_release_rings(struct netfront_info *);
216 static void netif_disconnect_backend(struct netfront_info *);
217
218 static int network_connect(struct net_device *);
219 static void network_tx_buf_gc(struct net_device *);
220 static void network_alloc_rx_buffers(struct net_device *);
221
222 static irqreturn_t netif_int(int irq, void *dev_id);
223
224 #ifdef CONFIG_SYSFS
225 static int xennet_sysfs_addif(struct net_device *netdev);
226 static void xennet_sysfs_delif(struct net_device *netdev);
227 #else /* !CONFIG_SYSFS */
228 #define xennet_sysfs_addif(dev) (0)
229 #define xennet_sysfs_delif(dev) do { } while(0)
230 #endif
231
232 static inline bool xennet_can_sg(struct net_device *dev)
233 {
234         return dev->features & NETIF_F_SG;
235 }
236
237 /*
238  * Work around net.ipv4.conf.*.arp_notify not being enabled by default.
239  */
240 static void __devinit netfront_enable_arp_notify(struct netfront_info *info)
241 {
242 #ifdef CONFIG_INET
243         struct in_device *in_dev;
244
245         rtnl_lock();
246         in_dev = __in_dev_get_rtnl(info->netdev);
247         if (in_dev && !IN_DEV_CONF_GET(in_dev, ARP_NOTIFY))
248                 IN_DEV_CONF_SET(in_dev, ARP_NOTIFY, 1);
249         rtnl_unlock();
250         if (!in_dev)
251                 pr_warn("Cannot enable ARP notification on %s\n",
252                         info->xbdev->nodename);
253 #endif
254 }
255
256 /**
257  * Entry point to this code when a new device is created.  Allocate the basic
258  * structures and the ring buffers for communication with the backend, and
259  * inform the backend of the appropriate details for those.
260  */
261 static int __devinit netfront_probe(struct xenbus_device *dev,
262                                     const struct xenbus_device_id *id)
263 {
264         int err;
265         struct net_device *netdev;
266         struct netfront_info *info;
267
268         netdev = create_netdev(dev);
269         if (IS_ERR(netdev)) {
270                 err = PTR_ERR(netdev);
271                 xenbus_dev_fatal(dev, err, "creating netdev");
272                 return err;
273         }
274
275         info = netdev_priv(netdev);
276         dev_set_drvdata(&dev->dev, info);
277
278         err = register_netdev(info->netdev);
279         if (err) {
280                 pr_warning("%s: register_netdev err=%d\n",
281                            __FUNCTION__, err);
282                 goto fail;
283         }
284
285         netfront_enable_arp_notify(info);
286
287         err = xennet_sysfs_addif(info->netdev);
288         if (err) {
289                 unregister_netdev(info->netdev);
290                 pr_warning("%s: add sysfs failed err=%d\n",
291                            __FUNCTION__, err);
292                 goto fail;
293         }
294
295         return 0;
296
297  fail:
298         free_netdev(netdev);
299         dev_set_drvdata(&dev->dev, NULL);
300         return err;
301 }
302
303 static int __devexit netfront_remove(struct xenbus_device *dev)
304 {
305         struct netfront_info *info = dev_get_drvdata(&dev->dev);
306
307         DPRINTK("%s\n", dev->nodename);
308
309         netfront_accelerator_call_remove(info, dev);
310
311         netif_disconnect_backend(info);
312
313         del_timer_sync(&info->rx_refill_timer);
314
315         xennet_sysfs_delif(info->netdev);
316
317         unregister_netdev(info->netdev);
318
319         free_percpu(info->stats);
320
321         free_netdev(info->netdev);
322
323         return 0;
324 }
325
326
327 static int netfront_suspend(struct xenbus_device *dev)
328 {
329         struct netfront_info *info = dev_get_drvdata(&dev->dev);
330         return netfront_accelerator_suspend(info, dev);
331 }
332
333
334 static int netfront_suspend_cancel(struct xenbus_device *dev)
335 {
336         struct netfront_info *info = dev_get_drvdata(&dev->dev);
337         return netfront_accelerator_suspend_cancel(info, dev);
338 }
339
340
341 /**
342  * We are reconnecting to the backend, due to a suspend/resume, or a backend
343  * driver restart.  We tear down our netif structure and recreate it, but
344  * leave the device-layer structures intact so that this is transparent to the
345  * rest of the kernel.
346  */
347 static int netfront_resume(struct xenbus_device *dev)
348 {
349         struct netfront_info *info = dev_get_drvdata(&dev->dev);
350
351         DPRINTK("%s\n", dev->nodename);
352
353         netfront_accelerator_resume(info, dev);
354
355         netif_disconnect_backend(info);
356         return 0;
357 }
358
359 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
360 {
361         char *s, *e, *macstr;
362         int i;
363
364         macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
365         if (IS_ERR(macstr))
366                 return PTR_ERR(macstr);
367
368         for (i = 0; i < ETH_ALEN; i++) {
369                 mac[i] = simple_strtoul(s, &e, 16);
370                 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
371                         kfree(macstr);
372                         return -ENOENT;
373                 }
374                 s = e+1;
375         }
376
377         kfree(macstr);
378         return 0;
379 }
380
381 /* Common code used when first setting up, and when resuming. */
382 static int talk_to_backend(struct xenbus_device *dev,
383                            struct netfront_info *info)
384 {
385         const char *message;
386         struct xenbus_transaction xbt;
387         int err;
388
389         /* Read mac only in the first setup. */
390         if (!is_valid_ether_addr(info->mac)) {
391                 err = xen_net_read_mac(dev, info->mac);
392                 if (err) {
393                         xenbus_dev_fatal(dev, err, "parsing %s/mac",
394                                          dev->nodename);
395                         goto out;
396                 }
397         }
398
399         /* Create shared ring, alloc event channel. */
400         err = setup_device(dev, info);
401         if (err)
402                 goto out;
403
404         /* This will load an accelerator if one is configured when the
405          * watch fires */
406         netfront_accelerator_add_watch(info);
407
408 again:
409         err = xenbus_transaction_start(&xbt);
410         if (err) {
411                 xenbus_dev_fatal(dev, err, "starting transaction");
412                 goto destroy_ring;
413         }
414
415         err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
416                             info->tx_ring_ref);
417         if (err) {
418                 message = "writing tx ring-ref";
419                 goto abort_transaction;
420         }
421         err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
422                             info->rx_ring_ref);
423         if (err) {
424                 message = "writing rx ring-ref";
425                 goto abort_transaction;
426         }
427         err = xenbus_printf(xbt, dev->nodename,
428                             "event-channel", "%u",
429                             irq_to_evtchn_port(info->irq));
430         if (err) {
431                 message = "writing event-channel";
432                 goto abort_transaction;
433         }
434
435         err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
436                             info->copying_receiver);
437         if (err) {
438                 message = "writing request-rx-copy";
439                 goto abort_transaction;
440         }
441
442         err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
443         if (err) {
444                 message = "writing feature-rx-notify";
445                 goto abort_transaction;
446         }
447
448         err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload",
449                             "%d", !HAVE_CSUM_OFFLOAD);
450         if (err) {
451                 message = "writing feature-no-csum-offload";
452                 goto abort_transaction;
453         }
454
455         err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
456         if (err) {
457                 message = "writing feature-sg";
458                 goto abort_transaction;
459         }
460
461         err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d",
462                             HAVE_TSO);
463         if (err) {
464                 message = "writing feature-gso-tcpv4";
465                 goto abort_transaction;
466         }
467
468         err = xenbus_transaction_end(xbt, 0);
469         if (err) {
470                 if (err == -EAGAIN)
471                         goto again;
472                 xenbus_dev_fatal(dev, err, "completing transaction");
473                 goto destroy_ring;
474         }
475
476         return 0;
477
478  abort_transaction:
479         xenbus_transaction_end(xbt, 1);
480         xenbus_dev_fatal(dev, err, "%s", message);
481  destroy_ring:
482         netfront_accelerator_call_remove(info, dev);
483         netif_disconnect_backend(info);
484  out:
485         return err;
486 }
487
488 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
489 {
490         struct netif_tx_sring *txs;
491         struct netif_rx_sring *rxs;
492         int err;
493         struct net_device *netdev = info->netdev;
494
495         info->tx_ring_ref = GRANT_INVALID_REF;
496         info->rx_ring_ref = GRANT_INVALID_REF;
497         info->rx.sring = NULL;
498         info->tx.sring = NULL;
499         info->irq = 0;
500
501         txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
502         if (!txs) {
503                 err = -ENOMEM;
504                 xenbus_dev_fatal(dev, err, "allocating tx ring page");
505                 goto fail;
506         }
507         SHARED_RING_INIT(txs);
508         FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
509
510         err = xenbus_grant_ring(dev, virt_to_mfn(txs));
511         if (err < 0) {
512                 free_page((unsigned long)txs);
513                 goto fail;
514         }
515         info->tx_ring_ref = err;
516
517         rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
518         if (!rxs) {
519                 err = -ENOMEM;
520                 xenbus_dev_fatal(dev, err, "allocating rx ring page");
521                 goto fail;
522         }
523         SHARED_RING_INIT(rxs);
524         FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
525
526         err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
527         if (err < 0) {
528                 free_page((unsigned long)rxs);
529                 goto fail;
530         }
531         info->rx_ring_ref = err;
532
533         memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
534
535         err = bind_listening_port_to_irqhandler(
536                 dev->otherend_id, netif_int, 0, netdev->name, netdev);
537         if (err < 0)
538                 goto fail;
539         info->irq = err;
540
541         return 0;
542
543  fail:
544         netif_release_rings(info);
545         return err;
546 }
547
548 /**
549  * Callback received when the backend's state changes.
550  */
551 static void backend_changed(struct xenbus_device *dev,
552                             enum xenbus_state backend_state)
553 {
554         struct netfront_info *np = dev_get_drvdata(&dev->dev);
555         struct net_device *netdev = np->netdev;
556
557         DPRINTK("%s\n", xenbus_strstate(backend_state));
558
559         switch (backend_state) {
560         case XenbusStateInitialising:
561         case XenbusStateInitialised:
562         case XenbusStateReconfiguring:
563         case XenbusStateReconfigured:
564         case XenbusStateUnknown:
565         case XenbusStateClosed:
566                 break;
567
568         case XenbusStateInitWait:
569                 if (dev->state != XenbusStateInitialising)
570                         break;
571                 if (network_connect(netdev) != 0)
572                         break;
573                 xenbus_switch_state(dev, XenbusStateConnected);
574                 break;
575
576         case XenbusStateConnected:
577                 netif_notify_peers(netdev);
578                 break;
579
580         case XenbusStateClosing:
581                 xenbus_frontend_closed(dev);
582                 break;
583         }
584 }
585
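/*
 * Only report room while at least MAX_SKB_FRAGS + 2 request slots remain
 * free, which should cover a worst-case scatter-gather packet (linear
 * header, fragments and a possible extra-info slot).
 */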
586 static inline int netfront_tx_slot_available(struct netfront_info *np)
587 {
588         return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
589                 (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
590 }
591
592
593 static inline void network_maybe_wake_tx(struct net_device *dev)
594 {
595         struct netfront_info *np = netdev_priv(dev);
596
597         if (unlikely(netif_queue_stopped(dev)) &&
598             netfront_tx_slot_available(np) &&
599             likely(netif_running(dev)) &&
600             netfront_check_accelerator_queue_ready(dev, np))
601                 netif_wake_queue(dev);
602 }
603
604
605 int netfront_check_queue_ready(struct net_device *dev)
606 {
607         struct netfront_info *np = netdev_priv(dev);
608
609         return unlikely(netif_queue_stopped(dev)) &&
610                 netfront_tx_slot_available(np) &&
611                 likely(netif_running(dev));
612 }
613 EXPORT_SYMBOL(netfront_check_queue_ready);
614
615 static int network_open(struct net_device *dev)
616 {
617         struct netfront_info *np = netdev_priv(dev);
618
619         napi_enable(&np->napi);
620
621         spin_lock_bh(&np->rx_lock);
622         if (netfront_carrier_ok(np)) {
623                 network_alloc_rx_buffers(dev);
624                 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
625                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
626                         netfront_accelerator_call_stop_napi_irq(np, dev);
627
628                         napi_schedule(&np->napi);
629                 }
630         }
631         spin_unlock_bh(&np->rx_lock);
632
633         netif_start_queue(dev);
634
635         return 0;
636 }
637
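/*
 * Reap completed transmit responses: end each grant, return the slot id
 * to the free list and drop the corresponding skb reference.  Called with
 * np->tx_lock held, from the transmit path and the interrupt handler.
 */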
638 static void network_tx_buf_gc(struct net_device *dev)
639 {
640         RING_IDX cons, prod;
641         unsigned short id;
642         struct netfront_info *np = netdev_priv(dev);
643         struct sk_buff *skb;
644
645         BUG_ON(!netfront_carrier_ok(np));
646
647         do {
648                 prod = np->tx.sring->rsp_prod;
649                 rmb(); /* Ensure we see responses up to 'prod'. */
650
651                 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
652                         struct netif_tx_response *txrsp;
653
654                         txrsp = RING_GET_RESPONSE(&np->tx, cons);
655                         if (txrsp->status == XEN_NETIF_RSP_NULL)
656                                 continue;
657
658                         id  = txrsp->id;
659                         skb = np->tx_skbs[id];
660                         if (unlikely(gnttab_query_foreign_access(
661                                 np->grant_tx_ref[id]) != 0)) {
662                                 pr_alert("network_tx_buf_gc: grant still"
663                                          " in use by backend domain\n");
664                                 BUG();
665                         }
666                         gnttab_end_foreign_access_ref(np->grant_tx_ref[id]);
667                         gnttab_release_grant_reference(
668                                 &np->gref_tx_head, np->grant_tx_ref[id]);
669                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
670                         add_id_to_freelist(np->tx_skbs, id);
671                         dev_kfree_skb_irq(skb);
672                 }
673
674                 np->tx.rsp_cons = prod;
675
676                 /*
677                  * Set a new event, then check for race with update of tx_cons.
678                  * Note that it is essential to schedule a callback, no matter
679                  * how few buffers are pending. Even if there is space in the
680                  * transmit ring, higher layers may be blocked because too much
681                  * data is outstanding: in such cases notification from Xen is
682                  * likely to be the only kick that we'll get.
683                  */
684                 np->tx.sring->rsp_event =
685                         prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
686                 mb();
687         } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
688
689         network_maybe_wake_tx(dev);
690 }
691
692 static void rx_refill_timeout(unsigned long data)
693 {
694         struct net_device *dev = (struct net_device *)data;
695         struct netfront_info *np = netdev_priv(dev);
696
697         netfront_accelerator_call_stop_napi_irq(np, dev);
698
699         napi_schedule(&np->napi);
700 }
701
702 static void network_alloc_rx_buffers(struct net_device *dev)
703 {
704         unsigned short id;
705         struct netfront_info *np = netdev_priv(dev);
706         struct sk_buff *skb;
707         struct page *page;
708         int i, batch_target, notify;
709         RING_IDX req_prod = np->rx.req_prod_pvt;
710         struct xen_memory_reservation reservation;
711         grant_ref_t ref;
712         unsigned long pfn;
713         void *vaddr;
714         int nr_flips;
715         netif_rx_request_t *req;
716
717         if (unlikely(!netfront_carrier_ok(np)))
718                 return;
719
720         /*
721          * Allocate skbuffs greedily, even though we batch updates to the
722          * receive ring. This creates a less bursty demand on the memory
723          * allocator, so should reduce the chance of failed allocation requests
724          * both for ourself and for other kernel subsystems.
725          */
726         batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
727         for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
728                 /*
729                  * Allocate an skb and a page. Do not use __dev_alloc_skb as
730                  * that will allocate page-sized buffers which is not
731                  * necessary here.
732                  * 16 bytes added as necessary headroom for netif_receive_skb.
733                  */
734                 skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
735                                 GFP_ATOMIC | __GFP_NOWARN);
736                 if (unlikely(!skb))
737                         goto no_skb;
738
739                 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
740                 if (!page) {
741                         kfree_skb(skb);
742 no_skb:
743                         /* Any skbuffs queued for refill? Force them out. */
744                         if (i != 0)
745                                 goto refill;
746                         /* Could not allocate any skbuffs. Try again later. */
747                         mod_timer(&np->rx_refill_timer,
748                                   jiffies + (HZ/10));
749                         break;
750                 }
751
752                 skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
753                 __skb_fill_page_desc(skb, 0, page, 0, 0);
754                 skb_shinfo(skb)->nr_frags = 1;
755                 __skb_queue_tail(&np->rx_batch, skb);
756         }
757
758         /* Is the batch large enough to be worthwhile? */
759         if (i < (np->rx_target/2)) {
760                 if (req_prod > np->rx.sring->req_prod)
761                         goto push;
762                 return;
763         }
764
765         /* Adjust our fill target if we risked running out of buffers. */
766         if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
767             ((np->rx_target *= 2) > np->rx_max_target))
768                 np->rx_target = np->rx_max_target;
769
770  refill:
771         for (nr_flips = i = 0; ; i++) {
772                 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
773                         break;
774
775                 skb->dev = dev;
776
777                 id = xennet_rxidx(req_prod + i);
778
779                 BUG_ON(np->rx_skbs[id]);
780                 np->rx_skbs[id] = skb;
781
782                 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
783                 BUG_ON((signed short)ref < 0);
784                 np->grant_rx_ref[id] = ref;
785
786                 page = skb_frag_page(skb_shinfo(skb)->frags);
787                 pfn = page_to_pfn(page);
788                 vaddr = page_address(page);
789
790                 req = RING_GET_REQUEST(&np->rx, req_prod + i);
791                 if (!np->copying_receiver) {
792                         gnttab_grant_foreign_transfer_ref(ref,
793                                                           np->xbdev->otherend_id,
794                                                           pfn);
795                         np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
796                         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
797                                 /* Remove this page before passing
798                                  * back to Xen. */
799                                 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
800                                 MULTI_update_va_mapping(np->rx_mcl+i,
801                                                         (unsigned long)vaddr,
802                                                         __pte(0), 0);
803                         }
804                         nr_flips++;
805                 } else {
806                         gnttab_grant_foreign_access_ref(ref,
807                                                         np->xbdev->otherend_id,
808                                                         pfn_to_mfn(pfn),
809                                                         0);
810                 }
811
812                 req->id = id;
813                 req->gref = ref;
814         }
815
816         if (nr_flips != 0) {
817                 /* Tell the balloon driver what is going on. */
818                 balloon_update_driver_allowance(i);
819
820                 set_xen_guest_handle(reservation.extent_start,
821                                      np->rx_pfn_array);
822                 reservation.nr_extents   = nr_flips;
823                 reservation.extent_order = 0;
824                 reservation.address_bits = 0;
825                 reservation.domid        = DOMID_SELF;
826
827                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
828                         /* After all PTEs have been zapped, flush the TLB. */
829                         np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
830                                 UVMF_TLB_FLUSH|UVMF_ALL;
831
832                         /* Give away a batch of pages. */
833                         np->rx_mcl[i].op = __HYPERVISOR_memory_op;
834                         np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
835                         np->rx_mcl[i].args[1] = (unsigned long)&reservation;
836
837                         /* Zap PTEs and give away pages in one big
838                          * multicall. */
839                         if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1)))
840                                 BUG();
841
842                         /* Check return status of HYPERVISOR_memory_op(). */
843                         if (unlikely(np->rx_mcl[i].result != i))
844                                 panic("Unable to reduce memory reservation\n");
845                         while (nr_flips--)
846                                 BUG_ON(np->rx_mcl[nr_flips].result);
847                 } else {
848                         if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
849                                                  &reservation) != i)
850                                 panic("Unable to reduce memory reservation\n");
851                 }
852         } else {
853                 wmb();
854         }
855
856         /* Above is a suitable barrier to ensure backend will see requests. */
857         np->rx.req_prod_pvt = req_prod + i;
858  push:
859         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
860         if (notify)
861                 notify_remote_via_irq(np->irq);
862 }
863
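/*
 * Queue additional tx requests for any linear data that spills past the
 * first page and for each fragment.  Every extra slot takes its own id
 * from the free list and its own grant reference, and bumps the skb's
 * refcount so the skb survives until all responses are garbage-collected.
 */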
864 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
865                               struct netif_tx_request *tx)
866 {
867         struct netfront_info *np = netdev_priv(dev);
868         char *data = skb->data;
869         unsigned long mfn;
870         RING_IDX prod = np->tx.req_prod_pvt;
871         int frags = skb_shinfo(skb)->nr_frags;
872         unsigned int offset = offset_in_page(data);
873         unsigned int len = skb_headlen(skb);
874         unsigned int id;
875         grant_ref_t ref;
876         int i;
877
878         while (len > PAGE_SIZE - offset) {
879                 tx->size = PAGE_SIZE - offset;
880                 tx->flags |= XEN_NETTXF_more_data;
881                 len -= tx->size;
882                 data += tx->size;
883                 offset = 0;
884
885                 id = get_id_from_freelist(np->tx_skbs);
886                 np->tx_skbs[id] = skb_get(skb);
887                 tx = RING_GET_REQUEST(&np->tx, prod++);
888                 tx->id = id;
889                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
890                 BUG_ON((signed short)ref < 0);
891
892                 mfn = virt_to_mfn(data);
893                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
894                                                 mfn, GTF_readonly);
895
896                 tx->gref = np->grant_tx_ref[id] = ref;
897                 tx->offset = offset;
898                 tx->size = len;
899                 tx->flags = 0;
900         }
901
902         for (i = 0; i < frags; i++) {
903                 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
904
905                 tx->flags |= XEN_NETTXF_more_data;
906
907                 id = get_id_from_freelist(np->tx_skbs);
908                 np->tx_skbs[id] = skb_get(skb);
909                 tx = RING_GET_REQUEST(&np->tx, prod++);
910                 tx->id = id;
911                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
912                 BUG_ON((signed short)ref < 0);
913
914                 mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag)));
915                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
916                                                 mfn, GTF_readonly);
917
918                 tx->gref = np->grant_tx_ref[id] = ref;
919                 tx->offset = frag->page_offset;
920                 tx->size = skb_frag_size(frag);
921                 tx->flags = 0;
922         }
923
924         np->tx.req_prod_pvt = prod;
925 }
926
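/*
 * Transmit entry point.  The first request slot describes the start of
 * the linear data (plus checksum/GSO flags and any extra-info request);
 * xennet_make_frags() then fills in slots for the rest of the packet.
 */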
927 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
928 {
929         unsigned short id;
930         struct netfront_info *np = netdev_priv(dev);
931         struct netfront_stats *stats = this_cpu_ptr(np->stats);
932         struct netif_tx_request *tx;
933         struct netif_extra_info *extra;
934         char *data = skb->data;
935         RING_IDX i;
936         grant_ref_t ref;
937         unsigned long mfn, flags;
938         int notify;
939         int frags = skb_shinfo(skb)->nr_frags;
940         unsigned int offset = offset_in_page(data);
941         unsigned int len = skb_headlen(skb);
942
943         /* Check the fast path, if hooks are available */
944         if (np->accel_vif_state.hooks && 
945             np->accel_vif_state.hooks->start_xmit(skb, dev)) { 
946                 /* Fast path has sent this packet */ 
947                 return NETDEV_TX_OK;
948         } 
949
950         frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
951         if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
952                 pr_alert("xennet: skb rides the rocket: %d frags\n", frags);
953                 dump_stack();
954                 goto drop;
955         }
956
957         spin_lock_irqsave(&np->tx_lock, flags);
958
959         if (unlikely(!netfront_carrier_ok(np) ||
960                      (frags > 1 && !xennet_can_sg(dev)) ||
961                      netif_needs_gso(skb, netif_skb_features(skb)))) {
962                 spin_unlock_irqrestore(&np->tx_lock, flags);
963                 goto drop;
964         }
965
966         i = np->tx.req_prod_pvt;
967
968         id = get_id_from_freelist(np->tx_skbs);
969         np->tx_skbs[id] = skb;
970
971         tx = RING_GET_REQUEST(&np->tx, i);
972
973         tx->id   = id;
974         ref = gnttab_claim_grant_reference(&np->gref_tx_head);
975         BUG_ON((signed short)ref < 0);
976         mfn = virt_to_mfn(data);
977         gnttab_grant_foreign_access_ref(
978                 ref, np->xbdev->otherend_id, mfn, GTF_readonly);
979         tx->gref = np->grant_tx_ref[id] = ref;
980         tx->offset = offset;
981         tx->size = len;
982
983         tx->flags = 0;
984         extra = NULL;
985
986         if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
987                 tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
988         else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
989                 tx->flags |= XEN_NETTXF_data_validated;
990
991 #if HAVE_TSO
992         if (skb_shinfo(skb)->gso_size) {
993                 struct netif_extra_info *gso = (struct netif_extra_info *)
994                         RING_GET_REQUEST(&np->tx, ++i);
995
996                 if (extra)
997                         extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
998                 else
999                         tx->flags |= XEN_NETTXF_extra_info;
1000
1001                 gso->u.gso.size = skb_shinfo(skb)->gso_size;
1002                 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
1003                 gso->u.gso.pad = 0;
1004                 gso->u.gso.features = 0;
1005
1006                 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
1007                 gso->flags = 0;
1008                 extra = gso;
1009         }
1010 #endif
1011
1012         np->tx.req_prod_pvt = i + 1;
1013
1014         xennet_make_frags(skb, dev, tx);
1015         tx->size = skb->len;
1016
1017         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
1018         if (notify)
1019                 notify_remote_via_irq(np->irq);
1020
1021         u64_stats_update_begin(&stats->syncp);
1022         stats->tx_bytes += skb->len;
1023         stats->tx_packets++;
1024         u64_stats_update_end(&stats->syncp);
1025         dev->trans_start = jiffies;
1026
1027         /* Note: It is not safe to access skb after network_tx_buf_gc()! */
1028         network_tx_buf_gc(dev);
1029
1030         if (!netfront_tx_slot_available(np))
1031                 netif_stop_queue(dev);
1032
1033         spin_unlock_irqrestore(&np->tx_lock, flags);
1034
1035         return NETDEV_TX_OK;
1036
1037  drop:
1038         dev->stats.tx_dropped++;
1039         dev_kfree_skb(skb);
1040         return NETDEV_TX_OK;
1041 }
1042
1043 static irqreturn_t netif_int(int irq, void *dev_id)
1044 {
1045         struct net_device *dev = dev_id;
1046         struct netfront_info *np = netdev_priv(dev);
1047         unsigned long flags;
1048
1049         spin_lock_irqsave(&np->tx_lock, flags);
1050
1051         if (likely(netfront_carrier_ok(np))) {
1052                 network_tx_buf_gc(dev);
1053                 /* Under tx_lock: protects access to rx shared-ring indexes. */
1054                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
1055                         netfront_accelerator_call_stop_napi_irq(np, dev);
1056
1057                         napi_schedule(&np->napi);
1058                 }
1059         }
1060
1061         spin_unlock_irqrestore(&np->tx_lock, flags);
1062
1063         return IRQ_HANDLED;
1064 }
1065
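/*
 * Hand an unconsumed receive buffer (skb plus grant reference) back to
 * the ring at the next free request slot so the backend can reuse it.
 */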
1066 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
1067                                 grant_ref_t ref)
1068 {
1069         int new = xennet_rxidx(np->rx.req_prod_pvt);
1070
1071         BUG_ON(np->rx_skbs[new]);
1072         np->rx_skbs[new] = skb;
1073         np->grant_rx_ref[new] = ref;
1074         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
1075         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
1076         np->rx.req_prod_pvt++;
1077 }
1078
1079 int xennet_get_extras(struct netfront_info *np,
1080                       struct netif_extra_info *extras, RING_IDX rp)
1081
1082 {
1083         struct netif_extra_info *extra;
1084         RING_IDX cons = np->rx.rsp_cons;
1085         int err = 0;
1086
1087         do {
1088                 struct sk_buff *skb;
1089                 grant_ref_t ref;
1090
1091                 if (unlikely(cons + 1 == rp)) {
1092                         if (net_ratelimit())
1093                                 WPRINTK("Missing extra info\n");
1094                         err = -EBADR;
1095                         break;
1096                 }
1097
1098                 extra = (struct netif_extra_info *)
1099                         RING_GET_RESPONSE(&np->rx, ++cons);
1100
1101                 if (unlikely(!extra->type ||
1102                              extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1103                         if (net_ratelimit())
1104                                 WPRINTK("Invalid extra type: %d\n",
1105                                         extra->type);
1106                         err = -EINVAL;
1107                 } else {
1108                         memcpy(&extras[extra->type - 1], extra,
1109                                sizeof(*extra));
1110                 }
1111
1112                 skb = xennet_get_rx_skb(np, cons);
1113                 ref = xennet_get_rx_ref(np, cons);
1114                 xennet_move_rx_slot(np, skb, ref);
1115         } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
1116
1117         np->rx.rsp_cons = cons;
1118         return err;
1119 }
1120
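/*
 * Walk the chain of responses that make up one received packet, queueing
 * the backing skbs on 'list' and reclaiming each grant (ending foreign
 * access in copy mode, or completing the page transfer in flip mode).
 */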
1121 static int xennet_get_responses(struct netfront_info *np,
1122                                 struct netfront_rx_info *rinfo, RING_IDX rp,
1123                                 struct sk_buff_head *list,
1124                                 int *pages_flipped_p)
1125 {
1126         int pages_flipped = *pages_flipped_p;
1127         struct mmu_update *mmu;
1128         struct multicall_entry *mcl;
1129         struct netif_rx_response *rx = &rinfo->rx;
1130         struct netif_extra_info *extras = rinfo->extras;
1131         RING_IDX cons = np->rx.rsp_cons;
1132         struct sk_buff *skb = xennet_get_rx_skb(np, cons);
1133         grant_ref_t ref = xennet_get_rx_ref(np, cons);
1134         int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
1135         int frags = 1;
1136         int err = 0;
1137         unsigned long ret;
1138
1139         if (rx->flags & XEN_NETRXF_extra_info) {
1140                 err = xennet_get_extras(np, extras, rp);
1141                 cons = np->rx.rsp_cons;
1142         }
1143
1144         for (;;) {
1145                 unsigned long mfn;
1146
1147                 if (unlikely(rx->status < 0 ||
1148                              rx->offset + rx->status > PAGE_SIZE)) {
1149                         if (net_ratelimit())
1150                                 WPRINTK("rx->offset: %x, size: %u\n",
1151                                         rx->offset, rx->status);
1152                         xennet_move_rx_slot(np, skb, ref);
1153                         err = -EINVAL;
1154                         goto next;
1155                 }
1156
1157                 /*
1158                  * This definitely indicates a bug, either in this driver or in
1159                  * the backend driver. In future this should flag the bad
1160                  * situation to the system controller to reboot the backend.
1161                  */
1162                 if (ref == GRANT_INVALID_REF) {
1163                         if (net_ratelimit())
1164                                 WPRINTK("Bad rx response id %d.\n", rx->id);
1165                         err = -EINVAL;
1166                         goto next;
1167                 }
1168
1169                 if (!np->copying_receiver) {
1170                         /* Memory pressure, insufficient buffer
1171                          * headroom, ... */
1172                         if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
1173                                 if (net_ratelimit())
1174                                         WPRINTK("Unfulfilled rx req "
1175                                                 "(id=%d, st=%d).\n",
1176                                                 rx->id, rx->status);
1177                                 xennet_move_rx_slot(np, skb, ref);
1178                                 err = -ENOMEM;
1179                                 goto next;
1180                         }
1181
1182                         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1183                                 /* Remap the page. */
1184                                 const struct page *page =
1185                                         skb_frag_page(skb_shinfo(skb)->frags);
1186                                 unsigned long pfn = page_to_pfn(page);
1187                                 void *vaddr = page_address(page);
1188
1189                                 mcl = np->rx_mcl + pages_flipped;
1190                                 mmu = np->rx_mmu + pages_flipped;
1191
1192                                 MULTI_update_va_mapping(mcl,
1193                                                         (unsigned long)vaddr,
1194                                                         pfn_pte_ma(mfn,
1195                                                                    PAGE_KERNEL),
1196                                                         0);
1197                                 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1198                                         | MMU_MACHPHYS_UPDATE;
1199                                 mmu->val = pfn;
1200
1201                                 set_phys_to_machine(pfn, mfn);
1202                         }
1203                         pages_flipped++;
1204                 } else {
1205                         ret = gnttab_end_foreign_access_ref(ref);
1206                         BUG_ON(!ret);
1207                 }
1208
1209                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1210
1211                 __skb_queue_tail(list, skb);
1212
1213 next:
1214                 if (!(rx->flags & XEN_NETRXF_more_data))
1215                         break;
1216
1217                 if (cons + frags == rp) {
1218                         if (net_ratelimit())
1219                                 WPRINTK("Need more frags\n");
1220                         err = -ENOENT;
1221                         break;
1222                 }
1223
1224                 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1225                 skb = xennet_get_rx_skb(np, cons + frags);
1226                 ref = xennet_get_rx_ref(np, cons + frags);
1227                 frags++;
1228         }
1229
1230         if (unlikely(frags > max)) {
1231                 if (net_ratelimit())
1232                         WPRINTK("Too many frags\n");
1233                 err = -E2BIG;
1234         }
1235
1236         if (unlikely(err))
1237                 np->rx.rsp_cons = cons + frags;
1238
1239         *pages_flipped_p = pages_flipped;
1240
1241         return err;
1242 }
1243
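/*
 * Attach the pages carried by the follow-on responses of a multi-slot
 * packet as fragments of the head skb, then free the placeholder skbs.
 * Returns the ring index of the last response consumed.
 */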
1244 static RING_IDX xennet_fill_frags(struct netfront_info *np,
1245                                   struct sk_buff *skb,
1246                                   struct sk_buff_head *list)
1247 {
1248         struct skb_shared_info *shinfo = skb_shinfo(skb);
1249         int nr_frags = shinfo->nr_frags;
1250         RING_IDX cons = np->rx.rsp_cons;
1251         struct sk_buff *nskb;
1252
1253         while ((nskb = __skb_dequeue(list))) {
1254                 struct netif_rx_response *rx =
1255                         RING_GET_RESPONSE(&np->rx, ++cons);
1256
1257                 __skb_fill_page_desc(skb, nr_frags,
1258                                      skb_frag_page(skb_shinfo(nskb)->frags),
1259                                      rx->offset, rx->status);
1260
1261                 skb->data_len += rx->status;
1262
1263                 skb_shinfo(nskb)->nr_frags = 0;
1264                 kfree_skb(nskb);
1265
1266                 nr_frags++;
1267         }
1268
1269         shinfo->nr_frags = nr_frags;
1270         return cons;
1271 }
1272
1273 static int xennet_set_skb_gso(struct sk_buff *skb,
1274                               struct netif_extra_info *gso)
1275 {
1276         if (!gso->u.gso.size) {
1277                 if (net_ratelimit())
1278                         WPRINTK("GSO size must not be zero.\n");
1279                 return -EINVAL;
1280         }
1281
1282         /* Currently only TCPv4 S.O. is supported. */
1283         if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1284                 if (net_ratelimit())
1285                         WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1286                 return -EINVAL;
1287         }
1288
1289 #if HAVE_TSO
1290         skb_shinfo(skb)->gso_size = gso->u.gso.size;
1291 #if HAVE_GSO
1292         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1293
1294         /* Header must be checked, and gso_segs computed. */
1295         skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1296 #endif
1297         skb_shinfo(skb)->gso_segs = 0;
1298
1299         return 0;
1300 #else
1301         if (net_ratelimit())
1302                 WPRINTK("GSO unsupported by this kernel.\n");
1303         return -EINVAL;
1304 #endif
1305 }
1306
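/*
 * NAPI poll handler: drain up to 'budget' responses from the rx ring,
 * rebuild skbs from the granted pages and hand them to the stack.  Any
 * accelerated (fast-path) plugin gets a share of the remaining budget
 * before interrupts are re-enabled.
 */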
1307 static int netif_poll(struct napi_struct *napi, int budget)
1308 {
1309         struct netfront_info *np = container_of(napi, struct netfront_info, napi);
1310         struct netfront_stats *stats = this_cpu_ptr(np->stats);
1311         struct net_device *dev = np->netdev;
1312         struct sk_buff *skb;
1313         struct netfront_rx_info rinfo;
1314         struct netif_rx_response *rx = &rinfo.rx;
1315         struct netif_extra_info *extras = rinfo.extras;
1316         RING_IDX i, rp;
1317         struct multicall_entry *mcl;
1318         int work_done, more_to_do = 1, accel_more_to_do = 1;
1319         struct sk_buff_head rxq;
1320         struct sk_buff_head errq;
1321         struct sk_buff_head tmpq;
1322         unsigned long flags;
1323         unsigned int len;
1324         int pages_flipped = 0;
1325         int err;
1326
1327         spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
1328
1329         if (unlikely(!netfront_carrier_ok(np))) {
1330                 spin_unlock(&np->rx_lock);
1331                 return 0;
1332         }
1333
1334         skb_queue_head_init(&rxq);
1335         skb_queue_head_init(&errq);
1336         skb_queue_head_init(&tmpq);
1337
1338         rp = np->rx.sring->rsp_prod;
1339         rmb(); /* Ensure we see queued responses up to 'rp'. */
1340
1341         i = np->rx.rsp_cons;
1342         work_done = 0;
1343         while ((i != rp) && (work_done < budget)) {
1344                 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
1345                 memset(extras, 0, sizeof(rinfo.extras));
1346
1347                 err = xennet_get_responses(np, &rinfo, rp, &tmpq,
1348                                            &pages_flipped);
1349
1350                 if (unlikely(err)) {
1351 err:    
1352                         while ((skb = __skb_dequeue(&tmpq)))
1353                                 __skb_queue_tail(&errq, skb);
1354                         dev->stats.rx_errors++;
1355                         i = np->rx.rsp_cons;
1356                         continue;
1357                 }
1358
1359                 skb = __skb_dequeue(&tmpq);
1360
1361                 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1362                         struct netif_extra_info *gso;
1363                         gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1364
1365                         if (unlikely(xennet_set_skb_gso(skb, gso))) {
1366                                 __skb_queue_head(&tmpq, skb);
1367                                 np->rx.rsp_cons += skb_queue_len(&tmpq);
1368                                 goto err;
1369                         }
1370                 }
1371
1372                 NETFRONT_SKB_CB(skb)->page =
1373                         skb_frag_page(skb_shinfo(skb)->frags);
1374                 NETFRONT_SKB_CB(skb)->offset = rx->offset;
1375
1376                 len = rx->status;
1377                 if (len > RX_COPY_THRESHOLD)
1378                         len = RX_COPY_THRESHOLD;
1379                 skb_put(skb, len);
1380
1381                 if (rx->status > len) {
1382                         skb_shinfo(skb)->frags[0].page_offset =
1383                                 rx->offset + len;
1384                         skb_frag_size_set(skb_shinfo(skb)->frags,
1385                                           rx->status - len);
1386                         skb->data_len = rx->status - len;
1387                 } else {
1388                         __skb_fill_page_desc(skb, 0, NULL, 0, 0);
1389                         skb_shinfo(skb)->nr_frags = 0;
1390                 }
1391
1392                 i = xennet_fill_frags(np, skb, &tmpq);
1393
1394                 /*
1395                  * Truesize must approximate the size of true data plus
1396                  * any supervisor overheads. Adding hypervisor overheads
1397                  * has been shown to significantly reduce achievable
1398                  * bandwidth with the default receive buffer size. It is
1399                  * therefore not wise to account for it here.
1400                  *
1401                  * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
1402                  * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
1403                  * add the size of the data pulled in xennet_fill_frags().
1404                  *
1405                  * We also adjust for any unused space in the main data
1406                  * area by subtracting (RX_COPY_THRESHOLD - len). This is
1407                  * especially important with drivers which split incoming
1408                  * packets into header and data, using only 66 bytes of
1409                  * the main data area (see the e1000 driver, for example).
1410                  * On such systems, without this last adjustment, our
1411                  * achievable receive throughput using the standard receive
1412                  * buffer size was cut by 25%(!!!).
1413                  */
1414                 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
1415                 skb->len += skb->data_len;
1416
1417                 if (rx->flags & XEN_NETRXF_csum_blank)
1418                         skb->ip_summed = CHECKSUM_PARTIAL;
1419                 else if (rx->flags & XEN_NETRXF_data_validated)
1420                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1421                 else
1422                         skb->ip_summed = CHECKSUM_NONE;
1423
1424                 u64_stats_update_begin(&stats->syncp);
1425                 stats->rx_packets++;
1426                 stats->rx_bytes += skb->len;
1427                 u64_stats_update_end(&stats->syncp);
1428
1429                 __skb_queue_tail(&rxq, skb);
1430
1431                 np->rx.rsp_cons = ++i;
1432                 work_done++;
1433         }
1434
1435         if (pages_flipped) {
1436                 /* Some pages are no longer absent... */
1437                 balloon_update_driver_allowance(-pages_flipped);
1438
1439                 /* Do all the remapping work and M2P updates. */
1440                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1441                         mcl = np->rx_mcl + pages_flipped;
1442                         mcl->op = __HYPERVISOR_mmu_update;
1443                         mcl->args[0] = (unsigned long)np->rx_mmu;
1444                         mcl->args[1] = pages_flipped;
1445                         mcl->args[2] = 0;
1446                         mcl->args[3] = DOMID_SELF;
1447                         err = HYPERVISOR_multicall_check(np->rx_mcl,
1448                                                          pages_flipped + 1,
1449                                                          NULL);
1450                         BUG_ON(err);
1451                 }
1452         }
1453
1454         __skb_queue_purge(&errq);
1455
1456         while ((skb = __skb_dequeue(&rxq)) != NULL) {
1457                 struct page *page = NETFRONT_SKB_CB(skb)->page;
1458                 void *vaddr = page_address(page);
1459                 unsigned offset = NETFRONT_SKB_CB(skb)->offset;
1460
1461                 memcpy(skb->data, vaddr + offset, skb_headlen(skb));
1462
1463                 if (page != skb_frag_page(skb_shinfo(skb)->frags))
1464                         __free_page(page);
1465
1466                 /* Ethernet work: Delayed to here as it peeks the header. */
1467                 skb->protocol = eth_type_trans(skb, dev);
1468
1469                 if (skb_checksum_setup(skb, &np->rx_gso_csum_fixups)) {
1470                         kfree_skb(skb);
1471                         continue;
1472                 }
1473
1474                 /* Pass it up. */
1475                 netif_receive_skb(skb);
1476         }
1477
1478         /* If we get a callback with very few responses, reduce fill target. */
1479         /* NB. Exponential increase, linear decrease. */
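        /*
         * For example, with rx_target at 64: if more than 48 (3*64/4) posted
         * requests are still unanswered when this poll finishes, rx_target is
         * decremented by one, but never below rx_min_target.  The matching
         * exponential increase is applied on the refill path in
         * network_alloc_rx_buffers().
         */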
1480         if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1481              ((3*np->rx_target) / 4)) &&
1482             (--np->rx_target < np->rx_min_target))
1483                 np->rx_target = np->rx_min_target;
1484
1485         network_alloc_rx_buffers(dev);
1486
1487         if (work_done < budget) {
1488                 /* there's some spare capacity, try the accelerated path */
1489                 int accel_budget = budget - work_done;
1490                 int accel_budget_start = accel_budget;
1491
1492                 if (np->accel_vif_state.hooks) {
1493                         accel_more_to_do =
1494                                 np->accel_vif_state.hooks->netdev_poll(
1495                                         dev, &accel_budget);
1496                         work_done += (accel_budget_start - accel_budget);
1497                 } else
1498                         accel_more_to_do = 0;
1499         }
1500
1501         if (work_done < budget) {
1502                 local_irq_save(flags);
1503
1504                 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1505
1506                 if (!more_to_do && !accel_more_to_do &&
1507                     np->accel_vif_state.hooks) {
1508                         /*
1509                          * The slow path has nothing more to do; check
1510                          * whether the accelerated fast path is idle too.
1511                          */
1512                         accel_more_to_do =
1513                                 np->accel_vif_state.hooks->start_napi_irq(dev);
1514                 }
1515
1516                 if (!more_to_do && !accel_more_to_do)
1517                         __napi_complete(napi);
1518
1519                 local_irq_restore(flags);
1520         }
1521
1522         spin_unlock(&np->rx_lock);
1523
1524         return work_done;
1525 }
1526
1527 static void netif_release_tx_bufs(struct netfront_info *np)
1528 {
1529         struct sk_buff *skb;
1530         int i;
1531
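        /*
         * tx_skbs[] doubles as a free list: slots holding a small integer
         * (below PAGE_OFFSET) are free-list links rather than skb pointers,
         * so they are skipped here.  See the recovery-procedure comment in
         * network_connect().
         */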
1532         for (i = 1; i <= NET_TX_RING_SIZE; i++) {
1533                 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
1534                         continue;
1535
1536                 skb = np->tx_skbs[i];
1537                 gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
1538                 gnttab_release_grant_reference(
1539                         &np->gref_tx_head, np->grant_tx_ref[i]);
1540                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1541                 add_id_to_freelist(np->tx_skbs, i);
1542                 dev_kfree_skb_irq(skb);
1543         }
1544 }
1545
1546 static void netif_release_rx_bufs_flip(struct netfront_info *np)
1547 {
1548         struct mmu_update      *mmu = np->rx_mmu;
1549         struct multicall_entry *mcl = np->rx_mcl;
1550         struct sk_buff_head free_list;
1551         struct sk_buff *skb;
1552         unsigned long mfn;
1553         int xfer = 0, noxfer = 0, unused = 0;
1554         int id, ref, rc;
1555
1556         skb_queue_head_init(&free_list);
1557
1558         spin_lock_bh(&np->rx_lock);
1559
1560         for (id = 0; id < NET_RX_RING_SIZE; id++) {
1561                 struct page *page;
1562
1563                 if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
1564                         unused++;
1565                         continue;
1566                 }
1567
1568                 skb = np->rx_skbs[id];
1569                 mfn = gnttab_end_foreign_transfer_ref(ref);
1570                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1571                 np->grant_rx_ref[id] = GRANT_INVALID_REF;
1572                 add_id_to_freelist(np->rx_skbs, id);
1573
1574                 page = skb_frag_page(skb_shinfo(skb)->frags);
1575
1576                 if (0 == mfn) {
1577                         balloon_release_driver_page(page);
1578                         skb_shinfo(skb)->nr_frags = 0;
1579                         dev_kfree_skb(skb);
1580                         noxfer++;
1581                         continue;
1582                 }
1583
1584                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1585                         /* Remap the page. */
1586                         unsigned long pfn = page_to_pfn(page);
1587                         void *vaddr = page_address(page);
1588
1589                         MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1590                                                 pfn_pte_ma(mfn, PAGE_KERNEL),
1591                                                 0);
1592                         mcl++;
1593                         mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1594                                 | MMU_MACHPHYS_UPDATE;
1595                         mmu->val = pfn;
1596                         mmu++;
1597
1598                         set_phys_to_machine(pfn, mfn);
1599                 }
1600                 __skb_queue_tail(&free_list, skb);
1601                 xfer++;
1602         }
1603
1604         DPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
1605                 __FUNCTION__, xfer, noxfer, unused);
1606
1607         if (xfer) {
1608                 /* Some pages are no longer absent... */
1609                 balloon_update_driver_allowance(-xfer);
1610
1611                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1612                         /* Do all the remapping work and M2P updates. */
1613                         mcl->op = __HYPERVISOR_mmu_update;
1614                         mcl->args[0] = (unsigned long)np->rx_mmu;
1615                         mcl->args[1] = mmu - np->rx_mmu;
1616                         mcl->args[2] = 0;
1617                         mcl->args[3] = DOMID_SELF;
1618                         mcl++;
1619                         rc = HYPERVISOR_multicall_check(
1620                                 np->rx_mcl, mcl - np->rx_mcl, NULL);
1621                         BUG_ON(rc);
1622                 }
1623         }
1624
1625         __skb_queue_purge(&free_list);
1626
1627         spin_unlock_bh(&np->rx_lock);
1628 }
1629
1630 static void netif_release_rx_bufs_copy(struct netfront_info *np)
1631 {
1632         struct sk_buff *skb;
1633         int i, ref;
1634         int busy = 0, inuse = 0;
1635
1636         spin_lock_bh(&np->rx_lock);
1637
1638         for (i = 0; i < NET_RX_RING_SIZE; i++) {
1639                 ref = np->grant_rx_ref[i];
1640
1641                 if (ref == GRANT_INVALID_REF)
1642                         continue;
1643
1644                 inuse++;
1645
1646                 skb = np->rx_skbs[i];
1647
1648                 if (!gnttab_end_foreign_access_ref(ref))
1649                 {
1650                         busy++;
1651                         continue;
1652                 }
1653
1654                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1655                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1656                 add_id_to_freelist(np->rx_skbs, i);
1657
1658                 dev_kfree_skb(skb);
1659         }
1660
1661         if (busy)
1662                 DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n",
1663                         __FUNCTION__, busy, inuse, NET_RX_RING_SIZE);
1664
1665         spin_unlock_bh(&np->rx_lock);
1666 }
1667
1668 static int network_close(struct net_device *dev)
1669 {
1670         struct netfront_info *np = netdev_priv(dev);
1671         netif_stop_queue(np->netdev);
1672         napi_disable(&np->napi);
1673         return 0;
1674 }
1675
1676
1677 static int xennet_set_mac_address(struct net_device *dev, void *p)
1678 {
1679         struct netfront_info *np = netdev_priv(dev);
1680         struct sockaddr *addr = p;
1681
1682         if (netif_running(dev))
1683                 return -EBUSY;
1684
1685         if (!is_valid_ether_addr(addr->sa_data))
1686                 return -EADDRNOTAVAIL;
1687
1688         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1689         memcpy(np->mac, addr->sa_data, ETH_ALEN);
1690
1691         return 0;
1692 }
1693
1694 static int xennet_change_mtu(struct net_device *dev, int mtu)
1695 {
1696         int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1697
1698         if (mtu > max)
1699                 return -EINVAL;
1700         dev->mtu = mtu;
1701         return 0;
1702 }
1703
1704 static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1705                                                     struct rtnl_link_stats64 *tot)
1706 {
1707         struct netfront_info *np = netdev_priv(dev);
1708         int cpu;
1709
1710         netfront_accelerator_call_get_stats(np, dev);
1711
1712         for_each_possible_cpu(cpu) {
1713                 struct netfront_stats *stats = per_cpu_ptr(np->stats, cpu);
1714                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1715                 unsigned int start;
1716
1717                 do {
1718                         start = u64_stats_fetch_begin_bh(&stats->syncp);
1719
1720                         rx_packets = stats->rx_packets;
1721                         tx_packets = stats->tx_packets;
1722                         rx_bytes = stats->rx_bytes;
1723                         tx_bytes = stats->tx_bytes;
1724                 } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
1725
1726                 tot->rx_packets += rx_packets;
1727                 tot->tx_packets += tx_packets;
1728                 tot->rx_bytes   += rx_bytes;
1729                 tot->tx_bytes   += tx_bytes;
1730         }
1731
1732         tot->rx_errors  = dev->stats.rx_errors;
1733         tot->tx_dropped = dev->stats.tx_dropped;
1734
1735         return tot;
1736 }
1737
1738 static const struct xennet_stat {
1739         char name[ETH_GSTRING_LEN];
1740         u16 offset;
1741 } xennet_stats[] = {
1742         {
1743                 "rx_gso_csum_fixups",
1744                 offsetof(struct netfront_info, rx_gso_csum_fixups) / sizeof(long)
1745         },
1746 };
1747
1748 static int xennet_get_sset_count(struct net_device *dev, int sset)
1749 {
1750         switch (sset) {
1751         case ETH_SS_STATS:
1752                 return ARRAY_SIZE(xennet_stats);
1753         }
1754         return -EOPNOTSUPP;
1755 }
1756
1757 static void xennet_get_ethtool_stats(struct net_device *dev,
1758                                      struct ethtool_stats *stats, u64 *data)
1759 {
1760         unsigned long *np = netdev_priv(dev);
1761         unsigned int i;
1762
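        /*
         * xennet_stats[].offset is recorded in units of longs, so the
         * private data area is indexed here as an array of unsigned long.
         */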
1763         for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1764                 data[i] = np[xennet_stats[i].offset];
1765 }
1766
1767 static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1768 {
1769         unsigned int i;
1770
1771         switch (stringset) {
1772         case ETH_SS_STATS:
1773                 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1774                         memcpy(data + i * ETH_GSTRING_LEN,
1775                                xennet_stats[i].name, ETH_GSTRING_LEN);
1776                 break;
1777         }
1778 }
1779
1780 static void netfront_get_drvinfo(struct net_device *dev,
1781                                  struct ethtool_drvinfo *info)
1782 {
1783         strlcpy(info->driver, "netfront", sizeof(info->driver));
1784         strlcpy(info->bus_info, dev_name(dev->dev.parent),
1785                 ARRAY_SIZE(info->bus_info));
1786 }
1787
1788 static int network_connect(struct net_device *dev)
1789 {
1790         struct netfront_info *np = netdev_priv(dev);
1791         int i, requeue_idx, err;
1792         struct sk_buff *skb;
1793         grant_ref_t ref;
1794         netif_rx_request_t *req;
1795         unsigned int feature_rx_copy, feature_rx_flip;
1796
1797         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1798                            "feature-rx-copy", "%u", &feature_rx_copy);
1799         if (err != 1)
1800                 feature_rx_copy = 0;
1801         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1802                            "feature-rx-flip", "%u", &feature_rx_flip);
1803         if (err != 1)
1804                 feature_rx_flip = 1;
1805
1806         /*
1807          * Copy packets on receive path if:
1808          *  (a) This was requested by user, and the backend supports it; or
1809          *  (b) Flipping was requested, but this is unsupported by the backend.
1810          */
1811         np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1812                                 (MODPARM_rx_flip && !feature_rx_flip));
1813
1814         err = talk_to_backend(np->xbdev, np);
1815         if (err)
1816                 return err;
1817
1818         rtnl_lock();
1819         netdev_update_features(dev);
1820         rtnl_unlock();
1821
1822         DPRINTK("device %s has %sing receive path.\n",
1823                 dev->name, np->copying_receiver ? "copy" : "flipp");
1824
1825         spin_lock_bh(&np->rx_lock);
1826         spin_lock_irq(&np->tx_lock);
1827
1828         /*
1829          * Recovery procedure:
1830          *  NB. Freelist index entries are always going to be less than
1831          *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
1832          *  greater than PAGE_OFFSET: we use this property to distinguish
1833          *  them.
1834          */
1835
1836         /* Step 1: Discard all pending TX packet fragments. */
1837         netif_release_tx_bufs(np);
1838
1839         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1840         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1841                 unsigned long pfn;
1842
1843                 if (!np->rx_skbs[i])
1844                         continue;
1845
1846                 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1847                 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1848                 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1849                 pfn = page_to_pfn(skb_frag_page(skb_shinfo(skb)->frags));
1850
1851                 if (!np->copying_receiver) {
1852                         gnttab_grant_foreign_transfer_ref(
1853                                 ref, np->xbdev->otherend_id, pfn);
1854                 } else {
1855                         gnttab_grant_foreign_access_ref(
1856                                 ref, np->xbdev->otherend_id,
1857                                 pfn_to_mfn(pfn), 0);
1858                 }
1859                 req->gref = ref;
1860                 req->id   = requeue_idx;
1861
1862                 requeue_idx++;
1863         }
1864
1865         np->rx.req_prod_pvt = requeue_idx;
1866
1867         /*
1868          * Step 3: All public and private state should now be sane.  Get
1869          * ready to start sending and receiving packets and give the driver
1870          * domain a kick because we've probably just requeued some
1871          * packets.
1872          */
1873         netfront_carrier_on(np);
1874         notify_remote_via_irq(np->irq);
1875         network_tx_buf_gc(dev);
1876         network_alloc_rx_buffers(dev);
1877
1878         spin_unlock_irq(&np->tx_lock);
1879         spin_unlock_bh(&np->rx_lock);
1880
1881         return 0;
1882 }
1883
1884 static void netif_uninit(struct net_device *dev)
1885 {
1886         struct netfront_info *np = netdev_priv(dev);
1887         netif_release_tx_bufs(np);
1888         if (np->copying_receiver)
1889                 netif_release_rx_bufs_copy(np);
1890         else
1891                 netif_release_rx_bufs_flip(np);
1892         gnttab_free_grant_references(np->gref_tx_head);
1893         gnttab_free_grant_references(np->gref_rx_head);
1894 }
1895
1896 static const struct ethtool_ops network_ethtool_ops =
1897 {
1898         .get_drvinfo = netfront_get_drvinfo,
1899         .get_link = ethtool_op_get_link,
1900
1901         .get_sset_count = xennet_get_sset_count,
1902         .get_ethtool_stats = xennet_get_ethtool_stats,
1903         .get_strings = xennet_get_strings,
1904 };
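
/*
 * With these ops wired up, standard ethtool invocations such as the
 * following (interface name illustrative) report the driver name from
 * netfront_get_drvinfo() and the rx_gso_csum_fixups counter:
 *
 *      ethtool -i eth0
 *      ethtool -S eth0
 */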
1905
1906 #ifdef CONFIG_SYSFS
1907 static ssize_t show_rxbuf_min(struct device *dev,
1908                               struct device_attribute *attr, char *buf)
1909 {
1910         struct netfront_info *info = netdev_priv(to_net_dev(dev));
1911
1912         return sprintf(buf, "%u\n", info->rx_min_target);
1913 }
1914
1915 static ssize_t store_rxbuf_min(struct device *dev,
1916                                struct device_attribute *attr,
1917                                const char *buf, size_t len)
1918 {
1919         struct net_device *netdev = to_net_dev(dev);
1920         struct netfront_info *np = netdev_priv(netdev);
1921         char *endp;
1922         unsigned long target;
1923
1924         if (!capable(CAP_NET_ADMIN))
1925                 return -EPERM;
1926
1927         target = simple_strtoul(buf, &endp, 0);
1928         if (endp == buf)
1929                 return -EBADMSG;
1930
1931         if (target < RX_MIN_TARGET)
1932                 target = RX_MIN_TARGET;
1933         if (target > RX_MAX_TARGET)
1934                 target = RX_MAX_TARGET;
1935
1936         spin_lock_bh(&np->rx_lock);
1937         if (target > np->rx_max_target)
1938                 np->rx_max_target = target;
1939         np->rx_min_target = target;
1940         if (target > np->rx_target)
1941                 np->rx_target = target;
1942
1943         network_alloc_rx_buffers(netdev);
1944
1945         spin_unlock_bh(&np->rx_lock);
1946         return len;
1947 }
1948
1949 static ssize_t show_rxbuf_max(struct device *dev,
1950                               struct device_attribute *attr, char *buf)
1951 {
1952         struct netfront_info *info = netdev_priv(to_net_dev(dev));
1953
1954         return sprintf(buf, "%u\n", info->rx_max_target);
1955 }
1956
1957 static ssize_t store_rxbuf_max(struct device *dev,
1958                                struct device_attribute *attr,
1959                                const char *buf, size_t len)
1960 {
1961         struct net_device *netdev = to_net_dev(dev);
1962         struct netfront_info *np = netdev_priv(netdev);
1963         char *endp;
1964         unsigned long target;
1965
1966         if (!capable(CAP_NET_ADMIN))
1967                 return -EPERM;
1968
1969         target = simple_strtoul(buf, &endp, 0);
1970         if (endp == buf)
1971                 return -EBADMSG;
1972
1973         if (target < RX_MIN_TARGET)
1974                 target = RX_MIN_TARGET;
1975         if (target > RX_MAX_TARGET)
1976                 target = RX_MAX_TARGET;
1977
1978         spin_lock_bh(&np->rx_lock);
1979         if (target < np->rx_min_target)
1980                 np->rx_min_target = target;
1981         np->rx_max_target = target;
1982         if (target < np->rx_target)
1983                 np->rx_target = target;
1984
1985         network_alloc_rx_buffers(netdev);
1986
1987         spin_unlock_bh(&np->rx_lock);
1988         return len;
1989 }
1990
1991 static ssize_t show_rxbuf_cur(struct device *dev,
1992                               struct device_attribute *attr, char *buf)
1993 {
1994         struct netfront_info *info = netdev_priv(to_net_dev(dev));
1995
1996         return sprintf(buf, "%u\n", info->rx_target);
1997 }
1998
1999 static struct device_attribute xennet_attrs[] = {
2000         __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
2001         __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
2002         __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
2003 };
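
/*
 * These attributes are created on the net device itself, so they normally
 * appear under /sys/class/net/<ifname>/ (paths illustrative):
 *
 *      cat /sys/class/net/eth0/rxbuf_cur
 *      echo 512 > /sys/class/net/eth0/rxbuf_max
 */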
2004
2005 static int xennet_sysfs_addif(struct net_device *netdev)
2006 {
2007         int i;
2008         int error = 0;
2009
2010         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
2011                 error = device_create_file(&netdev->dev,
2012                                            &xennet_attrs[i]);
2013                 if (error)
2014                         goto fail;
2015         }
2016         return 0;
2017
2018  fail:
2019         while (--i >= 0)
2020                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
2021         return error;
2022 }
2023
2024 static void xennet_sysfs_delif(struct net_device *netdev)
2025 {
2026         int i;
2027
2028         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
2029                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
2030 }
2031
2032 #endif /* CONFIG_SYSFS */
2033
2034
2035 /*
2036  * Nothing to do here. Virtual interface is point-to-point and the
2037  * physical interface is probably promiscuous anyway.
2038  */
2039 static void network_set_multicast_list(struct net_device *dev)
2040 {
2041 }
2042
2043 static netdev_features_t xennet_fix_features(struct net_device *dev,
2044                                              netdev_features_t features)
2045 {
2046         struct netfront_info *np = netdev_priv(dev);
2047         int val;
2048
2049         if (features & NETIF_F_SG) {
2050                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
2051                                  "%d", &val) < 0)
2052                         val = 0;
2053
2054                 if (!val)
2055                         features &= ~NETIF_F_SG;
2056         }
2057
2058         if (features & NETIF_F_TSO) {
2059                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
2060                                  "feature-gso-tcpv4", "%d", &val) < 0)
2061                         val = 0;
2062
2063                 if (!val)
2064                         features &= ~NETIF_F_TSO;
2065         }
2066
2067         return features;
2068 }
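
/*
 * The backend advertises these capabilities in xenstore.  An illustrative
 * listing (the exact path depends on the toolstack and vif handle):
 *
 *      # xenstore-ls /local/domain/0/backend/vif/<domid>/0
 *      feature-sg = "1"
 *      feature-gso-tcpv4 = "1"
 *      feature-rx-copy = "1"
 */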
2069
2070 static int xennet_set_features(struct net_device *dev,
2071                                netdev_features_t features)
2072 {
2073         if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
2074                 netdev_info(dev, "Reducing MTU because no SG offload\n");
2075                 dev->mtu = ETH_DATA_LEN;
2076         }
2077
2078         return 0;
2079 }
2080
2081 #ifdef CONFIG_NET_POLL_CONTROLLER
2082 static void xennet_poll_controller(struct net_device *dev)
2083 {
2084         netif_int(0, dev);
2085 }
2086 #endif
2087
2088 static const struct net_device_ops xennet_netdev_ops = {
2089         .ndo_uninit             = netif_uninit,
2090         .ndo_open               = network_open,
2091         .ndo_stop               = network_close,
2092         .ndo_start_xmit         = network_start_xmit,
2093         .ndo_set_rx_mode        = network_set_multicast_list,
2094         .ndo_set_mac_address    = xennet_set_mac_address,
2095         .ndo_validate_addr      = eth_validate_addr,
2096         .ndo_fix_features       = xennet_fix_features,
2097         .ndo_set_features       = xennet_set_features,
2098 #ifdef CONFIG_NET_POLL_CONTROLLER
2099         .ndo_poll_controller    = xennet_poll_controller,
2100 #endif
2101         .ndo_change_mtu         = xennet_change_mtu,
2102         .ndo_get_stats64        = xennet_get_stats64,
2103 };
2104
2105 static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
2106 {
2107         int i, err = 0;
2108         struct net_device *netdev = NULL;
2109         struct netfront_info *np = NULL;
2110
2111         netdev = alloc_etherdev(sizeof(struct netfront_info));
2112         if (!netdev)
2113                 return ERR_PTR(-ENOMEM);
2114
2115         np                   = netdev_priv(netdev);
2116         np->xbdev            = dev;
2117
2118         spin_lock_init(&np->tx_lock);
2119         spin_lock_init(&np->rx_lock);
2120
2121         init_accelerator_vif(np, dev);
2122
2123         skb_queue_head_init(&np->rx_batch);
2124         np->rx_target     = RX_DFL_MIN_TARGET;
2125         np->rx_min_target = RX_DFL_MIN_TARGET;
2126         np->rx_max_target = RX_MAX_TARGET;
2127
2128         init_timer(&np->rx_refill_timer);
2129         np->rx_refill_timer.data = (unsigned long)netdev;
2130         np->rx_refill_timer.function = rx_refill_timeout;
2131
2132         err = -ENOMEM;
2133         np->stats = alloc_percpu(struct netfront_stats);
2134         if (np->stats == NULL)
2135                 goto exit;
2136
2137         /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
2138         for (i = 0; i <= NET_TX_RING_SIZE; i++) {
2139                 np->tx_skbs[i] = (void *)((unsigned long) i+1);
2140                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
2141         }
2142
2143         for (i = 0; i < NET_RX_RING_SIZE; i++) {
2144                 np->rx_skbs[i] = NULL;
2145                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
2146         }
2147
2148         /* A grant for every tx ring slot */
2149         if (gnttab_alloc_grant_references(TX_MAX_TARGET,
2150                                           &np->gref_tx_head) < 0) {
2151                 pr_alert("#### netfront can't alloc tx grant refs\n");
2152                 err = -ENOMEM;
2153                 goto exit_free_stats;
2154         }
2155         /* A grant for every rx ring slot */
2156         if (gnttab_alloc_grant_references(RX_MAX_TARGET,
2157                                           &np->gref_rx_head) < 0) {
2158                 pr_alert("#### netfront can't alloc rx grant refs\n");
2159                 err = -ENOMEM;
2160                 goto exit_free_tx;
2161         }
2162
2163         netdev->netdev_ops      = &xennet_netdev_ops;
2164         netif_napi_add(netdev, &np->napi, netif_poll, 64);
2165         netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
2166                                   NETIF_F_GSO_ROBUST;
2167         netdev->hw_features     = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
2168
2169         /*
2170          * Assume that all hw features are available for now. This set
2171          * will be adjusted by the call to netdev_update_features() in
2172          * xennet_connect() which is the earliest point where we can
2173          * negotiate with the backend regarding supported features.
2174          */
2175         netdev->features |= netdev->hw_features;
2176
2177         SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
2178         SET_NETDEV_DEV(netdev, &dev->dev);
2179
2180         np->netdev = netdev;
2181
2182         netfront_carrier_off(np);
2183
2184         return netdev;
2185
2186  exit_free_tx:
2187         gnttab_free_grant_references(np->gref_tx_head);
2188  exit_free_stats:
2189         free_percpu(np->stats);
2190  exit:
2191         free_netdev(netdev);
2192         return ERR_PTR(err);
2193 }
2194
2195 static void netif_release_rings(struct netfront_info *info)
2196 {
2197         end_access(info->tx_ring_ref, info->tx.sring);
2198         end_access(info->rx_ring_ref, info->rx.sring);
2199         info->tx_ring_ref = GRANT_INVALID_REF;
2200         info->rx_ring_ref = GRANT_INVALID_REF;
2201         info->tx.sring = NULL;
2202         info->rx.sring = NULL;
2203 }
2204
2205 static void netif_disconnect_backend(struct netfront_info *info)
2206 {
2207         /* Stop old i/f to prevent errors whilst we rebuild the state. */
2208         spin_lock_bh(&info->rx_lock);
2209         spin_lock_irq(&info->tx_lock);
2210         netfront_carrier_off(info);
2211         spin_unlock_irq(&info->tx_lock);
2212         spin_unlock_bh(&info->rx_lock);
2213
2214         if (info->irq)
2215                 unbind_from_irqhandler(info->irq, info->netdev);
2216         info->irq = 0;
2217
2218         netif_release_rings(info);
2219 }
2220
2221
2222 static void end_access(int ref, void *page)
2223 {
2224         if (ref != GRANT_INVALID_REF)
2225                 gnttab_end_foreign_access(ref, (unsigned long)page);
2226 }
2227
2228
2229 /* ** Driver registration ** */
2230
2231
2232 static const struct xenbus_device_id netfront_ids[] = {
2233         { "vif" },
2234         { "" }
2235 };
2236 MODULE_ALIAS("xen:vif");
2237
2238 static DEFINE_XENBUS_DRIVER(netfront, ,
2239         .probe = netfront_probe,
2240         .remove = __devexit_p(netfront_remove),
2241         .suspend = netfront_suspend,
2242         .suspend_cancel = netfront_suspend_cancel,
2243         .resume = netfront_resume,
2244         .otherend_changed = backend_changed,
2245 );
2246
2247
2248 static int __init netif_init(void)
2249 {
2250         if (!is_running_on_xen())
2251                 return -ENODEV;
2252
2253 #ifdef CONFIG_XEN
2254         if (MODPARM_rx_flip && MODPARM_rx_copy) {
2255                 WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
2256                 return -EINVAL;
2257         }
2258
2259         if (!MODPARM_rx_flip && !MODPARM_rx_copy)
2260                 MODPARM_rx_copy = true; /* Default is to copy. */
2261 #endif
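
        /*
         * For example, the receive data path can be forced at load time
         * (module name depends on how this driver is packaged in a given
         * tree):
         *
         *      modprobe <netfront module> rx_copy=1
         */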
2262
2263         netif_init_accel();
2264
2265         IPRINTK("Initialising virtual ethernet driver.\n");
2266
2267         return xenbus_register_frontend(&netfront_driver);
2268 }
2269 module_init(netif_init);
2270
2271
2272 static void __exit netif_exit(void)
2273 {
2274         xenbus_unregister_driver(&netfront_driver);
2275
2276         netif_exit_accel();
2277 }
2278 module_exit(netif_exit);
2279
2280 MODULE_LICENSE("Dual BSD/GPL");