1 /****************************************************************************
2 * Solarflare driver for Xen network acceleration
4 * Copyright 2006-2008: Solarflare Communications Inc,
5 * 9501 Jeronimo Road, Suite 250,
6 * Irvine, CA 92618, USA
8 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as published
12 * by the Free Software Foundation, incorporated herein by reference.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 ****************************************************************************
25 #include <xen/evtchn.h>
26 #include <linux/mutex.h>
27 #include <linux/delay.h>
29 /* drivers/xen/netback/common.h */
33 #include "accel_solarflare.h"
34 #include "accel_util.h"
/*
 * sscanf() pattern for a backend vif xenbus node name.
 * netback_accel_probe() applies it to dev->nodename; the first %d field is
 * stored in bend->far_end.
 */
36 #define NODENAME_PATH_FMT "backend/vif/%d/%d"
/*
 * Yield the struct netback_accel attached to a xenbus device: the driver
 * data of the embedded struct device is fetched with dev_get_drvdata(),
 * treated as a struct backend_info, and its netback_accel_priv member is
 * cast to struct netback_accel *.  The expansion has no enclosing
 * parentheses, so use it only as a complete expression (for example as an
 * initialiser).
 */
38 #define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
39 ((struct backend_info *)dev_get_drvdata(&(_dev)->dev))->netback_accel_priv
41 /*
 * List of all the bends currently in existence.  bend_list is the head of
 * a singly linked list chained through next_bend.  link_bend() and
 * unlink_bend() both require bend_list_mutex to be held.
 */
42 struct netback_accel *bend_list = NULL;
43 DEFINE_MUTEX(bend_list_mutex);
45 /*
 * Put @bend in bend_list.  Must hold bend_list_mutex.
 *
 * The visible statement points bend->next_bend at the current list head.
 * NOTE(review): the store that makes @bend the new head is not visible in
 * this listing - confirm it is present.
 */
46 static void link_bend(struct netback_accel *bend)
48 bend->next_bend = bend_list;
52 /*
 * Remove @bend from bend_list.  Must hold bend_list_mutex.
 *
 * tmp starts at the list head and prev starts as NULL.  @bend is unlinked
 * either by pointing prev->next_bend at bend->next_bend or by moving the
 * list head bend_list to bend->next_bend.
 * NOTE(review): the list walk and the tests that choose between the two
 * stores are not visible in this listing - confirm.
 */
53 static void unlink_bend(struct netback_accel *bend)
55 struct netback_accel *tmp = bend_list;
56 struct netback_accel *prev = NULL;
60 prev->next_bend = bend->next_bend;
62 bend_list = bend->next_bend;
71 /*
 * Demultiplex a message IRQ from the frontend driver.
 *
 * @irq:     IRQ number; only used in the verbose log message.
 * @context: used as the struct xenbus_device of the interface.
 *
 * Looks up the bend for the device and defers the real work by queueing
 * bend->handle_msg with schedule_work().
 */
72 static irqreturn_t msgirq_from_frontend(int irq, void *context)
74 struct xenbus_device *dev = context;
75 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
76 VPRINTK("irq %d from device %s\n", irq, dev->nodename);
77 schedule_work(&bend->handle_msg);
83 * Demultiplex an IRQ from the frontend driver. This is never used
84 * functionally, but we need it to pass to the bind function, and may
85 * get called spuriously
/*
 * @irq:     IRQ number, logged only.
 * @context: struct xenbus_device pointer; only its nodename is logged.
 *
 * The visible body does nothing except emit a verbose log message.
 */
87 static irqreturn_t netirq_from_frontend(int irq, void *context)
89 VPRINTK("netirq %d from device %s\n", irq,
90 ((struct xenbus_device *)context)->nodename);
96 /*
 * Read the limit values from the xenbus structure.
 *
 * @dev:  backend xenbus device; keys are read relative to dev->nodename.
 * @bend: receives the values in bend->quotas.
 *
 * Gathers limits/max-filters, limits/max-buf-pages and limits/max-mcasts
 * outside any transaction (XBT_NIL).  The fallback shown below logs a
 * message and loads NETBACK_ACCEL_DEFAULT_MAX_FILTERS,
 * sfc_netback_max_pages and NETBACK_ACCEL_DEFAULT_MAX_MCASTS.
 * NOTE(review): the test guarding the fallback is not visible in this
 * listing; the log text implies it runs when xenbus_gather() fails.
 */
98 void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend)
100 int err = xenbus_gather
101 (XBT_NIL, dev->nodename,
102 "limits/max-filters", "%d", &bend->quotas.max_filters,
103 "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages,
104 "limits/max-mcasts", "%d", &bend->quotas.max_mcasts,
108 * TODO what if they have previously been set by the
109 * user? This will overwrite with defaults. Maybe
110 * not what we want to do, but useful in startup
113 DPRINTK("Failed to read quotas from xenbus, using defaults\n");
114 bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS;
115 bend->quotas.max_buf_pages = sfc_netback_max_pages;
116 bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS;
/*
 * Xenbus watch callback for bend->config_accel_watch.
 *
 * @watch: the fired watch; the owning bend is recovered with container_of().
 * @vec:   watch event vector; not referenced in the visible code.
 * @len:   length of @vec; not referenced in the visible code.
 *
 * Runs under bend->bend_mutex.  When config_accel_watch.node is non-NULL
 * the device is taken from bend->hdev_data, the watched node is tested
 * with xenbus_exists() (a missing node is logged as an ignored watch) and
 * the hardware quotas are re-read with cfg_hw_quotas().
 * NOTE(review): the statement that skips cfg_hw_quotas() for a missing
 * node is not visible in this listing - confirm.
 */
123 static void bend_config_accel_change(struct xenbus_watch *watch,
124 const char **vec, unsigned int len)
126 struct netback_accel *bend;
128 bend = container_of(watch, struct netback_accel, config_accel_watch);
130 mutex_lock(&bend->bend_mutex);
131 if (bend->config_accel_watch.node != NULL) {
132 struct xenbus_device *dev =
133 (struct xenbus_device *)bend->hdev_data;
134 DPRINTK("Watch matched, got dev %p otherend %p\n",
136 if(!xenbus_exists(XBT_NIL, watch->node, "")) {
137 DPRINTK("Ignoring watch as otherend seems invalid\n");
141 cfg_hw_quotas(dev, bend);
144 mutex_unlock(&bend->bend_mutex);
150 * Setup watch on "limits" in the backend vif info to know when
151 * configuration has been set
/*
 * @dev:  backend xenbus device whose limits node is watched.
 * @bend: owner of config_accel_watch.
 *
 * Registers bend_config_accel_change() on <dev->nodename>/limits with
 * xenbus_watch_path2().  The error handling shown logs the failure and
 * sets config_accel_watch.node to NULL, which is the value
 * bend_config_accel_change() tests before doing any work.
 * NOTE(review): the error test and the return statement are not visible
 * in this listing.
 */
153 static int setup_config_accel_watch(struct xenbus_device *dev,
154 struct netback_accel *bend)
158 VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits");
160 err = xenbus_watch_path2(dev, dev->nodename, "limits",
161 &bend->config_accel_watch,
162 bend_config_accel_change);
165 EPRINTK("%s: Failed to register xenbus watch: %d\n",
167 bend->config_accel_watch.node = NULL;
/*
 * Read the frontend event channels and shared-memory grants from xenbus.
 *
 * @dev:  backend xenbus device; keys are read under dev->otherend.
 * @bend: receives msg_channel and net_channel.
 * The third argument (see setup_vnic(), which passes a two element int
 * array) receives the grant references in grants[0] and grants[1].
 *
 * Gathered outside any transaction (XBT_NIL):
 *   accel-msg-channel -> bend->msg_channel
 *   accel-ctrl-page   -> grants[0]
 *   accel-msg-page    -> grants[1]
 *   accel-net-channel -> bend->net_channel
 * An EPRINTK reports a failed read and a DPRINTK reports the two channel
 * numbers.
 * NOTE(review): the return type, the last parameter declaration, the
 * error test and the return statement are not visible in this listing.
 */
175 cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend,
178 /* Get some info from xenbus on the event channel and shmem grant */
179 int err = xenbus_gather(XBT_NIL, dev->otherend,
180 "accel-msg-channel", "%u", &bend->msg_channel,
181 "accel-ctrl-page", "%d", &(grants[0]),
182 "accel-msg-page", "%d", &(grants[1]),
183 "accel-net-channel", "%u", &bend->net_channel,
186 EPRINTK("failed to read event channels or shmem grant: %d\n",
189 DPRINTK("got event chan %d and net chan %d from frontend\n",
190 bend->msg_channel, bend->net_channel);
195 /*
 * Setup all the comms needed to chat with the front end driver.
 *
 * @dev: backend xenbus device; its bend is found through
 *       NETBACK_ACCEL_FROM_XENBUS_DEVICE().
 *
 * Steps visible below, in order:
 *  - cfg_frontend_info() fetches the event channels and two grant refs;
 *  - cfg_hw_quotas() loads the hardware quotas;
 *  - bend->handle_msg is initialised to run netback_accel_msg_rx_handler
 *    (two INIT_WORK forms are present, selected on kernel 2.6.20);
 *  - the frontend MAC is read into bend->mac;
 *  - the two grants are mapped contiguously as bend->shared_page, with
 *    the unmap handle kept in bend->sh_pages_unmap;
 *  - the shared page is initialised, passing whether net_dev is both
 *    IFF_UP and has carrier;
 *  - msgs_per_queue is the number of struct net_accel_msg that fit in
 *    half a page; the to_domU ring is placed at shared_page + PAGE_SIZE
 *    and the from_domU ring at shared_page + 3 * PAGE_SIZE / 2;
 *  - the message and net event channels are bound with
 *    msgirq_from_frontend() and netirq_from_frontend() as handlers, and
 *    the results are stored in msg_channel_irq and net_channel_irq;
 *  - the MAC is added to the forwarding table with a NULL context;
 *  - HELLO is sent to the frontend.
 *
 * The tail of the function is the unwind path: unbind the net IRQ, unbind
 * the message IRQ, unmap the grants and clear shared_page and
 * sh_pages_unmap.
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this listing - confirm the unwind order against them.
 */
196 static int setup_vnic(struct xenbus_device *dev)
198 struct netback_accel *bend;
199 int grants[2], err, msgs_per_queue;
201 bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
203 err = cfg_frontend_info(dev, bend, grants);
208 * If we get here, both frontend Connected and configuration
209 * options available. All is well.
212 /* Get the hardware quotas for the VNIC in question. */
213 cfg_hw_quotas(dev, bend);
215 /* Set up the deferred work handlers */
216 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
217 INIT_WORK(&bend->handle_msg,
218 netback_accel_msg_rx_handler);
220 INIT_WORK(&bend->handle_msg,
221 netback_accel_msg_rx_handler,
225 /* Request the frontend mac */
226 err = net_accel_xen_net_read_mac(dev, bend->mac);
230 /* Set up the shared page. */
231 bend->shared_page = net_accel_map_grants_contig(dev, grants, 2,
232 &bend->sh_pages_unmap);
234 if (bend->shared_page == NULL) {
235 EPRINTK("failed to map shared page for %s\n", dev->otherend);
240 /* Initialise the shared page(s) used for comms */
241 net_accel_msg_init_page(bend->shared_page, PAGE_SIZE,
242 (bend->net_dev->flags & IFF_UP) &&
243 (netif_carrier_ok(bend->net_dev)));
245 msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
247 net_accel_msg_init_queue
248 (&bend->to_domU, &bend->shared_page->queue0,
249 (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE),
252 net_accel_msg_init_queue
253 (&bend->from_domU, &bend->shared_page->queue1,
254 (struct net_accel_msg *)((__u8*)bend->shared_page +
255 (3 * PAGE_SIZE / 2)),
258 /* Bind the message event channel to a handler
260 * Note that we will probably get a spurious interrupt when we
261 * do this, so it must not be done until we have set up
262 * everything we need to handle it.
264 err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
266 msgirq_from_frontend,
271 EPRINTK("failed to bind event channel: %d\n", err);
275 bend->msg_channel_irq = err;
277 /* TODO: No need to bind this evtchn to an irq. */
278 err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
280 netirq_from_frontend,
285 EPRINTK("failed to bind net channel: %d\n", err);
289 bend->net_channel_irq = err;
292 * Grab ourselves an entry in the forwarding hash table. We do
293 * this now so we don't have the embarassmesnt of sorting out
294 * an allocation failure while at IRQ. Because we pass NULL as
295 * the context, the actual hash lookup will succeed for this
296 * NIC, but the check for somewhere to forward to will
297 * fail. This is necessary to prevent forwarding before
298 * hardware resources are set up
300 err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv);
302 EPRINTK("failed to add to fwd hash table\n");
307 * Say hello to frontend. Important to do this straight after
308 * obtaining the message queue as otherwise we are vulnerable
309 * to an evil frontend sending a HELLO-REPLY before we've sent
310 * the HELLO and confusing us
312 netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION);
316 unbind_from_irqhandler(bend->net_channel_irq, dev);
318 unbind_from_irqhandler(bend->msg_channel_irq, dev);
320 net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
321 bend->shared_page = NULL;
322 bend->sh_pages_unmap = NULL;
/*
 * Read the accel key from the backend xenbus directory.
 *
 * @dev:  backend xenbus device; the key is read under dev->nodename.
 * @bend: bend->nicname receives the pointer returned by xenbus_read().
 *
 * Returns PTR_ERR() of the result when xenbus_read() reports an error.
 * bend->nicname is released with kfree() in the failure path of
 * netback_accel_probe() and in netback_accel_remove().
 * NOTE(review): the success return is not visible in this listing.
 */
329 static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend)
333 /* nic name used to select interface used for acceleration */
334 bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len);
335 if (IS_ERR(bend->nicname))
336 return PTR_ERR(bend->nicname);
/* Value written to the accel-frontend key by publish_frontend_name(). */
341 static const char *frontend_name = "sfc_netfront";
/*
 * Publish the name of the frontend driver in xenbus.
 *
 * @dev: backend xenbus device; the key is written under dev->nodename.
 *
 * Inside a xenbus transaction, writes frontend_name to the accel-frontend
 * key.  If xenbus_printf() fails the transaction is aborted
 * (xenbus_transaction_end(tr, 1)); otherwise it is committed, and the
 * loop repeats while the commit returns -EAGAIN.  Failures of each stage
 * are logged with EPRINTK.
 * NOTE(review): the error tests and return statements are not visible in
 * this listing.
 */
343 static int publish_frontend_name(struct xenbus_device *dev)
345 struct xenbus_transaction tr;
348 /* Publish the name of the frontend driver */
350 err = xenbus_transaction_start(&tr);
352 EPRINTK("%s: transaction start failed\n", __FUNCTION__);
355 err = xenbus_printf(tr, dev->nodename, "accel-frontend",
356 "%s", frontend_name);
358 EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__);
359 xenbus_transaction_end(tr, 1);
362 err = xenbus_transaction_end(tr, 0);
363 } while (err == -EAGAIN);
366 EPRINTK("failed to end frontend name transaction\n");
/*
 * Remove the accel-frontend key written by publish_frontend_name().
 *
 * @dev: backend xenbus device; the key is removed under dev->nodename.
 *
 * Inside a xenbus transaction, removes the key with xenbus_rm().  The
 * transaction is aborted (xenbus_transaction_end(tr, 1)) on the visible
 * failure branch, otherwise committed, and the loop repeats while the
 * commit returns -EAGAIN.
 * NOTE(review): the error tests and return statements are not visible in
 * this listing.
 */
373 static int unpublish_frontend_name(struct xenbus_device *dev)
375 struct xenbus_transaction tr;
379 err = xenbus_transaction_start(&tr);
382 err = xenbus_rm(tr, dev->nodename, "accel-frontend");
384 xenbus_transaction_end(tr, 1);
387 err = xenbus_transaction_end(tr, 0);
388 } while (err == -EAGAIN);
/*
 * Release what setup_vnic() acquired for @bend.
 *
 * @bend: backend state; the xenbus device is taken from bend->hdev_data.
 *
 * In order: remove the MAC from the forwarding table, free the buffer
 * table allocations, call the accel_shutdown hook if one is set, unbind
 * the net and message IRQs when their fields are non-zero (zeroing each
 * field afterwards), and unmap the shared grants when sh_pages_unmap is
 * set (clearing sh_pages_unmap and shared_page afterwards).
 */
394 static void cleanup_vnic(struct netback_accel *bend)
396 struct xenbus_device *dev;
398 dev = (struct xenbus_device *)bend->hdev_data;
400 DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev);
402 DPRINTK("%s: Remove %p's mac from fwd table...\n",
404 netback_accel_fwd_remove(bend->mac, bend->fwd_priv);
406 /* Free buffer table allocations */
407 netback_accel_remove_buffers(bend);
409 DPRINTK("%s: Release hardware resources...\n", __FUNCTION__);
410 if (bend->accel_shutdown)
411 bend->accel_shutdown(bend);
413 if (bend->net_channel_irq) {
414 unbind_from_irqhandler(bend->net_channel_irq, dev);
415 bend->net_channel_irq = 0;
418 if (bend->msg_channel_irq) {
419 unbind_from_irqhandler(bend->msg_channel_irq, dev);
420 bend->msg_channel_irq = 0;
423 if (bend->sh_pages_unmap) {
424 DPRINTK("%s: Unmap grants %p\n", __FUNCTION__,
425 bend->sh_pages_unmap);
426 net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
427 bend->sh_pages_unmap = NULL;
428 bend->shared_page = NULL;
433 /*************************************************************************/
436 * The following code handles accelstate changes between the frontend
437 * and the backend. It calls setup_vnic and cleanup_vnic in matching
438 * pairs in response to transitions.
440 * Valid state transitions for Dom0 are as follows:
442 * Closed->Init on probe or in response to Init from domU
443 * Closed->Closing on error/remove
445 * Init->Connected in response to Connected from domU
446 * Init->Closing on error/remove or in response to Closing from domU
448 * Connected->Closing on error/remove or in response to Closing from domU
450 * Closing->Closed in response to Closed from domU
/*
 * React to a new accelstate reported by the frontend.
 *
 * @dev:            backend xenbus device; the bend is looked up from it.
 * @frontend_state: state just read from the frontend.
 *
 * bend_domu_accel_change() calls this with bend->bend_mutex held.
 *
 * A state equal to the recorded bend->frontend_state is treated as a
 * duplicate.  Otherwise the new state is recorded and a local copy of
 * bend->backend_state is advanced according to the visible cases:
 *  - Initialising: a Closed backend may move to Initialising (the second
 *    half of that condition is not visible in this listing);
 *  - Connected: from Initialising, a vnic that is not yet set up is set
 *    up with setup_vnic(); success marks vnic_is_setup and selects
 *    Connected, and Closing is the other visible outcome;
 *  - InitWait and Initialised: logged as an unknown state, then handled
 *    like Closing;
 *  - Closing: any backend state other than Closed becomes Closing;
 *  - Unknown and Closed: vnic_is_setup is cleared if it was set, and a
 *    Closing backend becomes Closed.
 * If the local copy differs from bend->backend_state it is stored and
 * published with net_accel_update_state().  Waiters on state_wait_queue
 * (netback_accel_remove() sleeps there) are woken.
 * NOTE(review): break and return statements, any default label and the
 * teardown call paired with clearing vnic_is_setup are not visible in
 * this listing - confirm.
 */
455 static void netback_accel_frontend_changed(struct xenbus_device *dev,
456 XenbusState frontend_state)
458 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
459 XenbusState backend_state;
461 DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
462 __FUNCTION__, xenbus_strstate(bend->frontend_state),
463 xenbus_strstate(frontend_state),dev->nodename, dev->otherend);
466 * Ignore duplicate state changes. This can happen if the
467 * frontend changes state twice in quick succession and the
468 * first watch fires in the backend after the second
469 * transition has completed.
471 if (bend->frontend_state == frontend_state)
474 bend->frontend_state = frontend_state;
475 backend_state = bend->backend_state;
477 switch (frontend_state) {
478 case XenbusStateInitialising:
479 if (backend_state == XenbusStateClosed &&
481 backend_state = XenbusStateInitialising;
484 case XenbusStateConnected:
485 if (backend_state == XenbusStateInitialising) {
486 if (!bend->vnic_is_setup &&
487 setup_vnic(dev) == 0) {
488 bend->vnic_is_setup = 1;
489 backend_state = XenbusStateConnected;
491 backend_state = XenbusStateClosing;
496 case XenbusStateInitWait:
497 case XenbusStateInitialised:
499 DPRINTK("Unknown state %s (%d) from frontend.\n",
500 xenbus_strstate(frontend_state), frontend_state);
501 /* Unknown state. Fall through. */
502 case XenbusStateClosing:
503 if (backend_state != XenbusStateClosed)
504 backend_state = XenbusStateClosing;
507 * The bend will now persist (with watches active) in
508 * case the frontend comes back again, eg. after
509 * frontend module reload or suspend/resume
514 case XenbusStateUnknown:
515 case XenbusStateClosed:
516 if (bend->vnic_is_setup) {
517 bend->vnic_is_setup = 0;
521 if (backend_state == XenbusStateClosing)
522 backend_state = XenbusStateClosed;
526 if (backend_state != bend->backend_state) {
527 DPRINTK("Switching from state %s (%d) to %s (%d)\n",
528 xenbus_strstate(bend->backend_state),
530 xenbus_strstate(backend_state), backend_state);
531 bend->backend_state = backend_state;
532 net_accel_update_state(dev, backend_state);
535 wake_up(&bend->state_wait_queue);
539 /*
 * accelstate on the frontend xenbus node has changed.
 *
 * @watch: the fired watch; the owning bend is recovered with
 *         container_of() on domu_accel_watch.
 * @vec:   watch event vector; vec[XS_WATCH_PATH] is the path that fired.
 * @len:   length of @vec; not referenced in the visible code.
 *
 * Only acts when domu_accel_watch.node is non-NULL.  The event is logged
 * as ignored when dev->otherend is NULL, when the watched node no longer
 * exists, or when the fired path does not begin with dev->otherend
 * (strncmp over strlen(dev->otherend)).  Otherwise, under
 * bend->bend_mutex, the frontend accelstate is read with xenbus_scanf()
 * and passed to netback_accel_frontend_changed().
 * NOTE(review): the declaration and error handling of the scanned state
 * are not visible in this listing.
 */
540 static void bend_domu_accel_change(struct xenbus_watch *watch,
541 const char **vec, unsigned int len)
544 struct netback_accel *bend;
546 bend = container_of(watch, struct netback_accel, domu_accel_watch);
547 if (bend->domu_accel_watch.node != NULL) {
548 struct xenbus_device *dev =
549 (struct xenbus_device *)bend->hdev_data;
550 VPRINTK("Watch matched, got dev %p otherend %p\n",
553 * dev->otherend != NULL check to protect against
554 * watch firing when domain goes away and we haven't
557 if (!dev->otherend ||
558 !xenbus_exists(XBT_NIL, watch->node, "") ||
559 strncmp(dev->otherend, vec[XS_WATCH_PATH],
560 strlen(dev->otherend))) {
561 DPRINTK("Ignoring watch as otherend seems invalid\n");
565 mutex_lock(&bend->bend_mutex);
567 xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
569 netback_accel_frontend_changed(dev, state);
571 mutex_unlock(&bend->bend_mutex);
575 /*
 * Setup watch on the frontend accelstate node.
 *
 * @dev:  backend xenbus device; the watched path is
 *        <dev->otherend>/accelstate.
 * @bend: owner of domu_accel_watch.
 *
 * Registers bend_domu_accel_change() with xenbus_watch_path2().  The
 * error handling shown logs the failure and sets domu_accel_watch.node to
 * NULL, which is the value bend_domu_accel_change() tests before doing
 * any work.
 * NOTE(review): the error test, any label and the return statement are
 * not visible in this listing.
 */
576 static int setup_domu_accel_watch(struct xenbus_device *dev,
577 struct netback_accel *bend)
581 VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
583 err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
584 &bend->domu_accel_watch,
585 bend_domu_accel_change);
587 EPRINTK("%s: Failed to register xenbus watch: %d\n",
593 bend->domu_accel_watch.node = NULL;
/*
 * Create and register the acceleration state (bend) for a backend device.
 *
 * @dev: backend xenbus device being probed.
 *
 * Visible sequence:
 *  - allocate a zeroed struct netback_accel, initialise and take
 *    bend_mutex;
 *  - attach it to the device (binfo->netback_accel_priv) and record the
 *    device in bend->hdev_data;
 *  - initialise state_wait_queue, vnic_is_setup = 0, frontend state
 *    Unknown and backend state Closed;
 *  - parse dev->nodename with NODENAME_PATH_FMT (first field: far_end);
 *  - read_nicname() and netback_accel_sf_hwtype(); the comments note that
 *    a failure of either just means the interface is not accelerated;
 *  - publish_frontend_name() and netback_accel_debugfs_create();
 *  - drop bend_mutex, then register the config and frontend watches;
 *  - retake bend_mutex and, if the backend state is still Closed (the
 *    watch has not fired yet), move to Initialising and publish it;
 *  - take and release bend_list_mutex (the statement in between is not
 *    visible in this listing).
 *
 * The tail is the failure path: unregister and free the config watch,
 * flush handle_msg, publish state Unknown, remove debugfs, unpublish the
 * frontend name, free nicname and clear netback_accel_priv.
 * NOTE(review): labels, error checks, the freeing of bend and the return
 * values are not visible in this listing - confirm.
 */
598 int netback_accel_probe(struct xenbus_device *dev)
600 struct netback_accel *bend;
601 struct backend_info *binfo;
604 DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename);
606 /* Allocate structure to store all our state... */
607 bend = kzalloc(sizeof(struct netback_accel), GFP_KERNEL);
609 DPRINTK("%s: no memory for bend\n", __FUNCTION__);
613 mutex_init(&bend->bend_mutex);
615 mutex_lock(&bend->bend_mutex);
617 /* ...and store it where we can get at it */
618 binfo = dev_get_drvdata(&dev->dev);
619 binfo->netback_accel_priv = bend;
621 bend->hdev_data = dev;
623 DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend);
625 init_waitqueue_head(&bend->state_wait_queue);
626 bend->vnic_is_setup = 0;
627 bend->frontend_state = XenbusStateUnknown;
628 bend->backend_state = XenbusStateClosed;
631 sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end,
634 err = read_nicname(dev, bend);
637 * Technically not an error, just means we're not
638 * supposed to accelerate this
640 DPRINTK("failed to get device name\n");
645 * Look up the device name in the list of NICs provided by
646 * driverlink to get the hardware type.
648 err = netback_accel_sf_hwtype(bend);
651 * Technically not an error, just means we're not
652 * supposed to accelerate this, probably belongs to
655 DPRINTK("failed to match device name\n");
659 err = publish_frontend_name(dev);
663 err = netback_accel_debugfs_create(bend);
667 mutex_unlock(&bend->bend_mutex);
669 err = setup_config_accel_watch(dev, bend);
671 goto fail_config_watch;
673 err = setup_domu_accel_watch(dev, bend);
675 goto fail_domu_watch;
678 * Indicate to the other end that we're ready to start unless
679 * the watch has already fired.
681 mutex_lock(&bend->bend_mutex);
682 if (bend->backend_state == XenbusStateClosed) {
683 bend->backend_state = XenbusStateInitialising;
684 net_accel_update_state(dev, XenbusStateInitialising);
686 mutex_unlock(&bend->bend_mutex);
688 mutex_lock(&bend_list_mutex);
690 mutex_unlock(&bend_list_mutex);
696 unregister_xenbus_watch(&bend->config_accel_watch);
697 kfree(bend->config_accel_watch.node);
701 * Flush the scheduled work queue before freeing bend to get
702 * rid of any pending netback_accel_msg_rx_handler()
704 flush_work_sync(&bend->handle_msg);
706 mutex_lock(&bend->bend_mutex);
707 net_accel_update_state(dev, XenbusStateUnknown);
708 netback_accel_debugfs_remove(bend);
711 unpublish_frontend_name(dev);
714 /* No need to reverse netback_accel_sf_hwtype. */
717 kfree(bend->nicname);
719 binfo->netback_accel_priv = NULL;
720 mutex_unlock(&bend->bend_mutex);
/*
 * Tear down the acceleration state for a backend device.
 *
 * @dev: backend xenbus device being removed; its bend must exist
 *       (BUG_ON otherwise).
 *
 * Visible sequence:
 *  - take and release bend_list_mutex (the statement in between is not
 *    visible in this listing);
 *  - under bend_mutex: switch the backend to Closing unless it is already
 *    there and publish that state; frontend_state is preset to Unknown
 *    before the frontend accelstate is read with xenbus_scanf();
 *  - unless the frontend is Closed or Unknown, sleep on state_wait_queue
 *    until backend_state becomes Closed
 *    (netback_accel_frontend_changed() wakes the queue);
 *  - unregister both watches and free their node strings;
 *  - flush handle_msg so that no netback_accel_msg_rx_handler() is
 *    pending;
 *  - under bend_mutex: clear vnic_is_setup if set, set and publish state
 *    Unknown, remove debugfs, unpublish the frontend name, free nicname
 *    and clear netback_accel_priv.
 * NOTE(review): the statement after the comment about rejecting connect
 * requests, the vnic teardown call, the freeing of bend and the return
 * value are not visible in this listing - confirm.
 */
726 int netback_accel_remove(struct xenbus_device *dev)
728 struct backend_info *binfo;
729 struct netback_accel *bend;
732 binfo = dev_get_drvdata(&dev->dev);
733 bend = (struct netback_accel *) binfo->netback_accel_priv;
735 DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
737 BUG_ON(bend == NULL);
739 mutex_lock(&bend_list_mutex);
741 mutex_unlock(&bend_list_mutex);
743 mutex_lock(&bend->bend_mutex);
745 /* Reject any requests to connect. */
749 * Switch to closing to tell the other end that we're going
752 if (bend->backend_state != XenbusStateClosing) {
753 bend->backend_state = XenbusStateClosing;
754 net_accel_update_state(dev, XenbusStateClosing);
757 frontend_state = (int)XenbusStateUnknown;
758 xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
761 mutex_unlock(&bend->bend_mutex);
764 * Wait until this end goes to the closed state. This happens
765 * in response to the other end going to the closed state.
766 * Don't bother doing this if the other end is already closed
767 * because if it is then there is nothing to do.
769 if (frontend_state != (int)XenbusStateClosed &&
770 frontend_state != (int)XenbusStateUnknown)
771 wait_event(bend->state_wait_queue,
772 bend->backend_state == XenbusStateClosed);
774 unregister_xenbus_watch(&bend->domu_accel_watch);
775 kfree(bend->domu_accel_watch.node);
777 unregister_xenbus_watch(&bend->config_accel_watch);
778 kfree(bend->config_accel_watch.node);
781 * Flush the scheduled work queue before freeing bend to get
782 * rid of any pending netback_accel_msg_rx_handler()
784 flush_work_sync(&bend->handle_msg);
786 mutex_lock(&bend->bend_mutex);
788 /* Tear down the vnic if it was set up. */
789 if (bend->vnic_is_setup) {
790 bend->vnic_is_setup = 0;
794 bend->backend_state = XenbusStateUnknown;
795 net_accel_update_state(dev, XenbusStateUnknown);
797 netback_accel_debugfs_remove(bend);
799 unpublish_frontend_name(dev);
801 kfree(bend->nicname);
803 binfo->netback_accel_priv = NULL;
805 mutex_unlock(&bend->bend_mutex);
/*
 * Sanity check that no bends remain.
 *
 * Takes bend_list_mutex and triggers BUG_ON() if bend_list is not empty,
 * then releases the mutex.  No bend is torn down here; the existing
 * comment below expects every interface to have been removed already.
 */
813 void netback_accel_shutdown_bends(void)
815 mutex_lock(&bend_list_mutex);
817 * I think we should have had a remove callback for all
818 * interfaces before being allowed to unload the module
820 BUG_ON(bend_list != NULL);
821 mutex_unlock(&bend_list_mutex);
825 void netback_accel_set_closing(struct netback_accel *bend)
828 bend->backend_state = XenbusStateClosing;
829 net_accel_update_state((struct xenbus_device *)bend->hdev_data,