2 * PCI Backend Xenbus Setup - handles setup with frontend and xend
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
6 #include <linux/module.h>
7 #include <linux/init.h>
8 #include <linux/list.h>
9 #include <linux/workqueue.h>
10 #include <xen/xenbus.h>
12 #include <xen/events.h>
13 #include <asm/xen/pci.h>
15 #include <xen/evtchn.h>
/* Sentinel kept in evtchn_irq while no event-channel IRQ is bound. */
19 #define INVALID_EVTCHN_IRQ (-1)
/* Workqueue shared by the backend; created in xen_pcibk_xenbus_register(). */
20 struct workqueue_struct *xen_pcibk_wq;
/*
 * "mode" module parameter: a backend name that is compared against the name
 * of each entry in the backend table when the driver registers. It defaults
 * to CONFIG_XEN_PCIDEV_BACKEND_DEFAULT and is exported with read-only
 * permissions (S_IRUGO).
 */
22 static char __read_mostly mode[16] = CONFIG_XEN_PCIDEV_BACKEND_DEFAULT;
23 module_param_string(mode, mode, sizeof(mode), S_IRUGO);
/*
 * Help text for "mode". Each backend that is compiled in
 * (CONFIG_XEN_PCIDEV_BACKEND_VPCI / _PASSTHROUGH / _SLOT / _CONTROLLER)
 * contributes one paragraph describing how it presents the PCI topology.
 * NOTE(review): the lines naming each mode, the "(default)" markers and the
 * closing #endif / ");" lines are not visible in this view.
 */
24 MODULE_PARM_DESC(mode,
25 "Option to specify how to export PCI topology to guest:\n"
26 #ifdef CONFIG_XEN_PCIDEV_BACKEND_VPCI
28 # ifdef CONFIG_XEN_PCIDEV_BACKEND_DEFAULT_VPCI
32 " Hides the true PCI topology and makes the frontend think there\n"
33 " is a single PCI bus with only the exported devices on it.\n"
34 " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"
35 " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"
37 #ifdef CONFIG_XEN_PCIDEV_BACKEND_PASSTHROUGH
39 # ifdef CONFIG_XEN_PCIDEV_BACKEND_DEFAULT_PASSTHROUGH
43 " Passthrough provides a real view of the PCI topology to the\n"
44 " frontend (for example, a device at 06:01.b will still appear at\n"
45 " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"
46 " exposed PCI devices to its driver domains. This may be required\n"
47 " for drivers which depend on finding their hardware in certain\n"
48 " bus/slot locations.\n"
50 #ifdef CONFIG_XEN_PCIDEV_BACKEND_SLOT
52 # ifdef CONFIG_XEN_PCIDEV_BACKEND_DEFAULT_SLOT
55 " Hides the true PCI topology and makes the frontend think there\n"
56 " is a single PCI bus with only the exported devices on it.\n"
57 " Contrary to the virtual PCI backend, each function becomes a\n"
59 " For example, a device at 03:05.2 will be re-assigned to 00:00.0.\n"
60 " A second device at 02:1a.1 will be re-assigned to 00:01.0.\n"
62 #ifdef CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER
64 # ifdef CONFIG_XEN_PCIDEV_BACKEND_DEFAULT_CONTROLLER
67 " Virtualizes the PCI bus topology by providing a virtual bus\n"
68 " per PCI root device. Devices which are physically under\n"
69 " the same root bus will appear on the same virtual bus. For\n"
70 " systems with complex I/O addressing, this is the only backend\n"
71 " which supports extended I/O port spaces and MMIO translation\n"
72 " offsets. This backend also supports slot virtualization.\n"
73 " For example, a device at 0000:01:02.1 will be re-assigned to\n"
74 " 0000:00:00.0. A second device at 0000:02:05.0 (behind a P2P\n"
75 " bridge on bus 0000:01) will be re-assigned to 0000:00:01.0. A\n"
76 " third device at 0000:16:05.0 (under a different PCI root bus)\n"
77 " will be re-assigned to 0000:01:00.0.\n"
/*
 * Allocate and initialise the per-frontend backend state for @xdev.
 *
 * The structure is zero-allocated with GFP_KERNEL, stored as the xenbus
 * device's driver data, and given an initialised dev_lock, an evtchn_irq of
 * INVALID_EVTCHN_IRQ, a cleared be_watching flag and an op_work item that
 * runs xen_pcibk_do_op(). xen_pcibk_init_devices() is called last.
 *
 * NOTE(review): the allocation-failure check, the handling of a non-zero
 * xen_pcibk_init_devices() result and the return statement are not visible
 * in this view; presumably NULL is returned on failure - confirm.
 */
81 static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
83 struct xen_pcibk_device *pdev;
85 pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
88 dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
91 dev_set_drvdata(&xdev->dev, pdev);
93 mutex_init(&pdev->dev_lock);
99 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
100 pdev->be_watching = 0;
102 INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
104 if (xen_pcibk_init_devices(pdev)) {
/*
 * Tear down the connection to the frontend while holding pdev->dev_lock.
 *
 * Order of operations: unbind the event-channel IRQ (if one is bound) and
 * reset evtchn_irq to INVALID_EVTCHN_IRQ, flush xen_pcibk_wq so that an
 * operation already started completes, then unmap the shared ring and clear
 * sh_info.
 *
 * NOTE(review): two unmap calls are visible (one passing sh_info, one
 * passing sh_area); these look like alternative build variants whose
 * preprocessor guards are not shown in this view - confirm.
 */
112 static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
114 mutex_lock(&pdev->dev_lock);
115 /* Ensure the guest can't trigger our handler before removing devices */
116 if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
117 unbind_from_irqhandler(pdev->evtchn_irq, pdev);
118 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
121 /* If the driver domain started an op, make sure we complete it
122 * before releasing the shared memory */
124 /* Note, the workqueue does not use spinlocks at all.*/
125 flush_workqueue(xen_pcibk_wq);
127 if (pdev->sh_info != NULL) {
129 xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
131 xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
133 pdev->sh_info = NULL;
135 mutex_unlock(&pdev->dev_lock);
/*
 * Release a backend device: unregister the xenstore watch if be_watching is
 * set, disconnect from the frontend, release the exported PCI devices and
 * clear the xenbus device's driver data.
 *
 * NOTE(review): the remainder of the function (including any freeing of
 * pdev itself) is not visible in this view - confirm.
 */
138 static void free_pdev(struct xen_pcibk_device *pdev)
140 if (pdev->be_watching) {
141 unregister_xenbus_watch(&pdev->be_watch);
142 pdev->be_watching = 0;
145 xen_pcibk_disconnect(pdev);
147 xen_pcibk_release_devices(pdev);
149 dev_set_drvdata(&pdev->xdev->dev, NULL);
/*
 * Attach to the frontend's resources: map the shared page identified by
 * grant reference @gnt_ref into sh_info and bind the remote event channel
 * to xen_pcibk_handle_event. The value returned by
 * bind_interdomain_evtchn_to_irqhandler() is stored in evtchn_irq.
 * Mapping and binding failures are reported with xenbus_dev_fatal().
 *
 * NOTE(review): two forms of xenbus_map_ring_valloc() appear (one filling
 * &vaddr, one returning a vm_struct area); they look like alternative build
 * variants whose preprocessor guards are not shown. The error checks and
 * the return statement are likewise not visible in this view.
 */
155 static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
162 struct vm_struct *area;
165 dev_dbg(&pdev->xdev->dev,
166 "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
167 gnt_ref, remote_evtchn);
170 err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
173 area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
177 xenbus_dev_fatal(pdev->xdev, err,
178 "Error mapping other domain page in ours.");
183 pdev->sh_info = vaddr;
185 pdev->sh_area = area;
186 pdev->sh_info = area->addr;
189 err = bind_interdomain_evtchn_to_irqhandler(
190 pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
193 xenbus_dev_fatal(pdev->xdev, err,
194 "Error binding event channel to IRQ");
197 pdev->evtchn_irq = err;
200 dev_dbg(&pdev->xdev->dev, "Attached!\n");
/*
 * Connect to the frontend. Runs under pdev->dev_lock.
 *
 * The backend's own state and the frontend's state are each compared with
 * XenbusStateInitialised first. The frontend's "pci-op-ref",
 * "event-channel" and "magic" keys are then gathered from xenstore, "magic"
 * is checked against XEN_PCI_MAGIC, xen_pcibk_do_attach() is called and the
 * backend is switched to XenbusStateConnected. Failures are reported through
 * xenbus_dev_fatal().
 *
 * NOTE(review): the branch bodies taken when a state check fails, the error
 * exits, any release of the "magic" string and the return statement are not
 * visible in this view.
 */
205 static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
208 int gnt_ref, remote_evtchn;
212 mutex_lock(&pdev->dev_lock);
213 /* Make sure we only do this setup once */
214 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
215 XenbusStateInitialised)
218 /* Wait for frontend to state that it has published the configuration */
219 if (xenbus_read_driver_state(pdev->xdev->otherend) !=
220 XenbusStateInitialised)
223 dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
225 err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
226 "pci-op-ref", "%u", &gnt_ref,
227 "event-channel", "%u", &remote_evtchn,
228 "magic", NULL, &magic, NULL);
230 /* If configuration didn't get read correctly, wait longer */
231 xenbus_dev_fatal(pdev->xdev, err,
232 "Error reading configuration from frontend");
236 if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
237 xenbus_dev_fatal(pdev->xdev, -EFAULT,
238 "version mismatch (%s/%s) with pcifront - "
240 magic, XEN_PCI_MAGIC);
244 err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
248 dev_dbg(&pdev->xdev->dev, "Connecting...\n");
250 err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
252 xenbus_dev_fatal(pdev->xdev, err,
253 "Error switching to connected state!");
255 dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
257 mutex_unlock(&pdev->dev_lock);
/*
 * Publish one exported device in xenstore: writes the key "vdev-<devid>"
 * under this backend's node with the value "dddd:bb:ss.ff" built from
 * @domain, @bus and the slot/function parts of @devfn.
 *
 * The key name is formatted with snprintf() and checked against the buffer
 * size before it is used.
 *
 * NOTE(review): the body of the overflow branch and the return statement
 * are not visible in this view.
 */
264 static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
265 unsigned int domain, unsigned int bus,
266 unsigned int devfn, unsigned int devid)
272 len = snprintf(str, sizeof(str), "vdev-%d", devid);
273 if (unlikely(len >= (sizeof(str) - 1))) {
278 /* Note: The PV protocol uses %02x, don't change it */
279 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
280 "%04x:%02x:%02x.%02x", domain, bus,
281 PCI_SLOT(devfn), PCI_FUNC(devfn));
/*
 * Export the PCI device at @domain:@bus:@slot.@func to the frontend.
 *
 * The device is looked up with pcistub_get_pci_dev_by_slot(); a failed
 * lookup is reported through xenbus_dev_fatal(). The device is then added
 * to @pdev with xen_pcibk_publish_pci_dev as the publish callback, and the
 * frontend domain (otherend_id) is registered as its owner. If that
 * registration returns non-zero, the current owner is logged and
 * unregistered and the frontend domain is registered again.
 *
 * NOTE(review): the error exits and the return statement are not visible
 * in this view.
 */
287 static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
288 int domain, int bus, int slot, int func,
294 dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
295 domain, bus, slot, func);
297 dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
300 xenbus_dev_fatal(pdev->xdev, err,
301 "Couldn't locate PCI device "
302 "(%04x:%02x:%02x.%d)! "
303 "perhaps already in-use?",
304 domain, bus, slot, func);
308 err = xen_pcibk_add_pci_dev(pdev, dev, devid,
309 xen_pcibk_publish_pci_dev);
314 dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
315 if (xen_register_device_domain_owner(dev,
316 pdev->xdev->otherend_id) != 0) {
317 dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
318 xen_find_device_domain_owner(dev));
319 xen_unregister_device_domain_owner(dev);
320 xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
324 /* TODO: It'd be nice to export a bridge and have all of its children
325 * get exported with it. This may be best done in xend (which will
326 * have to calculate resource usage anyway) but we probably want to
327 * put something in here to ensure that if a bridge gets given to a
328 * driver domain, that all devices under that bridge are not given
329 * to other driver domains (as he who controls the bridge can disable
330 * it and stop the other devices from working).
/*
 * Remove the device at @domain:@bus:@slot.@func from the frontend.
 *
 * The device is looked up among those held by @pdev with
 * xen_pcibk_get_pci_dev(); a debug message covers the case where it is not
 * owned by this domain. Otherwise its domain owner is unregistered and the
 * device is released with xen_pcibk_release_pci_dev().
 *
 * NOTE(review): the not-found branch body and the return statement are not
 * visible in this view.
 */
336 static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
337 int domain, int bus, int slot, int func)
342 dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
343 domain, bus, slot, func);
345 dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
348 dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
349 "(%04x:%02x:%02x.%d)! not owned by this domain\n",
350 domain, bus, slot, func);
355 dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
356 xen_unregister_device_domain_owner(dev);
359 xen_pcibk_release_pci_dev(pdev, dev);
/*
 * Publish a PCI root (@domain:@bus) in xenstore under this backend's node.
 *
 * "root_num" is read first. Each existing "root-<i>" entry is then parsed
 * as "domain:bus" and compared with the requested pair so that a root is
 * not published twice. A new root is written as "root-<root_num>" with the
 * value "dddd:bb", and "root_num" is rewritten as root_num + 1.
 *
 * NOTE(review): the body of the "err == 0 || err == -ENOENT" branch, the
 * action taken when a matching root is found, the overflow branches and
 * the return statement are not visible in this view.
 */
365 static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
366 unsigned int domain, unsigned int bus)
369 int i, root_num, len, err;
372 dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
374 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
375 "root_num", "%d", &root_num);
376 if (err == 0 || err == -ENOENT)
381 /* Verify that we haven't already published this pci root */
382 for (i = 0; i < root_num; i++) {
383 len = snprintf(str, sizeof(str), "root-%d", i);
384 if (unlikely(len >= (sizeof(str) - 1))) {
389 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
390 str, "%x:%x", &d, &b);
398 if (d == domain && b == bus) {
404 len = snprintf(str, sizeof(str), "root-%d", root_num);
405 if (unlikely(len >= (sizeof(str) - 1))) {
410 dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
411 root_num, domain, bus);
413 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
414 "%04x:%02x", domain, bus);
418 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
419 "root_num", "%d", (root_num + 1));
/*
 * Handle a reconfiguration request (device hot-add / hot-remove).
 * Runs under pdev->dev_lock; the backend's state is first compared with
 * XenbusStateReconfiguring.
 *
 * "num_devs" is read from the backend node and, for each index i, the
 * per-device substate "state-<i>" is read:
 *   - XenbusStateInitialising: "dev-<i>" is parsed as domain:bus:slot.func,
 *     the device is exported, the PCI roots are published and the substate
 *     is written back as XenbusStateInitialised.
 *   - XenbusStateClosing: "vdev-<i>" is parsed the same way and the device
 *     is removed.
 * Finally the backend switches to XenbusStateReconfigured. Failures are
 * reported through xenbus_dev_fatal().
 *
 * NOTE(review): the adjacent string literals "Error while publish PCI root"
 * and "buses for frontend" concatenate without a space, so the message
 * reads "...PCI rootbuses for frontend".
 * NOTE(review): the switch statement itself, the error exits, the break
 * statements and the return statement are not visible in this view.
 */
425 static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
429 int domain, bus, slot, func;
436 dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
438 mutex_lock(&pdev->dev_lock);
439 /* Make sure we only reconfigure once */
440 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
441 XenbusStateReconfiguring)
444 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
449 xenbus_dev_fatal(pdev->xdev, err,
450 "Error reading number of devices");
454 for (i = 0; i < num_devs; i++) {
455 len = snprintf(state_str, sizeof(state_str), "state-%d", i);
456 if (unlikely(len >= (sizeof(state_str) - 1))) {
458 xenbus_dev_fatal(pdev->xdev, err,
459 "String overflow while reading "
463 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
466 substate = XenbusStateUnknown;
469 case XenbusStateInitialising:
470 dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
472 len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
473 if (unlikely(len >= (sizeof(dev_str) - 1))) {
475 xenbus_dev_fatal(pdev->xdev, err,
476 "String overflow while "
477 "reading configuration");
480 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
481 dev_str, "%x:%x:%x.%x",
482 &domain, &bus, &slot, &func);
484 xenbus_dev_fatal(pdev->xdev, err,
485 "Error reading device "
491 xenbus_dev_fatal(pdev->xdev, err,
492 "Error parsing pci device "
497 err = xen_pcibk_export_device(pdev, domain, bus, slot,
502 /* Publish pci roots. */
503 err = xen_pcibk_publish_pci_roots(pdev,
504 xen_pcibk_publish_pci_root);
506 xenbus_dev_fatal(pdev->xdev, err,
507 "Error while publish PCI root"
508 "buses for frontend");
512 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
514 XenbusStateInitialised);
516 xenbus_dev_fatal(pdev->xdev, err,
517 "Error switching substate of "
523 case XenbusStateClosing:
524 dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
526 len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
527 if (unlikely(len >= (sizeof(dev_str) - 1))) {
529 xenbus_dev_fatal(pdev->xdev, err,
530 "String overflow while "
531 "reading configuration");
534 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
535 dev_str, "%x:%x:%x.%x",
536 &domain, &bus, &slot, &func);
538 xenbus_dev_fatal(pdev->xdev, err,
539 "Error reading device "
545 xenbus_dev_fatal(pdev->xdev, err,
546 "Error parsing pci device "
551 err = xen_pcibk_remove_device(pdev, domain, bus, slot,
556 /* TODO: If at some point we implement support for pci
557 * root hot-remove on pcifront side, we'll need to
558 * remove unnecessary xenstore nodes of pci roots here.
568 err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
570 xenbus_dev_fatal(pdev->xdev, err,
571 "Error switching to reconfigured state!");
576 mutex_unlock(&pdev->dev_lock);
/*
 * xenbus callback invoked when the frontend's state changes
 * (registered as .otherend_changed). Dispatches on @fe_state:
 *   - Initialised:   attach to the frontend.
 *   - Reconfiguring: run the reconfigure sequence.
 *   - Connected:     switch the backend to Connected as well.
 *   - Closing:       disconnect, then switch the backend to Closing.
 *   - Closed:        disconnect and switch to Closed; the result of
 *                    xenbus_dev_is_online() decides whether control falls
 *                    through to the Unknown case.
 *   - Unknown:       the frontend is gone, so the device is unregistered.
 *
 * NOTE(review): the switch statement, the break statements and the default
 * case are not visible in this view.
 */
580 static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
581 enum xenbus_state fe_state)
583 struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
585 dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
588 case XenbusStateInitialised:
589 xen_pcibk_attach(pdev);
592 case XenbusStateReconfiguring:
593 xen_pcibk_reconfigure(pdev);
596 case XenbusStateConnected:
597 /* pcifront switched its state from reconfiguring to connected.
598 * Then switch to connected state.
600 xenbus_switch_state(xdev, XenbusStateConnected);
603 case XenbusStateClosing:
604 xen_pcibk_disconnect(pdev);
605 xenbus_switch_state(xdev, XenbusStateClosing);
608 case XenbusStateClosed:
609 xen_pcibk_disconnect(pdev);
610 xenbus_switch_state(xdev, XenbusStateClosed);
611 if (xenbus_dev_is_online(xdev))
613 /* fall through if not online */
614 case XenbusStateUnknown:
615 dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
616 device_unregister(&xdev->dev);
/*
 * Read the backend configuration from xenstore and export every listed
 * device. The export loop runs under pdev->dev_lock.
 *
 * After a check of the backend's current state, "num_devs" is read. For
 * each index i, "dev-<i>" is parsed as domain:bus:slot.func, the device is
 * exported with devid i, and "state-<i>" is written as
 * XenbusStateInitialised. The PCI roots are then published and the backend
 * switches to XenbusStateInitialised. After the lock is dropped,
 * xen_pcibk_attach() is called in case the frontend is already configured.
 * Failures are reported through xenbus_dev_fatal().
 *
 * NOTE(review): the state the backend is compared against, the error exits
 * and the return statement are not visible in this view.
 */
624 static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
626 /* Get configuration from xend (if available now) */
627 int domain, bus, slot, func;
633 mutex_lock(&pdev->dev_lock);
634 /* It's possible we could get the call to setup twice, so make sure
635 * we're not already connected.
637 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
641 dev_dbg(&pdev->xdev->dev, "getting be setup\n");
643 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
648 xenbus_dev_fatal(pdev->xdev, err,
649 "Error reading number of devices");
653 for (i = 0; i < num_devs; i++) {
654 int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
655 if (unlikely(l >= (sizeof(dev_str) - 1))) {
657 xenbus_dev_fatal(pdev->xdev, err,
658 "String overflow while reading "
663 err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
664 "%x:%x:%x.%x", &domain, &bus, &slot, &func);
666 xenbus_dev_fatal(pdev->xdev, err,
667 "Error reading device configuration");
672 xenbus_dev_fatal(pdev->xdev, err,
673 "Error parsing pci device "
678 err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
682 /* Switch substate of this device. */
683 l = snprintf(state_str, sizeof(state_str), "state-%d", i);
684 if (unlikely(l >= (sizeof(state_str) - 1))) {
686 xenbus_dev_fatal(pdev->xdev, err,
687 "String overflow while reading "
691 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
692 "%d", XenbusStateInitialised);
694 xenbus_dev_fatal(pdev->xdev, err, "Error switching "
695 "substate of dev-%d\n", i);
700 err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
702 xenbus_dev_fatal(pdev->xdev, err,
703 "Error while publish PCI root buses "
708 err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
710 xenbus_dev_fatal(pdev->xdev, err,
711 "Error switching to initialised state!");
714 mutex_unlock(&pdev->dev_lock);
716 /* see if pcifront is already configured (if not, we'll wait) */
717 xen_pcibk_attach(pdev);
/*
 * Xenstore watch callback for the backend node. The owning
 * xen_pcibk_device is recovered from @watch via container_of(); when the
 * backend's state is XenbusStateInitWait the backend setup is run.
 * @vec and @len are not used in the visible lines.
 *
 * NOTE(review): the handling of other states is not visible in this view.
 */
721 static void xen_pcibk_be_watch(struct xenbus_watch *watch,
722 const char **vec, unsigned int len)
724 struct xen_pcibk_device *pdev =
725 container_of(watch, struct xen_pcibk_device, be_watch);
727 switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
728 case XenbusStateInitWait:
729 xen_pcibk_setup_backend(pdev);
/*
 * xenbus probe callback. Allocates the backend state for @dev, switches to
 * XenbusStateInitWait, installs a watch on the backend's own xenstore node
 * (be_watch) and records that in be_watching. The watch callback is then
 * invoked once by hand in case the configuration was already written.
 * An allocation failure is reported through xenbus_dev_fatal().
 *
 * NOTE(review): the error checks after each step and the return statement
 * are not visible in this view.
 */
737 static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
738 const struct xenbus_device_id *id)
741 struct xen_pcibk_device *pdev = alloc_pdev(dev);
745 xenbus_dev_fatal(dev, err,
746 "Error allocating xen_pcibk_device struct");
750 /* wait for xend to configure us */
751 err = xenbus_switch_state(dev, XenbusStateInitWait);
755 /* watch the backend node for backend configuration information */
756 err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
761 pdev->be_watching = 1;
763 /* We need to force a call to our callback here in case
764 * xend already configured us!
766 xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
/*
 * xenbus remove callback. Fetches the backend state stored as @dev's
 * driver data.
 *
 * NOTE(review): the rest of the body is not visible in this view;
 * presumably the state is released with free_pdev() - confirm.
 */
772 static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
774 struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
/*
 * Device-id table and xenbus driver definition for this backend. The
 * driver wires up the probe, remove and frontend-state-change callbacks.
 *
 * NOTE(review): the id table's entries and the closing lines of both
 * definitions are not visible in this view.
 */
782 static const struct xenbus_device_id xen_pcibk_ids[] = {
787 static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
788 .probe = xen_pcibk_xenbus_probe,
789 .remove = xen_pcibk_xenbus_remove,
790 .otherend_changed = xen_pcibk_frontend_changed,
/*
 * xen_pcibk_backend is the backend selected at registration time.
 * xen_pcibk_backends lists the candidates (vpci, passthrough, slot,
 * controller); it is __initdata and the registration code skips NULL
 * entries when scanning it.
 *
 * NOTE(review): any per-entry #ifdef guards and the table's terminator are
 * not visible in this view.
 */
793 const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
794 static const struct xen_pcibk_backend *__initdata xen_pcibk_backends[] = {
795 &xen_pcibk_vpci_backend,
796 &xen_pcibk_passthrough_backend,
797 &xen_pcibk_slot_backend,
798 &xen_pcibk_controller_backend,
/*
 * Module-init helper: creates the "xen_pciback_workqueue" workqueue,
 * selects the backend whose name equals the "mode" parameter, logs the
 * choice and registers the xenbus backend driver. Returns the result of
 * xenbus_register_backend() on the visible success path.
 *
 * NOTE(review): the adjacent literals "%s: create" and
 * "xen_pciback_workqueue failed\n" concatenate without a space, so the
 * message reads "...: createxen_pciback_workqueue failed".
 * NOTE(review): xen_pcibk_backend->name is dereferenced after the loop; no
 * visible line handles the case where no backend name matched "mode" -
 * confirm a fallback exists on the lines not shown.
 * NOTE(review): the result of xenbus_register_backend() is returned
 * directly, so the workqueue does not appear to be destroyed if
 * registration fails - confirm.
 */
801 int __init xen_pcibk_xenbus_register(void)
805 xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
807 printk(KERN_ERR "%s: create"
808 "xen_pciback_workqueue failed\n", __func__);
811 for (i = 0; i < ARRAY_SIZE(xen_pcibk_backends); ++i) {
812 if (!xen_pcibk_backends[i])
814 if (strcmp(xen_pcibk_backends[i]->name, mode) == 0) {
815 xen_pcibk_backend = xen_pcibk_backends[i];
819 pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
820 return xenbus_register_backend(&xen_pcibk_driver);
/*
 * Module-exit helper: destroys the backend workqueue and unregisters the
 * xenbus driver.
 *
 * NOTE(review): the workqueue is destroyed before the driver is
 * unregistered, while xen_pcibk_disconnect() calls
 * flush_workqueue(xen_pcibk_wq). If unregistering the driver tears down
 * still-bound devices through that path, the flush would run on a destroyed
 * workqueue; swapping the two calls looks safer - confirm against the
 * remove callback.
 */
823 void __exit xen_pcibk_xenbus_unregister(void)
825 destroy_workqueue(xen_pcibk_wq);
826 xenbus_unregister_driver(&xen_pcibk_driver);