/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows using PCI devices that only support 32bit addresses on systems
 * with more than 4GB of memory.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <asm/io.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
dma_addr_t bad_dma_address;

unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;		/* Remapping table */
int no_iommu;
static int no_agp;

#ifdef CONFIG_IOMMU_DEBUG
int force_iommu = 1;
#else
int force_iommu = 0;
#endif
/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
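/*
 * Worked example (illustrative, not from the original source): for the
 * 40bit physical address 0x12_3456_7000, GPTE_ENCODE gives
 * 0x34567000 | (0x12 << 4) | 3 = 0x34567123: bits 31..12 hold address
 * bits 31..12, bits 11..4 hold address bits 39..32, bits 1..0 are
 * coherent|valid. GPTE_DECODE inverts this:
 * (0x34567123 & 0xfffff000) | ((0x123 & 0xff0) << 28) = 0x12_3456_7000.
 */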
#define to_pages(addr,size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
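/*
 * Worked example (illustrative): with 4K pages, to_pages(0x1234, 0x2000)
 * = round_up(0x234 + 0x2000, 0x1000) >> 12 = 0x3000 >> 12 = 3, because
 * an 8KB buffer starting 0x234 bytes into a page straddles three pages.
 */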
#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
		if (dev->bus->number == 0 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
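/*
 * PCI device ID 0x1103 is the K8 northbridge miscellaneous control
 * function. Each node exposes one on bus 0 at device 24 + node number,
 * hence the slot range 24..31 covers up to eight nodes.
 */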
static struct pci_dev *northbridges[NR_CPUS + 1];
static u32 northbridge_flush_word[NR_CPUS + 1];

#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
extern int agp_init(void);
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
static unsigned long alloc_iommu(int size, int *flush)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	/* Next-fit: search onwards from the end of the last allocation. */
	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
	if (offset == -1) {
		/* Wrapping around may reuse stale entries: force a flush. */
		*flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
	}
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset+size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			*flush = 1;
		}
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;

	if (size == 1) {
		/* A single bit can be cleared atomically without the lock. */
		clear_bit(offset, iommu_gart_bitmap);
		return;
	}
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	__clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
/*
 * Only flush the aperture on the CPU the PCI bridge is connected to.
 */
static void flush_gart(int bus)
{
	int i;
	int flushed = 0;

	for (i = 0; northbridges[i]; i++) {
		if (bus >= 0 && !(pcibus_to_cpumask(bus) & (1UL << i)))
			continue;
		/* Writing back the cached flush word with bit 0 set
		   invalidates the GART TLB in that northbridge. */
		pci_write_config_dword(northbridges[i], 0x9c,
				       northbridge_flush_word[i] | 1);
		flushed++;
	}
	if (!flushed)
		printk("nothing to flush? %d\n", bus);
}
/*
 * Allocate memory for a consistent mapping.
 * All mappings are consistent here, so this is just a wrapper around
 * pci_map_single.
 */
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	void *memory;
	int high, mmu;
	int gfp = GFP_ATOMIC;
	unsigned long dma_mask;

	if (hwdev == NULL) {
		gfp |= GFP_DMA;
		dma_mask = 0xffffffff;
	} else
		dma_mask = hwdev->consistent_dma_mask;
	if (dma_mask == 0)
		dma_mask = 0xffffffff;
	/* Devices that cannot reach all of memory get low pages directly. */
	if (dma_mask < 0xffffffff || no_iommu)
		gfp |= GFP_DMA;

	memory = (void *)__get_free_pages(gfp, get_order(size));
	if (memory == NULL)
		return NULL;

	high = ((unsigned long)virt_to_bus(memory) + size) >= dma_mask;
	mmu = high;
	if (force_iommu && !(gfp & GFP_DMA))
		mmu = 1;
	if (no_iommu) {
		if (high) goto error;
		mmu = 0;
	}
	memset(memory, 0, size);
	if (!mmu) {
		*dma_handle = virt_to_bus(memory);
		return memory;
	}

	*dma_handle = pci_map_single(hwdev, memory, size, 0);
	if (*dma_handle == bad_dma_address) {
error:
		free_pages((unsigned long)memory, get_order(size));
		return NULL;
	}
	return memory;
}
/*
 * Unmap consistent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	pci_unmap_single(hwdev, bus, size, 0);
	free_pages((unsigned long)vaddr, get_order(size));
}
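/*
 * Typical driver usage (illustrative sketch; "mydev" and the ring size
 * are hypothetical):
 *
 *	dma_addr_t ring_dma;
 *	void *ring = pci_alloc_consistent(mydev, 4096, &ring_dma);
 *	if (!ring)
 *		return -ENOMEM;
 *	... hand ring_dma to the device, access ring from the CPU ...
 *	pci_free_consistent(mydev, 4096, ring, ring_dma);
 */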
#ifdef CONFIG_IOMMU_LEAK

#define SET_LEAK(x) if (iommu_leak_tab) \
	iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
	iommu_leak_tab[x] = 0;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;

static void dump_leak(void)
{
	int i;
	static int dump;

	if (dump || !iommu_leak_tab)
		return;
	dump = 1;
	show_stack(NULL,NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i++) {
		printk("%lu: ", iommu_pages-i-1);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i-1]);
		printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif
static void iommu_full(struct pci_dev *dev, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	  "PCI-DMA: Out of IOMMU space for %lu bytes at device %s[%s]\n",
	       size, dev ? dev->dev.name : "?", dev ? dev->slot_name : "?");

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory will be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory will be DMAed out\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}
static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = addr + size >= mask;
	int mmu = high || force_iommu;

	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

/* Like need_iommu, but ignores force_iommu (used by the overflow fallbacks). */
static inline int nonforced_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = addr + size >= mask;
	int mmu = high;

	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}
/* Map a single continuous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t pci_map_area(struct pci_dev *dev, unsigned long phys_mem,
			       size_t size, int *flush, int dir)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages, flush);
	int i;

	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		iommu_full(dev, size, dir);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	/* phys_mem was advanced in page sized steps, so the masked low
	   bits are still the original in-page offset. */
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
/* Map a single area into the IOMMU */
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,
			  int dir)
{
	unsigned long phys_mem, bus;
	int flush = 0;

	BUG_ON(dir == PCI_DMA_NONE);

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	bus = pci_map_area(dev, phys_mem, size, &flush, dir);
	if (flush)
		flush_gart(dev->bus->number);
	return bus;
}
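/*
 * Typical streaming usage (illustrative sketch; "mydev", "buf" and "len"
 * are hypothetical):
 *
 *	dma_addr_t dma = pci_map_single(mydev, buf, len, PCI_DMA_TODEVICE);
 *	if (dma == bad_dma_address)
 *		goto fail;
 *	... point the device at dma and wait for the transfer ...
 *	pci_unmap_single(mydev, dma, len, PCI_DMA_TODEVICE);
 */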
/* Fallback for pci_map_sg in case of overflow */
static int pci_map_sg_nonforce(struct pci_dev *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;
	int flush = 0;

	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			addr = pci_map_area(dev, addr, s->length, &flush, dir);
			if (addr == bad_dma_address) {
				if (i > 0)
					pci_unmap_sg(dev, sg, i, dir);
				nents = 0;
				break;
			}
		}
		s->dma_address = addr;
	}
	if (flush)
		flush_gart(dev->bus->number);
	return nents;
}
/* Map multiple scatterlist entries continuous into the first. */
static int __pci_map_cont(struct scatterlist *sg, int start, int stopat,
			  struct scatterlist *sout,
			  unsigned long pages, int *flush)
{
	unsigned long iommu_start = alloc_iommu(pages, flush);
	unsigned long iommu_page = iommu_start;
	int i;

	if (iommu_start == -1)
		return -1;

	for (i = start; i < stopat; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long start_addr = s->dma_address;
		unsigned long addr;
		/* Only the first entry of a merged run may have an offset. */
		BUG_ON(i > start && s->offset);
		if (i == start) {
			*sout = *s;
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->length = s->length;
		} else {
			sout->length += s->length;
		}

		addr = start_addr;
		while (addr < start_addr + s->length) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
		}
		BUG_ON(i > start && addr % PAGE_SIZE);
	}
	BUG_ON(iommu_page - iommu_start != pages);
	return 0;
}
static inline int pci_map_cont(struct scatterlist *sg, int start, int stopat,
			       struct scatterlist *sout,
			       unsigned long pages, int *flush, int need)
{
	if (!need) {
		/* Single entry that does not need the IOMMU: pass through. */
		BUG_ON(stopat - start != 1);
		if (sout != sg + start)
			*sout = sg[start];
		return 0;
	}
	return __pci_map_cont(sg, start, stopat, sout, pages, flush);
}
/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page aligned sizes into a continuous mapping.
 */
int pci_map_sg(struct pci_dev *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	int out;
	int start;
	unsigned long pages = 0;
	int need = 0;
	int flush = 0;

	BUG_ON(dir == PCI_DMA_NONE);
	if (nents == 0)
		return 0;

	out = 0;
	start = 0;
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		dma_addr_t addr = page_to_phys(s->page) + s->offset;
		s->dma_address = addr;
		BUG_ON(s->length == 0);

		/* Handle the previous not yet processed entries */
		if (i > start) {
			struct scatterlist *ps = &sg[i-1];
			/* Can only merge when the last chunk ends on a page
			   boundary (and only the run's first entry may carry
			   an offset). */
			if (!need || (i > start+1 && ps->offset) ||
			    (ps->offset + ps->length) % PAGE_SIZE) {
				if (pci_map_cont(sg, start, i, sg+out, pages,
						 &flush, need) < 0)
					goto error;
				out++;
				pages = 0;
				start = i;
			}
		}

		need = need_iommu(dev, addr, s->length);
		pages += to_pages(s->offset, s->length);
	}
	if (pci_map_cont(sg, start, i, sg+out, pages, &flush, need) < 0)
		goto error;
	out++;
	if (flush)
		flush_gart(dev->bus->number);
	return out;

error:
	flush_gart(-1);
	pci_unmap_sg(dev, sg, nents, dir);
	/* When it was forced try again unforced */
	if (force_iommu)
		return pci_map_sg_nonforce(dev, sg, nents, dir);
	iommu_full(dev, pages << PAGE_SHIFT, dir);
	for (i = 0; i < nents; i++)
		sg[i].dma_address = bad_dma_address;
	return 0;
}
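/*
 * Merging example (illustrative): two 4K entries whose physical pages are
 * discontiguous, the first ending on a page boundary and the second with
 * offset 0, come back as one entry of length 8K whose dma_address is
 * contiguous in the aperture, so the device sees a single buffer.
 */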
/*
 * Free a PCI mapping.
 */
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int npages;
	int i;

	/* Addresses outside the remapping window were never remapped. */
	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr > iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = to_pages(dma_addr, size);
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = 0;
		CLEAR_LEAK(iommu_page + i);
	}
	free_iommu(iommu_page, npages);
}
/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg, int nents,
		  int dir)
{
	int i;
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		if (!s->length)
			break;
		pci_unmap_single(dev, s->dma_address, s->length, dir);
	}
}
int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
{
	/* Copied from i386. Doesn't make much sense, because it will
	   only work for pci_alloc_consistent.
	   The caller just has to use GFP_DMA in this case. */
	if (mask < 0x00ffffff)
		return 0;

	/* Tell the device to use SAC. This allows it to use cheaper accesses
	   in some cases.
	   Problem with this is that if we overflow the IOMMU area
	   and return DAC as fallback address the device may not handle it correctly.
	   As a compromise we only do this if the IOMMU area is >= 256MB,
	   which should make overflow unlikely enough. */
	if (force_iommu && mask > 0xffffffff && iommu_size >= 256*1024*1024)
		return 0;

	if (no_iommu && (~mask & (end_pfn << PAGE_SHIFT)))
		return 0;

	return 1;
}
EXPORT_SYMBOL(pci_unmap_sg);
EXPORT_SYMBOL(pci_map_sg);
EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);
EXPORT_SYMBOL(pci_dma_supported);
EXPORT_SYMBOL(no_iommu);
EXPORT_SYMBOL(force_iommu);
EXPORT_SYMBOL(bad_dma_address);
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;

	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;	/* leave half for real AGP use */
	}

	/* Shrink so the remapping area ends on a large page boundary. */
	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
  "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);

	return iommu_size;
}
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
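/*
 * Register layout as used above: config reg 0x90 bits 3..1 give the
 * aperture order, so a field value of 2 means (32MB << 2) = 128MB; reg
 * 0x94 bits 14..0 hold aperture base address bits 39..25, hence the
 * shift by 25.
 */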
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;
		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		/* All nodes must agree on a single aperture. */
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;	/* table base bits 39..12 go to reg bits 31..4 */
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;	/* enable GART translation */
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart(-1);

	printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
	return 0;

 nommu:
	/* Should not happen anyway */
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
	return -1;
}
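/*
 * Summary of the K8 northbridge config registers touched by this file,
 * per the accesses above: 0x90 aperture control (enable bit 0, order
 * bits 3..1), 0x94 aperture base, 0x98 GATT base, 0x9c cache flush word.
 */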
extern int agp_amdk8_init(void);

static int __init pci_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;

#ifndef CONFIG_AGP_AMD_8151
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amdk8_init() < 0) ||
		(agp_copy_info(&info) < 0);
#endif

	if (no_iommu || (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return -1;
	}

	if (no_agp) {
		int err = -1;
		printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
		if (force_iommu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
			err = init_k8_gatt(&info);
		if (err < 0) {
			printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
			no_iommu = 1;
			return -1;
		}
	}
	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
						    get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);
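	/*
	 * Worked example (illustrative): a 64MB IOMMU area is 16384 4K
	 * pages, so the allocation bitmap is 16384/8 = 2KB, a single page.
	 */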
#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages*sizeof(void *));
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= NR_CPUS)
			continue;
		northbridges[cpu] = dev;
		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	flush_gart(-1);

	return 0;
}

/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]]
   size    set size of iommu (in bytes)
   noagp   don't initialize the AGP driver and use full aperture.
   off     don't use the IOMMU
   leak    turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
   memaper[=order] allocate an own aperture over RAM with size 32MB<<order.
   noforce don't force IOMMU usage. Default.
   force   force IOMMU usage even when not needed by the installed memory.
*/
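/*
 * Example command lines (illustrative): "iommu=force,leak" forces the
 * IOMMU on and enables leak tracing when CONFIG_IOMMU_LEAK is set;
 * "iommu=67108864,noagp" reserves a 64MB remapping area without
 * initializing the AGP driver.
 */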
__init int iommu_setup(char *opt)
{
	int arg;
	char *p = opt;

	for (;;) {
		if (!memcmp(p,"noagp", 5))
			no_agp = 1;
		if (!memcmp(p,"off", 3))
			no_iommu = 1;
		if (!memcmp(p,"force", 5))
			force_iommu = 1;
		if (!memcmp(p,"noforce", 7))
			force_iommu = 0;
		if (!memcmp(p, "memaper", 7)) {
			fallback_aper_force = 1;
			p += 7;
			if (*p == '=' && get_option(&p, &arg))
				fallback_aper_order = arg;
		}
#ifdef CONFIG_IOMMU_LEAK
		if (!memcmp(p,"leak", 4)) {
			leak_trace = 1;
			p += 4;
			if (isdigit(*p) && get_option(&p, &arg))
				iommu_leak_pages = arg;
		}
#endif
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_size = arg;
		/* Skip to the next comma separated option. */
		do {
			if (*p == ' ' || *p == 0)
				return 0;
		} while (*p++ != ',');
	}
}