#include <linux/cache.h>
#include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>
#include <linux/highmem.h>
+#include <linux/gfp.h>
+
#include <asm/io.h>
#include <asm/pci.h>
#include <asm/dma.h>
int swiotlb_force;
/*
- * Used to do a quick range check in unmap_single and
- * sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
static char *io_tlb_start, *io_tlb_end;
/*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
* io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
*/
static unsigned long io_tlb_nslabs;
*/
static unsigned long io_tlb_overflow = 32*1024;
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
/*
* This is a free list describing the number of free entries available from
swiotlb_force = 1;
else if (!strcmp(str, "off"))
swiotlb_force = -1;
+
return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
- void *start = alloc_bootmem_pages(size);
- unsigned int i;
- int rc;
-
- dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
- for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) {
- do {
- rc = xen_create_contiguous_region(
- (unsigned long)start + (i << IO_TLB_SHIFT),
- get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
- dma_bits);
- } while (rc && dma_bits++ < max_dma_bits);
- if (rc) {
- if (i == 0)
- panic("No suitable physical memory available for SWIOTLB buffer!\n"
- "Use dom0_mem Xen boot parameter to reserve\n"
- "some DMA memory (e.g., dom0_mem=-128M).\n");
- io_tlb_nslabs = i;
- i <<= IO_TLB_SHIFT;
- free_bootmem(__pa(start + i), size - i);
- size = i;
- for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
- unsigned int bits = fls64(virt_to_bus(start + i - 1));
-
- if (bits > dma_bits)
- dma_bits = bits;
- }
- break;
- }
- }
-
- return start;
-}
-
-#ifndef CONFIG_XEN
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
- return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-#endif
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
- return phys_to_machine(paddr);
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
+unsigned long swiotlb_nr_tbl(void)
{
- return machine_to_phys(baddr);
+ return io_tlb_nslabs;
}
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
+/* Note that this doesn't work with highmem pages */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
{
- return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
- return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+ return phys_to_dma(hwdev, virt_to_phys(address));
}
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
- dma_addr_t addr, size_t size)
+void swiotlb_print_info(void)
{
- return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
- return 0;
-}
-
-static void swiotlb_print_info(unsigned long bytes)
-{
- phys_addr_t pstart, pend;
-
- pstart = virt_to_phys(io_tlb_start);
- pend = virt_to_phys(io_tlb_end);
+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
printk(KERN_INFO "Software IO TLB enabled: \n"
" Aperture: %lu megabytes\n"
io_tlb_start, io_tlb_end);
}
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the PCI DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size)
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
unsigned long i, bytes;
int rc;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
+ bytes = nslabs << IO_TLB_SHIFT;
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ io_tlb_nslabs = nslabs;
+ io_tlb_start = tlb;
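+
+ /*
+  * Xen: make each IO_TLB_SEGSIZE-sized chunk of the aperture
+  * machine-contiguous and reachable within dma_bits, widening the
+  * address mask and retrying whenever a chunk cannot be placed.
+  */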
+ dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+ for (nslabs = 0; nslabs < io_tlb_nslabs; nslabs += IO_TLB_SEGSIZE) {
+ do {
+ rc = xen_create_contiguous_region(
+ (unsigned long)io_tlb_start + (nslabs << IO_TLB_SHIFT),
+ get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
+ dma_bits);
+ } while (rc && dma_bits++ < max_dma_bits);
+ if (rc) {
+ if (nslabs == 0)
+ panic("No suitable physical memory available for SWIOTLB buffer!\n"
+ "Use dom0_mem Xen boot parameter to reserve\n"
+ "some DMA memory (e.g., dom0_mem=-128M).\n");
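+ /*
+  * Could not convert the whole aperture: shrink it to the chunks
+  * that did succeed, return the rest to bootmem and recompute
+  * dma_bits from the machine addresses that remain.
+  */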
+ io_tlb_nslabs = nslabs;
+ i = nslabs << IO_TLB_SHIFT;
+ free_bootmem(__pa(io_tlb_start + i), bytes - i);
+ bytes = i;
+ for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
+ unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
- if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer!\n");
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ if (bits > dma_bits)
+ dma_bits = bits;
+ }
+ break;
+ }
+ }
io_tlb_end = io_tlb_start + bytes;
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
*/
- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+ io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
/*
* Get the overflow emergency buffer
*/
- io_tlb_overflow_buffer = alloc_bootmem(io_tlb_overflow);
+ io_tlb_overflow_buffer = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_overflow));
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
} while (rc && dma_bits++ < max_dma_bits);
if (rc)
panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");
+ if (verbose)
+ swiotlb_print_info();
+}
- swiotlb_print_info(bytes);
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+ unsigned long bytes;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+ if (!io_tlb_start)
+ panic("Cannot allocate SWIOTLB buffer");
+
+ swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
}
void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
{
unsigned long ram_end;
size_t defsz = 64 << 20; /* 64MB default size */
}
if (swiotlb)
- swiotlb_init_with_default_size(defsz);
+ swiotlb_init_with_default_size(defsz, verbose);
else
printk(KERN_INFO "Software IO TLB disabled\n");
}
return range_straddles_page_boundary(pa, size);
}
-static int is_swiotlb_buffer(char *addr)
+static int is_swiotlb_buffer(dma_addr_t addr)
{
- return addr >= io_tlb_start && addr < io_tlb_end;
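+ /*
+  * Xen: addr is a machine (bus) address; translate it back to a
+  * pseudo-physical address before comparing it against the bounce
+  * buffer's physical range.
+  */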
+ unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
+ phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+ return paddr >= virt_to_phys(io_tlb_start) &&
+ paddr < virt_to_phys(io_tlb_end);
}
/*
* drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an
* unnecessary copy from the aperture to the host buffer, and a page fault.
*/
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(phys);
sz = min_t(size_t, PAGE_SIZE - offset, size);
local_irq_save(flags);
- buffer = kmap_atomic(pfn_to_page(pfn),
- KM_BOUNCE_READ);
+ buffer = kmap_atomic(pfn_to_page(pfn));
if (dir == DMA_TO_DEVICE)
memcpy(dma_addr, buffer + offset, sz);
else if (__copy_to_user_inatomic(buffer + offset,
dma_addr, sz))
/* inaccessible */;
- kunmap_atomic(buffer, KM_BOUNCE_READ);
+ kunmap_atomic(buffer);
local_irq_restore(flags);
size -= sz;
} else {
if (dir == DMA_TO_DEVICE)
memcpy(dma_addr, phys_to_virt(phys), size);
- else
- memcpy(phys_to_virt(phys), dma_addr, size);
+ else if (__copy_to_user_inatomic(phys_to_virt(phys),
+ dma_addr, size))
+ /* inaccessible */;
}
}
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+ phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
char *dma_addr;
return dma_addr;
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
+{
+ dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+
+ return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
/*
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
-static void
-do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+void
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
/*
* Return the buffer to the free list by setting the corresponding
- * entries to indicate the number of contigous entries available.
+ * entries to indicate the number of contiguous entries available.
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/
}
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+void
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir,
+ enum dma_sync_target target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
- BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
- swiotlb_bounce(phys, dma_addr, size, dir);
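+ /*
+  * Bounce in the direction implied by the sync target: towards the
+  * original buffer when syncing for the CPU, towards the bounce
+  * buffer when syncing for the device.
+  */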
+ switch (target) {
+ case SYNC_FOR_CPU:
+ if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ else
+ BUG_ON(dir != DMA_TO_DEVICE);
+ break;
+ case SYNC_FOR_DEVICE:
+ if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ else
+ BUG_ON(dir != DMA_FROM_DEVICE);
+ break;
+ default:
+ BUG();
+ }
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+ int do_panic)
{
/*
* Ran out of IOMMU space for this operation. This is very bad.
printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
"device %s\n", size, dev ? dev_name(dev) : "?");
- if (size > io_tlb_overflow && do_panic) {
- if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Memory would be corrupted\n");
- if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Random memory would be DMAed\n");
- }
+ if (size <= io_tlb_overflow || !do_panic)
+ return;
+
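+ /*
+  * The request is larger than the emergency overflow buffer, so
+  * continuing is unsafe; report which direction the stray DMA
+  * would go before panicking.
+  */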
+ if (dir == DMA_BIDIRECTIONAL)
+ panic("DMA: Random memory could be DMA accessed\n");
+ if (dir == DMA_FROM_DEVICE)
+ panic("DMA: Random memory could be DMA written\n");
+ if (dir == DMA_TO_DEVICE)
+ panic("DMA: Random memory could be DMA read\n");
}
/*
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (!address_needs_mapping(dev, dev_addr, size) &&
+ if (dma_capable(dev, dev_addr, size) &&
!range_needs_mapping(phys, size))
return dev_addr;
}
dev_addr = swiotlb_virt_to_bus(dev, map);
+
+ /*
+ * Ensure that the address returned is DMA'ble
+ */
+ if (!dma_capable(dev, dev_addr, size)) {
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
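+ /* Fall back to the overflow buffer rather than hand back an
+  * address the device cannot reach. */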
+ dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+ }
+
return dev_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);
* whatever the device wrote there.
*/
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(dma_addr)) {
- do_unmap_single(hwdev, dma_addr, size, dir);
+ if (is_swiotlb_buffer(dev_addr)) {
+ swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
return;
}
* address back to the card, you must first perform a
* swiotlb_dma_sync_for_device, and then the device again owns the buffer
*/
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir)
+static void
+swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(dma_addr))
- sync_single(hwdev, dma_addr, size, dir);
+ if (is_swiotlb_buffer(dev_addr))
+ swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+ target);
}
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir)
-{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
-
- BUG_ON(dir == DMA_NONE);
-
- if (is_swiotlb_buffer(dma_addr))
- sync_single(hwdev, dma_addr, size, dir);
-}
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size,
- enum dma_data_direction dir)
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir)
{
- swiotlb_sync_single_for_cpu(hwdev, dev_addr + offset, size, dir);
+ swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size,
- enum dma_data_direction dir)
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir)
{
- swiotlb_sync_single_for_device(hwdev, dev_addr + offset, size, dir);
+ swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
/*
* Map a set of buffers described by scatterlist in streaming mode for DMA.
phys_addr_t paddr = page_to_pseudophys(sg_page(sg))
+ sg->offset;
- if (range_needs_mapping(paddr, sg->length)
- || address_needs_mapping(hwdev, dev_addr, sg->length)) {
+ if (range_needs_mapping(paddr, sg->length) ||
+ !dma_capable(hwdev, dev_addr, sg->length)) {
void *map;
gnttab_dma_unmap_page(dev_addr);
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
* The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
* and usage.
*/
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir)
+static void
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nelems, i)
- swiotlb_sync_single_for_cpu(hwdev, sg->dma_address,
- sg->dma_length, dir);
+ swiotlb_sync_single(hwdev, sg->dma_address,
+ sg->dma_length, dir, target);
+}
+
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
+{
+ swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl,
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nelems, i)
- swiotlb_sync_single_for_device(hwdev, sg->dma_address,
- sg->dma_length, dir);
+ swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);