ioat: preserve chanctrl bits when re-arming interrupts
drivers/dma/ioat/dma.c
1 /*
2  * Intel I/OAT DMA Linux driver
3  * Copyright(c) 2004 - 2009 Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  */
22
23 /*
24  * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25  * copy operations.
26  */
27
28 #include <linux/init.h>
29 #include <linux/module.h>
30 #include <linux/pci.h>
31 #include <linux/interrupt.h>
32 #include <linux/dmaengine.h>
33 #include <linux/delay.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/workqueue.h>
36 #include <linux/i7300_idle.h>
37 #include "dma.h"
38 #include "registers.h"
39 #include "hw.h"
40
41 int ioat_pending_level = 4;
42 module_param(ioat_pending_level, int, 0644);
43 MODULE_PARM_DESC(ioat_pending_level,
44                  "high-water mark for pushing ioat descriptors (default: 4)");
45
46 /* internal functions */
47 static void ioat1_cleanup(struct ioat_dma_chan *ioat);
48 static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
49
50 /**
51  * ioat_dma_do_interrupt - handler used for single vector interrupt mode
52  * @irq: interrupt id
53  * @data: interrupt data
54  */
55 static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
56 {
57         struct ioatdma_device *instance = data;
58         struct ioat_chan_common *chan;
59         unsigned long attnstatus;
60         int bit;
61         u8 intrctrl;
62
63         intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
64
65         if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
66                 return IRQ_NONE;
67
68         if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
69                 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
70                 return IRQ_NONE;
71         }
72
73         attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
74         for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
75                 chan = ioat_chan_by_index(instance, bit);
76                 tasklet_schedule(&chan->cleanup_task);
77         }
78
79         writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
80         return IRQ_HANDLED;
81 }
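
/*
 * In single-vector mode every channel shares this one handler: it reads
 * INTRCTRL, returns IRQ_NONE unless the master-enable and status bits are
 * set, walks the ATTNSTATUS bitmap to schedule the cleanup tasklet of each
 * channel that raised attention, and writes INTRCTRL back to acknowledge.
 */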
82
83 /**
84  * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
85  * @irq: interrupt id
86  * @data: interrupt data
87  */
88 static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
89 {
90         struct ioat_chan_common *chan = data;
91
92         tasklet_schedule(&chan->cleanup_task);
93
94         return IRQ_HANDLED;
95 }
96
97 static void ioat1_cleanup_tasklet(unsigned long data);
98
99 /* common channel initialization */
100 void ioat_init_channel(struct ioatdma_device *device,
101                        struct ioat_chan_common *chan, int idx,
102                        work_func_t work_fn, void (*tasklet)(unsigned long),
103                        unsigned long tasklet_data)
104 {
105         struct dma_device *dma = &device->common;
106
107         chan->device = device;
108         chan->reg_base = device->reg_base + (0x80 * (idx + 1));
109         INIT_DELAYED_WORK(&chan->work, work_fn);
110         spin_lock_init(&chan->cleanup_lock);
111         chan->common.device = dma;
112         list_add_tail(&chan->common.device_node, &dma->channels);
113         device->idx[idx] = chan;
114         tasklet_init(&chan->cleanup_task, tasklet, tasklet_data);
115         tasklet_disable(&chan->cleanup_task);
116 }
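
/*
 * Each channel owns a fixed 0x80-byte register window: channel idx maps to
 * reg_base + 0x80 * (idx + 1), leaving the first window for the device-wide
 * registers (INTRCTRL, ATTNSTATUS, ...).  The cleanup tasklet is set up here
 * but left disabled; it is only enabled once descriptors and the completion
 * area exist (see ioat1_dma_alloc_chan_resources below).
 */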
117
118 static void ioat1_reset_part2(struct work_struct *work);
119
120 /**
121  * ioat1_enumerate_channels - find and initialize the device's channels
122  * @device: the device to be enumerated
123  */
124 static int ioat1_enumerate_channels(struct ioatdma_device *device)
125 {
126         u8 xfercap_scale;
127         u32 xfercap;
128         int i;
129         struct ioat_dma_chan *ioat;
130         struct device *dev = &device->pdev->dev;
131         struct dma_device *dma = &device->common;
132
133         INIT_LIST_HEAD(&dma->channels);
134         dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
135         dma->chancnt &= 0x1f; /* bits [4:0] valid */
136         if (dma->chancnt > ARRAY_SIZE(device->idx)) {
137                 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
138                          dma->chancnt, ARRAY_SIZE(device->idx));
139                 dma->chancnt = ARRAY_SIZE(device->idx);
140         }
141         xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
142         xfercap_scale &= 0x1f; /* bits [4:0] valid */
143         xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
144         dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
145
146 #ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
147         if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
148                 dma->chancnt--;
149 #endif
150         for (i = 0; i < dma->chancnt; i++) {
151                 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
152                 if (!ioat)
153                         break;
154
155                 ioat_init_channel(device, &ioat->base, i,
156                                   ioat1_reset_part2,
157                                   ioat1_cleanup_tasklet,
158                                   (unsigned long) ioat);
159                 ioat->xfercap = xfercap;
160                 spin_lock_init(&ioat->desc_lock);
161                 INIT_LIST_HEAD(&ioat->free_desc);
162                 INIT_LIST_HEAD(&ioat->used_desc);
163         }
164         dma->chancnt = i;
165         return i;
166 }
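
/*
 * xfercap is the per-descriptor transfer limit: the register encodes a
 * power-of-two exponent, so a scale of n means 2^n bytes, and a scale of 0
 * is treated as effectively unlimited (stored as all-ones).
 * ioat1_dma_prep_memcpy() below splits larger requests into descriptor
 * chains against this limit.
 */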
167
168 /**
169  * ioat1_dma_memcpy_issue_pending - push appended descriptors that the
170  *                                  hardware may not yet have seen
171  * @chan: DMA channel handle
172  */
173 static inline void
174 __ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
175 {
176         void __iomem *reg_base = ioat->base.reg_base;
177
178         dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
179                 __func__, ioat->pending);
180         ioat->pending = 0;
181         writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
182 }
183
184 static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
185 {
186         struct ioat_dma_chan *ioat = to_ioat_chan(chan);
187
188         if (ioat->pending > 0) {
189                 spin_lock_bh(&ioat->desc_lock);
190                 __ioat1_dma_memcpy_issue_pending(ioat);
191                 spin_unlock_bh(&ioat->desc_lock);
192         }
193 }
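
/*
 * Descriptors appended by tx_submit are not necessarily fetched by the
 * engine right away; writing IOAT_CHANCMD_APPEND tells the hardware to
 * re-read the chain.  This doorbell is rung either explicitly via
 * issue_pending or automatically once ioat->pending reaches the
 * ioat_pending_level high-water mark (see ioat1_tx_submit).
 */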
194
195 /**
196  * ioat1_reset_part2 - reinit the channel after a reset
197  */
198 static void ioat1_reset_part2(struct work_struct *work)
199 {
200         struct ioat_chan_common *chan;
201         struct ioat_dma_chan *ioat;
202         struct ioat_desc_sw *desc;
203         int dmacount;
204         bool start_null = false;
205
206         chan = container_of(work, struct ioat_chan_common, work.work);
207         ioat = container_of(chan, struct ioat_dma_chan, base);
208         spin_lock_bh(&chan->cleanup_lock);
209         spin_lock_bh(&ioat->desc_lock);
210
211         *chan->completion = 0;
212         ioat->pending = 0;
213
214         /* count the descriptors waiting */
215         dmacount = 0;
216         if (ioat->used_desc.prev) {
217                 desc = to_ioat_desc(ioat->used_desc.prev);
218                 do {
219                         dmacount++;
220                         desc = to_ioat_desc(desc->node.next);
221                 } while (&desc->node != ioat->used_desc.next);
222         }
223
224         if (dmacount) {
225                 /*
226                  * write the new starting descriptor address
227                  * this puts channel engine into ARMED state
228                  */
229                 desc = to_ioat_desc(ioat->used_desc.prev);
230                 writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
231                        chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
232                 writel(((u64) desc->txd.phys) >> 32,
233                        chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
234
235                 writeb(IOAT_CHANCMD_START, chan->reg_base
236                         + IOAT_CHANCMD_OFFSET(chan->device->version));
237         } else
238                 start_null = true;
239         spin_unlock_bh(&ioat->desc_lock);
240         spin_unlock_bh(&chan->cleanup_lock);
241
242         dev_err(to_dev(chan),
243                 "chan%d reset - %d descs waiting, %d total desc\n",
244                 chan_num(chan), dmacount, ioat->desccount);
245
246         if (start_null)
247                 ioat1_dma_start_null_desc(ioat);
248 }
249
250 /**
251  * ioat1_reset_channel - restart a channel
252  * @ioat: IOAT DMA channel handle
253  */
254 static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
255 {
256         struct ioat_chan_common *chan = &ioat->base;
257         void __iomem *reg_base = chan->reg_base;
258         u32 chansts, chanerr;
259
260         if (!ioat->used_desc.prev)
261                 return;
262
263         dev_dbg(to_dev(chan), "%s\n", __func__);
264         chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
265         chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS;
266         if (chanerr) {
267                 dev_err(to_dev(chan),
268                         "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
269                         chan_num(chan), chansts, chanerr);
270                 writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
271         }
272
273         /*
274          * whack it upside the head with a reset
275          * and wait for things to settle out.
276          * force the pending count to a really big negative
277          * to make sure no one forces an issue_pending
278          * while we're waiting.
279          */
280
281         spin_lock_bh(&ioat->desc_lock);
282         ioat->pending = INT_MIN;
283         writeb(IOAT_CHANCMD_RESET,
284                reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
285         spin_unlock_bh(&ioat->desc_lock);
286
287         /* schedule the 2nd half instead of sleeping a long time */
288         schedule_delayed_work(&chan->work, RESET_DELAY);
289 }
290
291 /**
292  * ioat1_chan_watchdog - watch for stuck channels
293  */
294 static void ioat1_chan_watchdog(struct work_struct *work)
295 {
296         struct ioatdma_device *device =
297                 container_of(work, struct ioatdma_device, work.work);
298         struct ioat_dma_chan *ioat;
299         struct ioat_chan_common *chan;
300         int i;
301         u64 completion;
302         u32 completion_low;
303         unsigned long compl_desc_addr_hw;
304
305         for (i = 0; i < device->common.chancnt; i++) {
306                 chan = ioat_chan_by_index(device, i);
307                 ioat = container_of(chan, struct ioat_dma_chan, base);
308
309                 if (/* have we started processing anything yet */
310                     chan->last_completion
311                     /* have we completed any since last watchdog cycle? */
312                     && (chan->last_completion == chan->watchdog_completion)
313                     /* has TCP stuck on one cookie since last watchdog? */
314                     && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie)
315                     && (chan->watchdog_tcp_cookie != chan->completed_cookie)
316                     /* is there something in the chain to be processed? */
317                     /* CB1 chain always has at least the last one processed */
318                     && (ioat->used_desc.prev != ioat->used_desc.next)
319                     && ioat->pending == 0) {
320
321                         /*
322                          * Check the CHANSTS register for the completed
323                          * descriptor address.
324                          * If it differs from the completion writeback,
325                          * is not zero,
326                          * and has changed since the last watchdog pass,
327                          *     we can assume that the channel
328                          *     is still working correctly
329                          *     and the problem is in the completion
330                          *     writeback: update the writeback
331                          *     with the actual CHANSTS value.
332                          * Otherwise,
333                          *     try resetting the channel.
334                          */
335
336                         /* we need to read the low address first as this
337                          * causes the chipset to latch the upper bits
338                          * for the subsequent read
339                          */
340                         completion_low = readl(chan->reg_base +
341                                 IOAT_CHANSTS_OFFSET_LOW(chan->device->version));
342                         completion = readl(chan->reg_base +
343                                 IOAT_CHANSTS_OFFSET_HIGH(chan->device->version));
344                         completion <<= 32;
345                         completion |= completion_low;
346                         compl_desc_addr_hw = completion &
347                                         IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
348
349                         if ((compl_desc_addr_hw != 0)
350                            && (compl_desc_addr_hw != chan->watchdog_completion)
351                            && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) {
352                                 chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
353                                 *chan->completion = completion;
354                         } else {
355                                 ioat1_reset_channel(ioat);
356                                 chan->watchdog_completion = 0;
357                                 chan->last_compl_desc_addr_hw = 0;
358                         }
359                 } else {
360                         chan->last_compl_desc_addr_hw = 0;
361                         chan->watchdog_completion = chan->last_completion;
362                 }
363
364                 chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie;
365         }
366
367         schedule_delayed_work(&device->work, WATCHDOG_DELAY);
368 }
369
370 static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
371 {
372         struct dma_chan *c = tx->chan;
373         struct ioat_dma_chan *ioat = to_ioat_chan(c);
374         struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
375         struct ioat_desc_sw *first;
376         struct ioat_desc_sw *chain_tail;
377         dma_cookie_t cookie;
378
379         spin_lock_bh(&ioat->desc_lock);
380         /* cookie incr and addition to used_list must be atomic */
381         cookie = c->cookie;
382         cookie++;
383         if (cookie < 0)
384                 cookie = 1;
385         c->cookie = cookie;
386         tx->cookie = cookie;
387         dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
388
389         /* write address into NextDescriptor field of last desc in chain */
390         first = to_ioat_desc(tx->tx_list.next);
391         chain_tail = to_ioat_desc(ioat->used_desc.prev);
392         /* make descriptor updates globally visible before chaining */
393         wmb();
394         chain_tail->hw->next = first->txd.phys;
395         list_splice_tail_init(&tx->tx_list, &ioat->used_desc);
396         dump_desc_dbg(ioat, chain_tail);
397         dump_desc_dbg(ioat, first);
398
399         ioat->pending += desc->tx_cnt;
400         if (ioat->pending >= ioat_pending_level)
401                 __ioat1_dma_memcpy_issue_pending(ioat);
402         spin_unlock_bh(&ioat->desc_lock);
403
404         return cookie;
405 }
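
/*
 * Submission assigns the next positive cookie (wrapping back to 1 on
 * overflow), links the new descriptors into the live chain by writing the
 * first descriptor's physical address into the old tail's next pointer, and
 * then bumps the pending count.  The wmb() makes the descriptor contents
 * globally visible before the hardware can follow that new link.
 */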
406
407 /**
408  * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
409  * @ioat: the channel supplying the memory pool for the descriptors
410  * @flags: allocation flags
411  */
412 static struct ioat_desc_sw *
413 ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
414 {
415         struct ioat_dma_descriptor *desc;
416         struct ioat_desc_sw *desc_sw;
417         struct ioatdma_device *ioatdma_device;
418         dma_addr_t phys;
419
420         ioatdma_device = ioat->base.device;
421         desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
422         if (unlikely(!desc))
423                 return NULL;
424
425         desc_sw = kzalloc(sizeof(*desc_sw), flags);
426         if (unlikely(!desc_sw)) {
427                 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
428                 return NULL;
429         }
430
431         memset(desc, 0, sizeof(*desc));
432
433         dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
434         desc_sw->txd.tx_submit = ioat1_tx_submit;
435         desc_sw->hw = desc;
436         desc_sw->txd.phys = phys;
437         set_desc_id(desc_sw, -1);
438
439         return desc_sw;
440 }
441
442 static int ioat_initial_desc_count = 256;
443 module_param(ioat_initial_desc_count, int, 0644);
444 MODULE_PARM_DESC(ioat_initial_desc_count,
445                  "ioat1: initial descriptors per channel (default: 256)");
446 /**
447  * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
448  * @chan: the channel to be filled out
449  */
450 static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
451 {
452         struct ioat_dma_chan *ioat = to_ioat_chan(c);
453         struct ioat_chan_common *chan = &ioat->base;
454         struct ioat_desc_sw *desc;
455         u32 chanerr;
456         int i;
457         LIST_HEAD(tmp_list);
458
459         /* have we already been set up? */
460         if (!list_empty(&ioat->free_desc))
461                 return ioat->desccount;
462
463         /* Setup register to interrupt and write completion status on error */
464         writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
465
466         chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
467         if (chanerr) {
468                 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
469                 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
470         }
471
472         /* Allocate descriptors */
473         for (i = 0; i < ioat_initial_desc_count; i++) {
474                 desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
475                 if (!desc) {
476                         dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
477                         break;
478                 }
479                 set_desc_id(desc, i);
480                 list_add_tail(&desc->node, &tmp_list);
481         }
482         spin_lock_bh(&ioat->desc_lock);
483         ioat->desccount = i;
484         list_splice(&tmp_list, &ioat->free_desc);
485         spin_unlock_bh(&ioat->desc_lock);
486
487         /* allocate a completion writeback area */
488         /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
489         chan->completion = pci_pool_alloc(chan->device->completion_pool,
490                                           GFP_KERNEL, &chan->completion_dma);
491         memset(chan->completion, 0, sizeof(*chan->completion));
492         writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
493                chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
494         writel(((u64) chan->completion_dma) >> 32,
495                chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
496
497         tasklet_enable(&chan->cleanup_task);
498         ioat1_dma_start_null_desc(ioat);  /* give chain to dma device */
499         dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
500                 __func__, ioat->desccount);
501         return ioat->desccount;
502 }
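
/*
 * The completion area allocated above is a single u64 that the engine
 * updates as descriptors complete; its address is programmed with two
 * 32-bit MMIO writes since one 64-bit write does not work here.
 * ioat_get_current_completion() later masks this value to recover the
 * physical address of the last completed descriptor along with the
 * transfer status bits.
 */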
503
504 /**
505  * ioat1_dma_free_chan_resources - release all the descriptors
506  * @chan: the channel to be cleaned
507  */
508 static void ioat1_dma_free_chan_resources(struct dma_chan *c)
509 {
510         struct ioat_dma_chan *ioat = to_ioat_chan(c);
511         struct ioat_chan_common *chan = &ioat->base;
512         struct ioatdma_device *ioatdma_device = chan->device;
513         struct ioat_desc_sw *desc, *_desc;
514         int in_use_descs = 0;
515
516         /* Before freeing channel resources, first check
517          * whether they have been previously allocated for this channel.
518          */
519         if (ioat->desccount == 0)
520                 return;
521
522         tasklet_disable(&chan->cleanup_task);
523         ioat1_cleanup(ioat);
524
525         /* Delay 100ms after reset to allow internal DMA logic to quiesce
526          * before removing DMA descriptor resources.
527          */
528         writeb(IOAT_CHANCMD_RESET,
529                chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
530         mdelay(100);
531
532         spin_lock_bh(&ioat->desc_lock);
533         list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
534                 dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
535                         __func__, desc_id(desc));
536                 dump_desc_dbg(ioat, desc);
537                 in_use_descs++;
538                 list_del(&desc->node);
539                 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
540                               desc->txd.phys);
541                 kfree(desc);
542         }
543         list_for_each_entry_safe(desc, _desc,
544                                  &ioat->free_desc, node) {
545                 list_del(&desc->node);
546                 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
547                               desc->txd.phys);
548                 kfree(desc);
549         }
550         spin_unlock_bh(&ioat->desc_lock);
551
552         pci_pool_free(ioatdma_device->completion_pool,
553                       chan->completion,
554                       chan->completion_dma);
555
556         /* one is ok since we left it on there on purpose */
557         if (in_use_descs > 1)
558                 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
559                         in_use_descs - 1);
560
561         chan->last_completion = 0;
562         chan->completion_dma = 0;
563         chan->watchdog_completion = 0;
564         chan->last_compl_desc_addr_hw = 0;
565         chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0;
566         ioat->pending = 0;
567         ioat->desccount = 0;
568 }
569
570 /**
571  * ioat1_dma_get_next_descriptor - return the next available descriptor
572  * @ioat: IOAT DMA channel handle
573  *
574  * Gets the next descriptor from the chain, and must be called with the
575  * channel's desc_lock held.  Allocates more descriptors if the channel
576  * has run out.
577  */
578 static struct ioat_desc_sw *
579 ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
580 {
581         struct ioat_desc_sw *new;
582
583         if (!list_empty(&ioat->free_desc)) {
584                 new = to_ioat_desc(ioat->free_desc.next);
585                 list_del(&new->node);
586         } else {
587                 /* try to get another desc */
588                 new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
589                 if (!new) {
590                         dev_err(to_dev(&ioat->base), "alloc failed\n");
591                         return NULL;
592                 }
593         }
594         dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
595                 __func__, desc_id(new));
596         prefetch(new->hw);
597         return new;
598 }
599
600 static struct dma_async_tx_descriptor *
601 ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
602                       dma_addr_t dma_src, size_t len, unsigned long flags)
603 {
604         struct ioat_dma_chan *ioat = to_ioat_chan(c);
605         struct ioat_desc_sw *desc;
606         size_t copy;
607         LIST_HEAD(chain);
608         dma_addr_t src = dma_src;
609         dma_addr_t dest = dma_dest;
610         size_t total_len = len;
611         struct ioat_dma_descriptor *hw = NULL;
612         int tx_cnt = 0;
613
614         spin_lock_bh(&ioat->desc_lock);
615         desc = ioat1_dma_get_next_descriptor(ioat);
616         do {
617                 if (!desc)
618                         break;
619
620                 tx_cnt++;
621                 copy = min_t(size_t, len, ioat->xfercap);
622
623                 hw = desc->hw;
624                 hw->size = copy;
625                 hw->ctl = 0;
626                 hw->src_addr = src;
627                 hw->dst_addr = dest;
628
629                 list_add_tail(&desc->node, &chain);
630
631                 len -= copy;
632                 dest += copy;
633                 src += copy;
634                 if (len) {
635                         struct ioat_desc_sw *next;
636
637                         async_tx_ack(&desc->txd);
638                         next = ioat1_dma_get_next_descriptor(ioat);
639                         hw->next = next ? next->txd.phys : 0;
640                         dump_desc_dbg(ioat, desc);
641                         desc = next;
642                 } else
643                         hw->next = 0;
644         } while (len);
645
646         if (!desc) {
647                 struct ioat_chan_common *chan = &ioat->base;
648
649                 dev_err(to_dev(chan),
650                         "chan%d - get_next_desc failed\n", chan_num(chan));
651                 list_splice(&chain, &ioat->free_desc);
652                 spin_unlock_bh(&ioat->desc_lock);
653                 return NULL;
654         }
655         spin_unlock_bh(&ioat->desc_lock);
656
657         desc->txd.flags = flags;
658         desc->tx_cnt = tx_cnt;
659         desc->len = total_len;
660         list_splice(&chain, &desc->txd.tx_list);
661         hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
662         hw->ctl_f.compl_write = 1;
663         dump_desc_dbg(ioat, desc);
664
665         return &desc->txd;
666 }
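
/*
 * A copy larger than xfercap is carved into a chain of hardware
 * descriptors.  Intermediate descriptors are acked immediately and never
 * carry a cookie; only the final descriptor has compl_write (and, when
 * DMA_PREP_INTERRUPT is requested, int_en) set and is returned to the
 * caller, so completion is reported once for the whole request.
 */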
667
668 static void ioat1_cleanup_tasklet(unsigned long data)
669 {
670         struct ioat_dma_chan *chan = (void *)data;
671
672         ioat1_cleanup(chan);
673         writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
674 }
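
/*
 * Re-arming here rewrites CHANCTRL with the same IOAT_CHANCTRL_RUN value
 * programmed in ioat1_dma_alloc_chan_resources(), so the control bits
 * enabled there (error interrupt and completion-on-error handling, per the
 * comment at that site) are preserved across the re-arm; that preservation
 * is what the patch subject above refers to.
 */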
675
676 static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
677                        int direction, enum dma_ctrl_flags flags, bool dst)
678 {
679         if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
680             (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
681                 pci_unmap_single(pdev, addr, len, direction);
682         else
683                 pci_unmap_page(pdev, addr, len, direction);
684 }
685
686
687 void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
688                     size_t len, struct ioat_dma_descriptor *hw)
689 {
690         struct pci_dev *pdev = chan->device->pdev;
691         size_t offset = len - hw->size;
692
693         if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
694                 ioat_unmap(pdev, hw->dst_addr - offset, len,
695                            PCI_DMA_FROMDEVICE, flags, 1);
696
697         if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
698                 ioat_unmap(pdev, hw->src_addr - offset, len,
699                            PCI_DMA_TODEVICE, flags, 0);
700 }
701
702 unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
703 {
704         unsigned long phys_complete;
705         u64 completion;
706
707         completion = *chan->completion;
708         phys_complete = completion & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
709
710         dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
711                 (unsigned long long) phys_complete);
712
713         if ((completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
714                                 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
715                 dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
716                         readl(chan->reg_base + IOAT_CHANERR_OFFSET));
717
718                 /* TODO do something to salvage the situation */
719         }
720
721         return phys_complete;
722 }
723
724 /**
725  * ioat1_cleanup - clean up finished descriptors
726  * @ioat: ioat channel to be cleaned up
727  */
728 static void ioat1_cleanup(struct ioat_dma_chan *ioat)
729 {
730         struct ioat_chan_common *chan = &ioat->base;
731         unsigned long phys_complete;
732         struct ioat_desc_sw *desc, *_desc;
733         dma_cookie_t cookie = 0;
734         struct dma_async_tx_descriptor *tx;
735
736         prefetch(chan->completion);
737
738         if (!spin_trylock_bh(&chan->cleanup_lock))
739                 return;
740
741         phys_complete = ioat_get_current_completion(chan);
742         if (phys_complete == chan->last_completion) {
743                 spin_unlock_bh(&chan->cleanup_lock);
744                 /*
745                  * perhaps we're stuck so hard that the watchdog can't go off?
746                  * try to catch it after 2 seconds
747                  */
748                 if (time_after(jiffies,
749                                chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
750                         ioat1_chan_watchdog(&(chan->device->work.work));
751                         chan->last_completion_time = jiffies;
752                 }
753                 return;
754         }
755         chan->last_completion_time = jiffies;
756
757         cookie = 0;
758         if (!spin_trylock_bh(&ioat->desc_lock)) {
759                 spin_unlock_bh(&chan->cleanup_lock);
760                 return;
761         }
762
763         dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
764                  __func__, phys_complete);
765         list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
766                 tx = &desc->txd;
767                 /*
768                  * Incoming DMA requests may use multiple descriptors, e.g.
769                  * when they exceed xfercap.  If so, only the last one will
770                  * have a cookie and will require unmapping.
771                  */
772                 dump_desc_dbg(ioat, desc);
773                 if (tx->cookie) {
774                         cookie = tx->cookie;
775                         ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
776                         if (tx->callback) {
777                                 tx->callback(tx->callback_param);
778                                 tx->callback = NULL;
779                         }
780                 }
781
782                 if (tx->phys != phys_complete) {
783                         /*
784                          * a completed entry, but not the last, so clean
785                          * up if the client is done with the descriptor
786                          */
787                         if (async_tx_test_ack(tx))
788                                 list_move_tail(&desc->node, &ioat->free_desc);
789                         else
790                                 tx->cookie = 0;
791                 } else {
792                         /*
793                          * last used desc. Do not remove, so we can
794                          * append from it, but don't look at it next
795                          * time, either
796                          */
797                         tx->cookie = 0;
798
799                         /* TODO check status bits? */
800                         break;
801                 }
802         }
803
804         spin_unlock_bh(&ioat->desc_lock);
805
806         chan->last_completion = phys_complete;
807         if (cookie != 0)
808                 chan->completed_cookie = cookie;
809
810         spin_unlock_bh(&chan->cleanup_lock);
811 }
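
/*
 * Cleanup walks used_desc from the oldest entry, unmapping buffers and
 * firing callbacks for cookie-bearing descriptors and recycling acked
 * descriptors onto free_desc.  The walk stops at the descriptor whose
 * physical address matches the completion writeback; that descriptor stays
 * on the list so new work can be appended behind it.
 */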
812
813 static enum dma_status
814 ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
815                       dma_cookie_t *done, dma_cookie_t *used)
816 {
817         struct ioat_dma_chan *ioat = to_ioat_chan(c);
818
819         if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
820                 return DMA_SUCCESS;
821
822         ioat1_cleanup(ioat);
823
824         return ioat_is_complete(c, cookie, done, used);
825 }
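
/*
 * A rough sketch of how a dmaengine client drives these operations (it
 * mirrors ioat_dma_self_test() below; "chan", "dst", "src", "len" and
 * "flags" are placeholders, not driver symbols):
 *
 *	tx = dma->device_prep_dma_memcpy(chan, dst, src, len, flags);
 *	cookie = tx->tx_submit(tx);
 *	dma->device_issue_pending(chan);
 *	if (dma->device_is_tx_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS)
 *		...wait and poll again...
 */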
826
827 static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
828 {
829         struct ioat_chan_common *chan = &ioat->base;
830         struct ioat_desc_sw *desc;
831         struct ioat_dma_descriptor *hw;
832
833         spin_lock_bh(&ioat->desc_lock);
834
835         desc = ioat1_dma_get_next_descriptor(ioat);
836
837         if (!desc) {
838                 dev_err(to_dev(chan),
839                         "Unable to start null desc - get next desc failed\n");
840                 spin_unlock_bh(&ioat->desc_lock);
841                 return;
842         }
843
844         hw = desc->hw;
845         hw->ctl = 0;
846         hw->ctl_f.null = 1;
847         hw->ctl_f.int_en = 1;
848         hw->ctl_f.compl_write = 1;
849         /* set size to non-zero value (channel returns error when size is 0) */
850         hw->size = NULL_DESC_BUFFER_SIZE;
851         hw->src_addr = 0;
852         hw->dst_addr = 0;
853         async_tx_ack(&desc->txd);
854         hw->next = 0;
855         list_add_tail(&desc->node, &ioat->used_desc);
856         dump_desc_dbg(ioat, desc);
857
858         writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
859                chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
860         writel(((u64) desc->txd.phys) >> 32,
861                chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
862
863         writeb(IOAT_CHANCMD_START, chan->reg_base
864                 + IOAT_CHANCMD_OFFSET(chan->device->version));
865         spin_unlock_bh(&ioat->desc_lock);
866 }
867
868 /*
869  * Perform an IOAT transaction to verify the HW works.
870  */
871 #define IOAT_TEST_SIZE 2000
872
873 static void ioat_dma_test_callback(void *dma_async_param)
874 {
875         struct completion *cmp = dma_async_param;
876
877         complete(cmp);
878 }
879
880 /**
881  * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
882  * @device: device to be tested
883  */
884 static int ioat_dma_self_test(struct ioatdma_device *device)
885 {
886         int i;
887         u8 *src;
888         u8 *dest;
889         struct dma_device *dma = &device->common;
890         struct device *dev = &device->pdev->dev;
891         struct dma_chan *dma_chan;
892         struct dma_async_tx_descriptor *tx;
893         dma_addr_t dma_dest, dma_src;
894         dma_cookie_t cookie;
895         int err = 0;
896         struct completion cmp;
897         unsigned long tmo;
898         unsigned long flags;
899
900         src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
901         if (!src)
902                 return -ENOMEM;
903         dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
904         if (!dest) {
905                 kfree(src);
906                 return -ENOMEM;
907         }
908
909         /* Fill in src buffer */
910         for (i = 0; i < IOAT_TEST_SIZE; i++)
911                 src[i] = (u8)i;
912
913         /* Start copy, using first DMA channel */
914         dma_chan = container_of(dma->channels.next, struct dma_chan,
915                                 device_node);
916         if (dma->device_alloc_chan_resources(dma_chan) < 1) {
917                 dev_err(dev, "selftest cannot allocate chan resource\n");
918                 err = -ENODEV;
919                 goto out;
920         }
921
922         dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
923         dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
924         flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
925                 DMA_PREP_INTERRUPT;
926         tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
927                                                    IOAT_TEST_SIZE, flags);
928         if (!tx) {
929                 dev_err(dev, "Self-test prep failed, disabling\n");
930                 err = -ENODEV;
931                 goto free_resources;
932         }
933
934         async_tx_ack(tx);
935         init_completion(&cmp);
936         tx->callback = ioat_dma_test_callback;
937         tx->callback_param = &cmp;
938         cookie = tx->tx_submit(tx);
939         if (cookie < 0) {
940                 dev_err(dev, "Self-test setup failed, disabling\n");
941                 err = -ENODEV;
942                 goto free_resources;
943         }
944         dma->device_issue_pending(dma_chan);
945
946         tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
947
948         if (tmo == 0 ||
949             dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
950                                         != DMA_SUCCESS) {
951                 dev_err(dev, "Self-test copy timed out, disabling\n");
952                 err = -ENODEV;
953                 goto free_resources;
954         }
955         if (memcmp(src, dest, IOAT_TEST_SIZE)) {
956                 dev_err(dev, "Self-test copy failed compare, disabling\n");
957                 err = -ENODEV;
958                 goto free_resources;
959         }
960
961 free_resources:
962         dma->device_free_chan_resources(dma_chan);
963 out:
964         kfree(src);
965         kfree(dest);
966         return err;
967 }
968
969 static char ioat_interrupt_style[32] = "msix";
970 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
971                     sizeof(ioat_interrupt_style), 0644);
972 MODULE_PARM_DESC(ioat_interrupt_style,
973                  "set ioat interrupt style: msix (default), "
974                  "msix-single-vector, msi, intx)");
975
976 /**
977  * ioat_dma_setup_interrupts - setup interrupt handler
978  * @device: ioat device
979  */
980 static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
981 {
982         struct ioat_chan_common *chan;
983         struct pci_dev *pdev = device->pdev;
984         struct device *dev = &pdev->dev;
985         struct msix_entry *msix;
986         int i, j, msixcnt;
987         int err = -EINVAL;
988         u8 intrctrl = 0;
989
990         if (!strcmp(ioat_interrupt_style, "msix"))
991                 goto msix;
992         if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
993                 goto msix_single_vector;
994         if (!strcmp(ioat_interrupt_style, "msi"))
995                 goto msi;
996         if (!strcmp(ioat_interrupt_style, "intx"))
997                 goto intx;
998         dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
999         goto err_no_irq;
1000
1001 msix:
1002         /* The number of MSI-X vectors should equal the number of channels */
1003         msixcnt = device->common.chancnt;
1004         for (i = 0; i < msixcnt; i++)
1005                 device->msix_entries[i].entry = i;
1006
1007         err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
1008         if (err < 0)
1009                 goto msi;
1010         if (err > 0)
1011                 goto msix_single_vector;
1012
1013         for (i = 0; i < msixcnt; i++) {
1014                 msix = &device->msix_entries[i];
1015                 chan = ioat_chan_by_index(device, i);
1016                 err = devm_request_irq(dev, msix->vector,
1017                                        ioat_dma_do_interrupt_msix, 0,
1018                                        "ioat-msix", chan);
1019                 if (err) {
1020                         for (j = 0; j < i; j++) {
1021                                 msix = &device->msix_entries[j];
1022                                 chan = ioat_chan_by_index(device, j);
1023                                 devm_free_irq(dev, msix->vector, chan);
1024                         }
1025                         goto msix_single_vector;
1026                 }
1027         }
1028         intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
1029         goto done;
1030
1031 msix_single_vector:
1032         msix = &device->msix_entries[0];
1033         msix->entry = 0;
1034         err = pci_enable_msix(pdev, device->msix_entries, 1);
1035         if (err)
1036                 goto msi;
1037
1038         err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
1039                                "ioat-msix", device);
1040         if (err) {
1041                 pci_disable_msix(pdev);
1042                 goto msi;
1043         }
1044         goto done;
1045
1046 msi:
1047         err = pci_enable_msi(pdev);
1048         if (err)
1049                 goto intx;
1050
1051         err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
1052                                "ioat-msi", device);
1053         if (err) {
1054                 pci_disable_msi(pdev);
1055                 goto intx;
1056         }
1057         goto done;
1058
1059 intx:
1060         err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
1061                                IRQF_SHARED, "ioat-intx", device);
1062         if (err)
1063                 goto err_no_irq;
1064
1065 done:
1066         if (device->intr_quirk)
1067                 device->intr_quirk(device);
1068         intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
1069         writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
1070         return 0;
1071
1072 err_no_irq:
1073         /* Disable all interrupt generation */
1074         writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1075         dev_err(dev, "no usable interrupts\n");
1076         return err;
1077 }
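
/*
 * Interrupt setup tries the styles in decreasing order of preference and
 * falls back on failure: one MSI-X vector per channel, then a single shared
 * MSI-X vector, then MSI, then legacy INTx.  The module parameter only
 * selects where this ladder starts.  MASTER_INT_EN is written last, after
 * the handlers are installed, so no interrupt fires before one is in place.
 */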
1078
1079 static void ioat_disable_interrupts(struct ioatdma_device *device)
1080 {
1081         /* Disable all interrupt generation */
1082         writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1083 }
1084
1085 int ioat_probe(struct ioatdma_device *device)
1086 {
1087         int err = -ENODEV;
1088         struct dma_device *dma = &device->common;
1089         struct pci_dev *pdev = device->pdev;
1090         struct device *dev = &pdev->dev;
1091
1092         /* DMA coherent memory pool for DMA descriptor allocations */
1093         device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1094                                            sizeof(struct ioat_dma_descriptor),
1095                                            64, 0);
1096         if (!device->dma_pool) {
1097                 err = -ENOMEM;
1098                 goto err_dma_pool;
1099         }
1100
1101         device->completion_pool = pci_pool_create("completion_pool", pdev,
1102                                                   sizeof(u64), SMP_CACHE_BYTES,
1103                                                   SMP_CACHE_BYTES);
1104
1105         if (!device->completion_pool) {
1106                 err = -ENOMEM;
1107                 goto err_completion_pool;
1108         }
1109
1110         device->enumerate_channels(device);
1111
1112         dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1113         dma->dev = &pdev->dev;
1114
1115         dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
1116                 " %d channels, device version 0x%02x, driver version %s\n",
1117                 dma->chancnt, device->version, IOAT_DMA_VERSION);
1118
1119         if (!dma->chancnt) {
1120                 dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
1121                         "zero channels detected\n");
1122                 goto err_setup_interrupts;
1123         }
1124
1125         err = ioat_dma_setup_interrupts(device);
1126         if (err)
1127                 goto err_setup_interrupts;
1128
1129         err = ioat_dma_self_test(device);
1130         if (err)
1131                 goto err_self_test;
1132
1133         return 0;
1134
1135 err_self_test:
1136         ioat_disable_interrupts(device);
1137 err_setup_interrupts:
1138         pci_pool_destroy(device->completion_pool);
1139 err_completion_pool:
1140         pci_pool_destroy(device->dma_pool);
1141 err_dma_pool:
1142         return err;
1143 }
1144
1145 int ioat_register(struct ioatdma_device *device)
1146 {
1147         int err = dma_async_device_register(&device->common);
1148
1149         if (err) {
1150                 ioat_disable_interrupts(device);
1151                 pci_pool_destroy(device->completion_pool);
1152                 pci_pool_destroy(device->dma_pool);
1153         }
1154
1155         return err;
1156 }
1157
1158 /* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
1159 static void ioat1_intr_quirk(struct ioatdma_device *device)
1160 {
1161         struct pci_dev *pdev = device->pdev;
1162         u32 dmactrl;
1163
1164         pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1165         if (pdev->msi_enabled)
1166                 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1167         else
1168                 dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
1169         pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1170 }
1171
1172 int ioat1_dma_probe(struct ioatdma_device *device, int dca)
1173 {
1174         struct pci_dev *pdev = device->pdev;
1175         struct dma_device *dma;
1176         int err;
1177
1178         device->intr_quirk = ioat1_intr_quirk;
1179         device->enumerate_channels = ioat1_enumerate_channels;
1180         dma = &device->common;
1181         dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1182         dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
1183         dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
1184         dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
1185         dma->device_is_tx_complete = ioat1_dma_is_complete;
1186
1187         err = ioat_probe(device);
1188         if (err)
1189                 return err;
1190         ioat_set_tcp_copy_break(4096);
1191         err = ioat_register(device);
1192         if (err)
1193                 return err;
1194         if (dca)
1195                 device->dca = ioat_dca_init(pdev, device->reg_base);
1196
1197         INIT_DELAYED_WORK(&device->work, ioat1_chan_watchdog);
1198         schedule_delayed_work(&device->work, WATCHDOG_DELAY);
1199
1200         return err;
1201 }
1202
1203 void ioat_dma_remove(struct ioatdma_device *device)
1204 {
1205         struct dma_device *dma = &device->common;
1206
1207         if (device->version != IOAT_VER_3_0)
1208                 cancel_delayed_work(&device->work);
1209
1210         ioat_disable_interrupts(device);
1211
1212         dma_async_device_unregister(dma);
1213
1214         pci_pool_destroy(device->dma_pool);
1215         pci_pool_destroy(device->completion_pool);
1216
1217         INIT_LIST_HEAD(&dma->channels);
1218 }