ioat: cleanup some long deref chains and 80 column collisions
drivers/dma/ioat/dma.c  [linux-flexiantxendom0.git]
1 /*
2  * Intel I/OAT DMA Linux driver
3  * Copyright(c) 2004 - 2009 Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  */
22
23 /*
24  * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25  * copy operations.
26  */
27
28 #include <linux/init.h>
29 #include <linux/module.h>
30 #include <linux/pci.h>
31 #include <linux/interrupt.h>
32 #include <linux/dmaengine.h>
33 #include <linux/delay.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/workqueue.h>
36 #include <linux/i7300_idle.h>
37 #include "dma.h"
38 #include "registers.h"
39 #include "hw.h"
40
41 static int ioat_pending_level = 4;
42 module_param(ioat_pending_level, int, 0644);
43 MODULE_PARM_DESC(ioat_pending_level,
44                  "high-water mark for pushing ioat descriptors (default: 4)");
45
46 static void ioat_dma_chan_reset_part2(struct work_struct *work);
47 static void ioat_dma_chan_watchdog(struct work_struct *work);
48
49 /* internal functions */
50 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
51 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
52
53 static struct ioat_desc_sw *
54 ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
55 static struct ioat_desc_sw *
56 ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
57
58 static inline struct ioat_dma_chan *
59 ioat_chan_by_index(struct ioatdma_device *device, int index)
60 {
61         return device->idx[index];
62 }
63
64 /**
65  * ioat_dma_do_interrupt - handler used for single vector interrupt mode
66  * @irq: interrupt id
67  * @data: interrupt data
68  */
69 static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
70 {
71         struct ioatdma_device *instance = data;
72         struct ioat_dma_chan *ioat_chan;
73         unsigned long attnstatus;
74         int bit;
75         u8 intrctrl;
76
77         intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
78
79         if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
80                 return IRQ_NONE;
81
82         if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
83                 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
84                 return IRQ_NONE;
85         }
86
87         attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
88         for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
89                 ioat_chan = ioat_chan_by_index(instance, bit);
90                 tasklet_schedule(&ioat_chan->cleanup_task);
91         }
92
93         writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
94         return IRQ_HANDLED;
95 }
96
97 /**
98  * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
99  * @irq: interrupt id
100  * @data: interrupt data
101  */
102 static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
103 {
104         struct ioat_dma_chan *ioat_chan = data;
105
106         tasklet_schedule(&ioat_chan->cleanup_task);
107
108         return IRQ_HANDLED;
109 }
110
111 static void ioat_dma_cleanup_tasklet(unsigned long data);
112
113 /**
114  * ioat_dma_enumerate_channels - find and initialize the device's channels
115  * @device: the device to be enumerated
116  */
117 static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
118 {
119         u8 xfercap_scale;
120         u32 xfercap;
121         int i;
122         struct ioat_dma_chan *ioat_chan;
123         struct device *dev = &device->pdev->dev;
124
125         /*
126          * IOAT ver.3 workarounds
127          */
128         if (device->version == IOAT_VER_3_0) {
129                 u32 chan_err_mask;
130                 u16 dev_id;
131                 u32 dmauncerrsts;
132
133                 /*
134                  * Write CHANERRMSK_INT with 3E07h to mask out the errors
135                  * that can cause stability issues for IOAT ver.3
136                  */
137                 chan_err_mask = 0x3E07;
138                 pci_write_config_dword(device->pdev,
139                         IOAT_PCI_CHANERRMASK_INT_OFFSET,
140                         chan_err_mask);
141
142                 /*
143                  * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
144                  * (workaround for spurious config parity error after restart)
145                  */
146                 pci_read_config_word(device->pdev,
147                         IOAT_PCI_DEVICE_ID_OFFSET,
148                         &dev_id);
149                 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
150                         dmauncerrsts = 0x10;
151                         pci_write_config_dword(device->pdev,
152                                 IOAT_PCI_DMAUNCERRSTS_OFFSET,
153                                 dmauncerrsts);
154                 }
155         }
156
157         device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
158         xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
159         xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
160
161 #ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
162         if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
163                 device->common.chancnt--;
164         }
165 #endif
166         for (i = 0; i < device->common.chancnt; i++) {
167                 ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL);
168                 if (!ioat_chan) {
169                         device->common.chancnt = i;
170                         break;
171                 }
172
173                 ioat_chan->device = device;
174                 ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
175                 ioat_chan->xfercap = xfercap;
176                 ioat_chan->desccount = 0;
177                 INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
178                 if (ioat_chan->device->version == IOAT_VER_2_0)
179                         writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
180                                IOAT_DMA_DCA_ANY_CPU,
181                                ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
182                 else if (ioat_chan->device->version == IOAT_VER_3_0)
183                         writel(IOAT_DMA_DCA_ANY_CPU,
184                                ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
185                 spin_lock_init(&ioat_chan->cleanup_lock);
186                 spin_lock_init(&ioat_chan->desc_lock);
187                 INIT_LIST_HEAD(&ioat_chan->free_desc);
188                 INIT_LIST_HEAD(&ioat_chan->used_desc);
189                 /* This should be made common somewhere in dmaengine.c */
190                 ioat_chan->common.device = &device->common;
191                 list_add_tail(&ioat_chan->common.device_node,
192                               &device->common.channels);
193                 device->idx[i] = ioat_chan;
194                 tasklet_init(&ioat_chan->cleanup_task,
195                              ioat_dma_cleanup_tasklet,
196                              (unsigned long) ioat_chan);
197                 tasklet_disable(&ioat_chan->cleanup_task);
198         }
199         return device->common.chancnt;
200 }
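
/*
 * Editor's note: illustrative sketch, not part of this file.  As the
 * enumeration loop above shows, each channel's registers sit at a fixed
 * 0x80-byte stride past the device-global register block.  A hypothetical
 * helper (names invented for illustration only) expressing that layout:
 */
static inline void __iomem *example_chan_reg_base(void __iomem *dev_reg_base,
                                                  int chan_index)
{
        /* channel 0 starts one 0x80 block past the global registers */
        return dev_reg_base + 0x80 * (chan_index + 1);
}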
201
202 /**
203  * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
204  *                                 descriptors to hw
205  * @chan: DMA channel handle
206  */
207 static inline void
208 __ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan)
209 {
210         ioat_chan->pending = 0;
211         writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
212 }
213
214 static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
215 {
216         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
217
218         if (ioat_chan->pending > 0) {
219                 spin_lock_bh(&ioat_chan->desc_lock);
220                 __ioat1_dma_memcpy_issue_pending(ioat_chan);
221                 spin_unlock_bh(&ioat_chan->desc_lock);
222         }
223 }
224
225 static inline void
226 __ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat_chan)
227 {
228         ioat_chan->pending = 0;
229         writew(ioat_chan->dmacount,
230                ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
231 }
232
233 static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
234 {
235         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
236
237         if (ioat_chan->pending > 0) {
238                 spin_lock_bh(&ioat_chan->desc_lock);
239                 __ioat2_dma_memcpy_issue_pending(ioat_chan);
240                 spin_unlock_bh(&ioat_chan->desc_lock);
241         }
242 }
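
/*
 * Editor's note: illustrative client-side sketch, not part of this file.
 * A dmaengine consumer queues copies with device_prep_dma_memcpy() and
 * tx_submit(), then kicks the hardware once with device_issue_pending();
 * the driver also flushes on its own whenever ->pending reaches
 * ioat_pending_level.  This mirrors the self-test further down in this
 * file; the helper name is invented for illustration.
 */
static dma_cookie_t example_queue_copy(struct dma_chan *chan,
                                       dma_addr_t dst, dma_addr_t src,
                                       size_t len)
{
        struct dma_async_tx_descriptor *tx;
        dma_cookie_t cookie;

        tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len, 0);
        if (!tx)
                return -ENOMEM;
        async_tx_ack(tx);               /* client will not reuse this descriptor */
        cookie = tx->tx_submit(tx);     /* append to the channel's chain */
        chan->device->device_issue_pending(chan); /* push appended work to hw */
        return cookie;
}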
243
244
245 /**
246  * ioat_dma_chan_reset_part2 - reinit the channel after a reset
247  */
248 static void ioat_dma_chan_reset_part2(struct work_struct *work)
249 {
250         struct ioat_dma_chan *ioat_chan =
251                 container_of(work, struct ioat_dma_chan, work.work);
252         struct ioat_desc_sw *desc;
253
254         spin_lock_bh(&ioat_chan->cleanup_lock);
255         spin_lock_bh(&ioat_chan->desc_lock);
256
257         ioat_chan->completion_virt->low = 0;
258         ioat_chan->completion_virt->high = 0;
259         ioat_chan->pending = 0;
260
261         /*
262          * count the descriptors waiting, and be sure to do it
263          * right for both the CB1 line and the CB2 ring
264          */
265         ioat_chan->dmacount = 0;
266         if (ioat_chan->used_desc.prev) {
267                 desc = to_ioat_desc(ioat_chan->used_desc.prev);
268                 do {
269                         ioat_chan->dmacount++;
270                         desc = to_ioat_desc(desc->node.next);
271                 } while (&desc->node != ioat_chan->used_desc.next);
272         }
273
274         /*
275          * write the new starting descriptor address
276          * this puts channel engine into ARMED state
277          */
278         desc = to_ioat_desc(ioat_chan->used_desc.prev);
279         switch (ioat_chan->device->version) {
280         case IOAT_VER_1_2:
281                 writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
282                        ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
283                 writel(((u64) desc->txd.phys) >> 32,
284                        ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
285
286                 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
287                         + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
288                 break;
289         case IOAT_VER_2_0:
290                 writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
291                        ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
292                 writel(((u64) desc->txd.phys) >> 32,
293                        ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
294
295                 /* tell the engine to go with what's left to be done */
296                 writew(ioat_chan->dmacount,
297                        ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
298
299                 break;
300         }
301         dev_err(to_dev(ioat_chan),
302                 "chan%d reset - %d descs waiting, %d total desc\n",
303                 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
304
305         spin_unlock_bh(&ioat_chan->desc_lock);
306         spin_unlock_bh(&ioat_chan->cleanup_lock);
307 }
308
309 /**
310  * ioat_dma_reset_channel - restart a channel
311  * @ioat_chan: IOAT DMA channel handle
312  */
313 static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
314 {
315         u32 chansts, chanerr;
316
317         if (!ioat_chan->used_desc.prev)
318                 return;
319
320         chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
321         chansts = (ioat_chan->completion_virt->low
322                                         & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
323         if (chanerr) {
324                 dev_err(to_dev(ioat_chan),
325                         "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
326                         chan_num(ioat_chan), chansts, chanerr);
327                 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
328         }
329
330         /*
331          * whack it upside the head with a reset
332          * and wait for things to settle out.
333          * force the pending count to a really big negative
334          * to make sure no one forces an issue_pending
335          * while we're waiting.
336          */
337
338         spin_lock_bh(&ioat_chan->desc_lock);
339         ioat_chan->pending = INT_MIN;
340         writeb(IOAT_CHANCMD_RESET,
341                ioat_chan->reg_base
342                + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
343         spin_unlock_bh(&ioat_chan->desc_lock);
344
345         /* schedule the 2nd half instead of sleeping a long time */
346         schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
347 }
348
349 /**
350  * ioat_dma_chan_watchdog - watch for stuck channels
351  */
352 static void ioat_dma_chan_watchdog(struct work_struct *work)
353 {
354         struct ioatdma_device *device =
355                 container_of(work, struct ioatdma_device, work.work);
356         struct ioat_dma_chan *ioat_chan;
357         int i;
358
359         union {
360                 u64 full;
361                 struct {
362                         u32 low;
363                         u32 high;
364                 };
365         } completion_hw;
366         unsigned long compl_desc_addr_hw;
367
368         for (i = 0; i < device->common.chancnt; i++) {
369                 ioat_chan = ioat_chan_by_index(device, i);
370
371                 if (ioat_chan->device->version == IOAT_VER_1_2
372                         /* have we started processing anything yet */
373                     && ioat_chan->last_completion
374                         /* have we completed any since last watchdog cycle? */
375                     && (ioat_chan->last_completion ==
376                                 ioat_chan->watchdog_completion)
377                         /* has TCP stuck on one cookie since last watchdog? */
378                     && (ioat_chan->watchdog_tcp_cookie ==
379                                 ioat_chan->watchdog_last_tcp_cookie)
380                     && (ioat_chan->watchdog_tcp_cookie !=
381                                 ioat_chan->completed_cookie)
382                         /* is there something in the chain to be processed? */
383                         /* CB1 chain always has at least the last one processed */
384                     && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
385                     && ioat_chan->pending == 0) {
386
387                         /*
388                          * check CHANSTS register for completed
389                          * descriptor address.
390                          * if it is different than completion writeback,
391                          * it is not zero
392                          * and it has changed since the last watchdog
393                          *     we can assume that channel
394                          *     is still working correctly
395                          *     and the problem is in completion writeback.
396                          *     update completion writeback
397                          *     with actual CHANSTS value
398                          * else
399                          *     try resetting the channel
400                          */
401
402                         completion_hw.low = readl(ioat_chan->reg_base +
403                                 IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
404                         completion_hw.high = readl(ioat_chan->reg_base +
405                                 IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
406 #if (BITS_PER_LONG == 64)
407                         compl_desc_addr_hw =
408                                 completion_hw.full
409                                 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
410 #else
411                         compl_desc_addr_hw =
412                                 completion_hw.low & IOAT_LOW_COMPLETION_MASK;
413 #endif
414
415                         if ((compl_desc_addr_hw != 0)
416                            && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
417                            && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
418                                 ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
419                                 ioat_chan->completion_virt->low = completion_hw.low;
420                                 ioat_chan->completion_virt->high = completion_hw.high;
421                         } else {
422                                 ioat_dma_reset_channel(ioat_chan);
423                                 ioat_chan->watchdog_completion = 0;
424                                 ioat_chan->last_compl_desc_addr_hw = 0;
425                         }
426
427                 /*
428                  * for version 2.0 if there are descriptors yet to be processed
429                  * and the last completed hasn't changed since the last watchdog
430                  *      if they haven't hit the pending level
431                  *          issue the pending to push them through
432                  *      else
433                  *          try resetting the channel
434                  */
435                 } else if (ioat_chan->device->version == IOAT_VER_2_0
436                     && ioat_chan->used_desc.prev
437                     && ioat_chan->last_completion
438                     && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
439
440                         if (ioat_chan->pending < ioat_pending_level)
441                                 ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
442                         else {
443                                 ioat_dma_reset_channel(ioat_chan);
444                                 ioat_chan->watchdog_completion = 0;
445                         }
446                 } else {
447                         ioat_chan->last_compl_desc_addr_hw = 0;
448                         ioat_chan->watchdog_completion
449                                         = ioat_chan->last_completion;
450                 }
451
452                 ioat_chan->watchdog_last_tcp_cookie =
453                         ioat_chan->watchdog_tcp_cookie;
454         }
455
456         schedule_delayed_work(&device->work, WATCHDOG_DELAY);
457 }
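
/*
 * Editor's note: illustrative sketch, not part of this file.  The watchdog
 * above and ioat_dma_memcpy_cleanup() below both recover the address of the
 * last completed descriptor by masking the status bits out of the CHANSTS /
 * completion-writeback value; 32-bit builds can only look at the low word.
 * A hypothetical helper capturing that calculation:
 */
static inline unsigned long example_completed_desc_addr(u64 chansts)
{
#if (BITS_PER_LONG == 64)
        return chansts & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
        return (u32)chansts & IOAT_LOW_COMPLETION_MASK;
#endif
}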
458
459 static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
460 {
461         struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
462         struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
463         struct ioat_desc_sw *prev, *new;
464         struct ioat_dma_descriptor *hw;
465         dma_cookie_t cookie;
466         LIST_HEAD(new_chain);
467         u32 copy;
468         size_t len;
469         dma_addr_t src, dst;
470         unsigned long orig_flags;
471         unsigned int desc_count = 0;
472
473         /* src and dest and len are stored in the initial descriptor */
474         len = first->len;
475         src = first->src;
476         dst = first->dst;
477         orig_flags = first->txd.flags;
478         new = first;
479
480         spin_lock_bh(&ioat_chan->desc_lock);
481         prev = to_ioat_desc(ioat_chan->used_desc.prev);
482         prefetch(prev->hw);
483         do {
484                 copy = min_t(size_t, len, ioat_chan->xfercap);
485
486                 async_tx_ack(&new->txd);
487
488                 hw = new->hw;
489                 hw->size = copy;
490                 hw->ctl = 0;
491                 hw->src_addr = src;
492                 hw->dst_addr = dst;
493                 hw->next = 0;
494
495                 /* chain together the physical address list for the HW */
496                 wmb();
497                 prev->hw->next = (u64) new->txd.phys;
498
499                 len -= copy;
500                 dst += copy;
501                 src += copy;
502
503                 list_add_tail(&new->node, &new_chain);
504                 desc_count++;
505                 prev = new;
506         } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
507
508         if (!new) {
509                 dev_err(to_dev(ioat_chan), "tx submit failed\n");
510                 spin_unlock_bh(&ioat_chan->desc_lock);
511                 return -ENOMEM;
512         }
513
514         hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
515         if (first->txd.callback) {
516                 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
517                 if (first != new) {
518                         /* move callback into the last desc */
519                         new->txd.callback = first->txd.callback;
520                         new->txd.callback_param
521                                         = first->txd.callback_param;
522                         first->txd.callback = NULL;
523                         first->txd.callback_param = NULL;
524                 }
525         }
526
527         new->tx_cnt = desc_count;
528         new->txd.flags = orig_flags; /* client is in control of this ack */
529
530         /* store the original values for use in later cleanup */
531         if (new != first) {
532                 new->src = first->src;
533                 new->dst = first->dst;
534                 new->len = first->len;
535         }
536
537         /* cookie incr and addition to used_list must be atomic */
538         cookie = ioat_chan->common.cookie;
539         cookie++;
540         if (cookie < 0)
541                 cookie = 1;
542         ioat_chan->common.cookie = new->txd.cookie = cookie;
543
544         /* write address into NextDescriptor field of last desc in chain */
545         to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
546                                                         first->txd.phys;
547         list_splice_tail(&new_chain, &ioat_chan->used_desc);
548
549         ioat_chan->dmacount += desc_count;
550         ioat_chan->pending += desc_count;
551         if (ioat_chan->pending >= ioat_pending_level)
552                 __ioat1_dma_memcpy_issue_pending(ioat_chan);
553         spin_unlock_bh(&ioat_chan->desc_lock);
554
555         return cookie;
556 }
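
/*
 * Editor's note: illustrative sketch, not part of this file.  The loop in
 * ioat1_tx_submit() above carves a request into ->xfercap sized pieces, so
 * a single prep_memcpy of @len bytes consumes this many hardware
 * descriptors (hypothetical helper, for illustration only):
 */
static inline unsigned int example_descs_for_len(struct ioat_dma_chan *ioat_chan,
                                                 size_t len)
{
        /* e.g. xfercap = 2MB and len = 5MB -> 3 descriptors */
        return DIV_ROUND_UP(len, ioat_chan->xfercap);
}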
557
558 static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
559 {
560         struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
561         struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
562         struct ioat_desc_sw *new;
563         struct ioat_dma_descriptor *hw;
564         dma_cookie_t cookie;
565         u32 copy;
566         size_t len;
567         dma_addr_t src, dst;
568         unsigned long orig_flags;
569         unsigned int desc_count = 0;
570
571         /* src and dest and len are stored in the initial descriptor */
572         len = first->len;
573         src = first->src;
574         dst = first->dst;
575         orig_flags = first->txd.flags;
576         new = first;
577
578         /*
579          * ioat_chan->desc_lock is still in force in version 2 path
580          * it gets unlocked at end of this function
581          */
582         do {
583                 copy = min_t(size_t, len, ioat_chan->xfercap);
584
585                 async_tx_ack(&new->txd);
586
587                 hw = new->hw;
588                 hw->size = copy;
589                 hw->ctl = 0;
590                 hw->src_addr = src;
591                 hw->dst_addr = dst;
592
593                 len -= copy;
594                 dst += copy;
595                 src += copy;
596                 desc_count++;
597         } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
598
599         if (!new) {
600                 dev_err(to_dev(ioat_chan), "tx submit failed\n");
601                 spin_unlock_bh(&ioat_chan->desc_lock);
602                 return -ENOMEM;
603         }
604
605         hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
606         if (first->txd.callback) {
607                 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
608                 if (first != new) {
609                         /* move callback into the last desc */
610                         new->txd.callback = first->txd.callback;
611                         new->txd.callback_param
612                                         = first->txd.callback_param;
613                         first->txd.callback = NULL;
614                         first->txd.callback_param = NULL;
615                 }
616         }
617
618         new->tx_cnt = desc_count;
619         new->txd.flags = orig_flags; /* client is in control of this ack */
620
621         /* store the original values for use in later cleanup */
622         if (new != first) {
623                 new->src = first->src;
624                 new->dst = first->dst;
625                 new->len = first->len;
626         }
627
628         /* cookie incr and addition to used_list must be atomic */
629         cookie = ioat_chan->common.cookie;
630         cookie++;
631         if (cookie < 0)
632                 cookie = 1;
633         ioat_chan->common.cookie = new->txd.cookie = cookie;
634
635         ioat_chan->dmacount += desc_count;
636         ioat_chan->pending += desc_count;
637         if (ioat_chan->pending >= ioat_pending_level)
638                 __ioat2_dma_memcpy_issue_pending(ioat_chan);
639         spin_unlock_bh(&ioat_chan->desc_lock);
640
641         return cookie;
642 }
643
644 /**
645  * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
646  * @ioat_chan: the channel supplying the memory pool for the descriptors
647  * @flags: allocation flags
648  */
649 static struct ioat_desc_sw *
650 ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat_chan, gfp_t flags)
651 {
652         struct ioat_dma_descriptor *desc;
653         struct ioat_desc_sw *desc_sw;
654         struct ioatdma_device *ioatdma_device;
655         dma_addr_t phys;
656
657         ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
658         desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
659         if (unlikely(!desc))
660                 return NULL;
661
662         desc_sw = kzalloc(sizeof(*desc_sw), flags);
663         if (unlikely(!desc_sw)) {
664                 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
665                 return NULL;
666         }
667
668         memset(desc, 0, sizeof(*desc));
669         dma_async_tx_descriptor_init(&desc_sw->txd, &ioat_chan->common);
670         switch (ioat_chan->device->version) {
671         case IOAT_VER_1_2:
672                 desc_sw->txd.tx_submit = ioat1_tx_submit;
673                 break;
674         case IOAT_VER_2_0:
675         case IOAT_VER_3_0:
676                 desc_sw->txd.tx_submit = ioat2_tx_submit;
677                 break;
678         }
679
680         desc_sw->hw = desc;
681         desc_sw->txd.phys = phys;
682
683         return desc_sw;
684 }
685
686 static int ioat_initial_desc_count = 256;
687 module_param(ioat_initial_desc_count, int, 0644);
688 MODULE_PARM_DESC(ioat_initial_desc_count,
689                  "initial descriptors per channel (default: 256)");
690
691 /**
692  * ioat2_dma_massage_chan_desc - link the descriptors into a circle
693  * @ioat_chan: the channel to be massaged
694  */
695 static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
696 {
697         struct ioat_desc_sw *desc, *_desc;
698
699         /* setup used_desc */
700         ioat_chan->used_desc.next = ioat_chan->free_desc.next;
701         ioat_chan->used_desc.prev = NULL;
702
703         /* pull free_desc out of the circle so that every node is a hw
704          * descriptor, but leave it pointing to the list
705          */
706         ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
707         ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
708
709         /* circle link the hw descriptors */
710         desc = to_ioat_desc(ioat_chan->free_desc.next);
711         desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys;
712         list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
713                 desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys;
714         }
715 }
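
/*
 * Editor's note: illustrative debug sketch, not part of this file.  After
 * the massage above, every hw descriptor's ->next holds the DMA address of
 * the following node, closing the ring.  A hypothetical check of that
 * invariant (to be called under desc_lock) could look like this:
 */
static void example_check_ring(struct ioat_dma_chan *ioat_chan)
{
        struct ioat_desc_sw *desc = to_ioat_desc(ioat_chan->free_desc.next);
        struct ioat_desc_sw *start = desc;

        do {
                /* each hw descriptor must chain to its list successor */
                WARN_ON(desc->hw->next !=
                        to_ioat_desc(desc->node.next)->txd.phys);
                desc = to_ioat_desc(desc->node.next);
        } while (desc != start);
}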
716
717 /**
718  * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
719  * @chan: the channel to be filled out
720  */
721 static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
722 {
723         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
724         struct ioat_desc_sw *desc;
725         u16 chanctrl;
726         u32 chanerr;
727         int i;
728         LIST_HEAD(tmp_list);
729
730         /* have we already been set up? */
731         if (!list_empty(&ioat_chan->free_desc))
732                 return ioat_chan->desccount;
733
734         /* Setup register to interrupt and write completion status on error */
735         chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
736                 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
737                 IOAT_CHANCTRL_ERR_COMPLETION_EN;
738         writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
739
740         chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
741         if (chanerr) {
742                 dev_err(to_dev(ioat_chan), "CHANERR = %x, clearing\n", chanerr);
743                 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
744         }
745
746         /* Allocate descriptors */
747         for (i = 0; i < ioat_initial_desc_count; i++) {
748                 desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
749                 if (!desc) {
750                         dev_err(to_dev(ioat_chan),
751                                 "Only %d initial descriptors\n", i);
752                         break;
753                 }
754                 list_add_tail(&desc->node, &tmp_list);
755         }
756         spin_lock_bh(&ioat_chan->desc_lock);
757         ioat_chan->desccount = i;
758         list_splice(&tmp_list, &ioat_chan->free_desc);
759         if (ioat_chan->device->version != IOAT_VER_1_2)
760                 ioat2_dma_massage_chan_desc(ioat_chan);
761         spin_unlock_bh(&ioat_chan->desc_lock);
762
763         /* allocate a completion writeback area */
764         /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
765         ioat_chan->completion_virt =
766                 pci_pool_alloc(ioat_chan->device->completion_pool,
767                                GFP_KERNEL,
768                                &ioat_chan->completion_addr);
769         memset(ioat_chan->completion_virt, 0,
770                sizeof(*ioat_chan->completion_virt));
771         writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
772                ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
773         writel(((u64) ioat_chan->completion_addr) >> 32,
774                ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
775
776         tasklet_enable(&ioat_chan->cleanup_task);
777         ioat_dma_start_null_desc(ioat_chan);  /* give chain to dma device */
778         return ioat_chan->desccount;
779 }
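
/*
 * Editor's note: illustrative client sketch, not part of this file.  A
 * consumer normally obtains a channel through the generic dmaengine
 * allocator, which in turn invokes the alloc/free callbacks above (this
 * assumes the dma_request_channel()/dma_release_channel() interface
 * available in kernels of this vintage):
 */
static struct dma_chan *example_get_memcpy_chan(void)
{
        dma_cap_mask_t mask;

        dma_cap_zero(mask);
        dma_cap_set(DMA_MEMCPY, mask);          /* ioat advertises DMA_MEMCPY */
        return dma_request_channel(mask, NULL, NULL);   /* NULL: any channel */
}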
780
781 /**
782  * ioat_dma_free_chan_resources - release all the descriptors
783  * @chan: the channel to be cleaned
784  */
785 static void ioat_dma_free_chan_resources(struct dma_chan *chan)
786 {
787         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
788         struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
789         struct ioat_desc_sw *desc, *_desc;
790         int in_use_descs = 0;
791
792         /* Before freeing channel resources first check
793          * if they have been previously allocated for this channel.
794          */
795         if (ioat_chan->desccount == 0)
796                 return;
797
798         tasklet_disable(&ioat_chan->cleanup_task);
799         ioat_dma_memcpy_cleanup(ioat_chan);
800
801         /* Delay 100ms after reset to allow internal DMA logic to quiesce
802          * before removing DMA descriptor resources.
803          */
804         writeb(IOAT_CHANCMD_RESET,
805                ioat_chan->reg_base
806                         + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
807         mdelay(100);
808
809         spin_lock_bh(&ioat_chan->desc_lock);
810         switch (ioat_chan->device->version) {
811         case IOAT_VER_1_2:
812                 list_for_each_entry_safe(desc, _desc,
813                                          &ioat_chan->used_desc, node) {
814                         in_use_descs++;
815                         list_del(&desc->node);
816                         pci_pool_free(ioatdma_device->dma_pool, desc->hw,
817                                       desc->txd.phys);
818                         kfree(desc);
819                 }
820                 list_for_each_entry_safe(desc, _desc,
821                                          &ioat_chan->free_desc, node) {
822                         list_del(&desc->node);
823                         pci_pool_free(ioatdma_device->dma_pool, desc->hw,
824                                       desc->txd.phys);
825                         kfree(desc);
826                 }
827                 break;
828         case IOAT_VER_2_0:
829         case IOAT_VER_3_0:
830                 list_for_each_entry_safe(desc, _desc,
831                                          ioat_chan->free_desc.next, node) {
832                         list_del(&desc->node);
833                         pci_pool_free(ioatdma_device->dma_pool, desc->hw,
834                                       desc->txd.phys);
835                         kfree(desc);
836                 }
837                 desc = to_ioat_desc(ioat_chan->free_desc.next);
838                 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
839                               desc->txd.phys);
840                 kfree(desc);
841                 INIT_LIST_HEAD(&ioat_chan->free_desc);
842                 INIT_LIST_HEAD(&ioat_chan->used_desc);
843                 break;
844         }
845         spin_unlock_bh(&ioat_chan->desc_lock);
846
847         pci_pool_free(ioatdma_device->completion_pool,
848                       ioat_chan->completion_virt,
849                       ioat_chan->completion_addr);
850
851         /* one is ok since we left it there on purpose */
852         if (in_use_descs > 1)
853                 dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n",
854                         in_use_descs - 1);
855
856         ioat_chan->last_completion = ioat_chan->completion_addr = 0;
857         ioat_chan->pending = 0;
858         ioat_chan->dmacount = 0;
859         ioat_chan->desccount = 0;
860         ioat_chan->watchdog_completion = 0;
861         ioat_chan->last_compl_desc_addr_hw = 0;
862         ioat_chan->watchdog_tcp_cookie =
863                 ioat_chan->watchdog_last_tcp_cookie = 0;
864 }
865
866 /**
867  * ioat_dma_get_next_descriptor - return the next available descriptor
868  * @ioat_chan: IOAT DMA channel handle
869  *
870  * Gets the next descriptor from the chain, and must be called with the
871  * channel's desc_lock held.  Allocates more descriptors if the channel
872  * has run out.
873  */
874 static struct ioat_desc_sw *
875 ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
876 {
877         struct ioat_desc_sw *new;
878
879         if (!list_empty(&ioat_chan->free_desc)) {
880                 new = to_ioat_desc(ioat_chan->free_desc.next);
881                 list_del(&new->node);
882         } else {
883                 /* try to get another desc */
884                 new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
885                 if (!new) {
886                         dev_err(to_dev(ioat_chan), "alloc failed\n");
887                         return NULL;
888                 }
889         }
890
891         prefetch(new->hw);
892         return new;
893 }
894
895 static struct ioat_desc_sw *
896 ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
897 {
898         struct ioat_desc_sw *new;
899
900         /*
901          * used.prev points to where to start processing
902          * used.next points to next free descriptor
903          * if used.prev == NULL, there are none waiting to be processed
904          * if used.next == used.prev.prev, there is only one free descriptor,
905  *      and we need to use it as a noop descriptor before
906          *      linking in a new set of descriptors, since the device
907          *      has probably already read the pointer to it
908          */
909         if (ioat_chan->used_desc.prev &&
910             ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
911
912                 struct ioat_desc_sw *desc;
913                 struct ioat_desc_sw *noop_desc;
914                 int i;
915
916                 /* set up the noop descriptor */
917                 noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
918                 /* set size to non-zero value (channel returns error when size is 0) */
919                 noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
920                 noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
921                 noop_desc->hw->src_addr = 0;
922                 noop_desc->hw->dst_addr = 0;
923
924                 ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
925                 ioat_chan->pending++;
926                 ioat_chan->dmacount++;
927
928                 /* try to get a few more descriptors */
929                 for (i = 16; i; i--) {
930                         desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
931                         if (!desc) {
932                                 dev_err(to_dev(ioat_chan), "alloc failed\n");
933                                 break;
934                         }
935                         list_add_tail(&desc->node, ioat_chan->used_desc.next);
936
937                         desc->hw->next
938                                 = to_ioat_desc(desc->node.next)->txd.phys;
939                         to_ioat_desc(desc->node.prev)->hw->next
940                                 = desc->txd.phys;
941                         ioat_chan->desccount++;
942                 }
943
944                 ioat_chan->used_desc.next = noop_desc->node.next;
945         }
946         new = to_ioat_desc(ioat_chan->used_desc.next);
947         prefetch(new);
948         ioat_chan->used_desc.next = new->node.next;
949
950         if (ioat_chan->used_desc.prev == NULL)
951                 ioat_chan->used_desc.prev = &new->node;
952
953         prefetch(new->hw);
954         return new;
955 }
956
957 static struct ioat_desc_sw *
958 ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
959 {
960         if (!ioat_chan)
961                 return NULL;
962
963         switch (ioat_chan->device->version) {
964         case IOAT_VER_1_2:
965                 return ioat1_dma_get_next_descriptor(ioat_chan);
966         case IOAT_VER_2_0:
967         case IOAT_VER_3_0:
968                 return ioat2_dma_get_next_descriptor(ioat_chan);
969         }
970         return NULL;
971 }
972
973 static struct dma_async_tx_descriptor *
974 ioat1_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
975                       dma_addr_t dma_src, size_t len, unsigned long flags)
976 {
977         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
978         struct ioat_desc_sw *new;
979
980         spin_lock_bh(&ioat_chan->desc_lock);
981         new = ioat_dma_get_next_descriptor(ioat_chan);
982         spin_unlock_bh(&ioat_chan->desc_lock);
983
984         if (new) {
985                 new->len = len;
986                 new->dst = dma_dest;
987                 new->src = dma_src;
988                 new->txd.flags = flags;
989                 return &new->txd;
990         } else {
991                 dev_err(to_dev(ioat_chan),
992                         "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
993                         chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
994                 return NULL;
995         }
996 }
997
998 static struct dma_async_tx_descriptor *
999 ioat2_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
1000                       dma_addr_t dma_src, size_t len, unsigned long flags)
1001 {
1002         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1003         struct ioat_desc_sw *new;
1004
1005         spin_lock_bh(&ioat_chan->desc_lock);
1006         new = ioat2_dma_get_next_descriptor(ioat_chan);
1007
1008         /*
1009          * leave ioat_chan->desc_lock set in ioat 2 path
1010          * it will get unlocked at end of tx_submit
1011          */
1012
1013         if (new) {
1014                 new->len = len;
1015                 new->dst = dma_dest;
1016                 new->src = dma_src;
1017                 new->txd.flags = flags;
1018                 return &new->txd;
1019         } else {
1020                 spin_unlock_bh(&ioat_chan->desc_lock);
1021                 dev_err(to_dev(ioat_chan),
1022                         "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
1023                         chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
1024                 return NULL;
1025         }
1026 }
1027
1028 static void ioat_dma_cleanup_tasklet(unsigned long data)
1029 {
1030         struct ioat_dma_chan *chan = (void *)data;
1031         ioat_dma_memcpy_cleanup(chan);
1032         writew(IOAT_CHANCTRL_INT_DISABLE,
1033                chan->reg_base + IOAT_CHANCTRL_OFFSET);
1034 }
1035
1036 static void
1037 ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
1038 {
1039         if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
1040                 if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
1041                         pci_unmap_single(ioat_chan->device->pdev,
1042                                          pci_unmap_addr(desc, dst),
1043                                          pci_unmap_len(desc, len),
1044                                          PCI_DMA_FROMDEVICE);
1045                 else
1046                         pci_unmap_page(ioat_chan->device->pdev,
1047                                        pci_unmap_addr(desc, dst),
1048                                        pci_unmap_len(desc, len),
1049                                        PCI_DMA_FROMDEVICE);
1050         }
1051
1052         if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
1053                 if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
1054                         pci_unmap_single(ioat_chan->device->pdev,
1055                                          pci_unmap_addr(desc, src),
1056                                          pci_unmap_len(desc, len),
1057                                          PCI_DMA_TODEVICE);
1058                 else
1059                         pci_unmap_page(ioat_chan->device->pdev,
1060                                        pci_unmap_addr(desc, src),
1061                                        pci_unmap_len(desc, len),
1062                                        PCI_DMA_TODEVICE);
1063         }
1064 }
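
/*
 * Editor's note: illustrative sketch, not part of this file.  The unmap
 * above honours the DMA_COMPL_* flags a client set at prep time; a client
 * that manages long-lived mappings of its own opts out like this
 * (hypothetical helper name):
 */
static struct dma_async_tx_descriptor *
example_prep_no_unmap(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
                      size_t len)
{
        unsigned long flags = DMA_COMPL_SKIP_SRC_UNMAP |
                              DMA_COMPL_SKIP_DEST_UNMAP;

        /* the driver will leave both buffers mapped after completion */
        return chan->device->device_prep_dma_memcpy(chan, dst, src, len, flags);
}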
1065
1066 /**
1067  * ioat_dma_memcpy_cleanup - clean up finished descriptors
1068  * @chan: ioat channel to be cleaned up
1069  */
1070 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
1071 {
1072         unsigned long phys_complete;
1073         struct ioat_desc_sw *desc, *_desc;
1074         dma_cookie_t cookie = 0;
1075         unsigned long desc_phys;
1076         struct ioat_desc_sw *latest_desc;
1077         struct dma_async_tx_descriptor *tx;
1078
1079         prefetch(ioat_chan->completion_virt);
1080
1081         if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
1082                 return;
1083
1084         /* The completion writeback can happen at any time,
1085            so reads by the driver need to be atomic operations.
1086            The descriptor physical addresses are limited to 32 bits
1087            when the CPU can only do a 32-bit mov */
1088
1089 #if (BITS_PER_LONG == 64)
1090         phys_complete =
1091                 ioat_chan->completion_virt->full
1092                 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1093 #else
1094         phys_complete =
1095                 ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
1096 #endif
1097
1098         if ((ioat_chan->completion_virt->full
1099                 & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
1100                                 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
1101                 dev_err(to_dev(ioat_chan), "Channel halted, chanerr = %x\n",
1102                         readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
1103
1104                 /* TODO do something to salvage the situation */
1105         }
1106
1107         if (phys_complete == ioat_chan->last_completion) {
1108                 spin_unlock_bh(&ioat_chan->cleanup_lock);
1109                 /*
1110                  * perhaps we're stuck so hard that the watchdog can't go off?
1111                  * try to catch it after 2 seconds
1112                  */
1113                 if (ioat_chan->device->version != IOAT_VER_3_0) {
1114                         if (time_after(jiffies,
1115                                        ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
1116                                 ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
1117                                 ioat_chan->last_completion_time = jiffies;
1118                         }
1119                 }
1120                 return;
1121         }
1122         ioat_chan->last_completion_time = jiffies;
1123
1124         cookie = 0;
1125         if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
1126                 spin_unlock_bh(&ioat_chan->cleanup_lock);
1127                 return;
1128         }
1129
1130         switch (ioat_chan->device->version) {
1131         case IOAT_VER_1_2:
1132                 list_for_each_entry_safe(desc, _desc,
1133                                          &ioat_chan->used_desc, node) {
1134                         tx = &desc->txd;
1135                         /*
1136                          * Incoming DMA requests may use multiple descriptors,
1137                          * due to exceeding xfercap, perhaps. If so, only the
1138                          * last one will have a cookie, and require unmapping.
1139                          */
1140                         if (tx->cookie) {
1141                                 cookie = tx->cookie;
1142                                 ioat_dma_unmap(ioat_chan, desc);
1143                                 if (tx->callback) {
1144                                         tx->callback(tx->callback_param);
1145                                         tx->callback = NULL;
1146                                 }
1147                         }
1148
1149                         if (tx->phys != phys_complete) {
1150                                 /*
1151                                  * a completed entry, but not the last, so clean
1152                                  * up if the client is done with the descriptor
1153                                  */
1154                                 if (async_tx_test_ack(tx)) {
1155                                         list_move_tail(&desc->node,
1156                                                        &ioat_chan->free_desc);
1157                                 } else
1158                                         tx->cookie = 0;
1159                         } else {
1160                                 /*
1161                                  * last used desc. Do not remove, so we can
1162                                  * append from it, but don't look at it next
1163                                  * time, either
1164                                  */
1165                                 tx->cookie = 0;
1166
1167                                 /* TODO check status bits? */
1168                                 break;
1169                         }
1170                 }
1171                 break;
1172         case IOAT_VER_2_0:
1173         case IOAT_VER_3_0:
1174                 /* has some other thread already cleaned up? */
1175                 if (ioat_chan->used_desc.prev == NULL)
1176                         break;
1177
1178                 /* work backwards to find latest finished desc */
1179                 desc = to_ioat_desc(ioat_chan->used_desc.next);
1180                 tx = &desc->txd;
1181                 latest_desc = NULL;
1182                 do {
1183                         desc = to_ioat_desc(desc->node.prev);
1184                         desc_phys = (unsigned long)desc->txd.phys
1185                                        & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1186                         if (desc_phys == phys_complete) {
1187                                 latest_desc = desc;
1188                                 break;
1189                         }
1190                 } while (&desc->node != ioat_chan->used_desc.prev);
1191
1192                 if (latest_desc != NULL) {
1193                         /* work forwards to clear finished descriptors */
1194                         for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
1195                              &desc->node != latest_desc->node.next &&
1196                              &desc->node != ioat_chan->used_desc.next;
1197                              desc = to_ioat_desc(desc->node.next)) {
1198                                 if (desc->txd.cookie) {
1199                                         cookie = desc->txd.cookie;
1200                                         desc->txd.cookie = 0;
1201                                         ioat_dma_unmap(ioat_chan, desc);
1202                                         if (desc->txd.callback) {
1203                                                 desc->txd.callback(desc->txd.callback_param);
1204                                                 desc->txd.callback = NULL;
1205                                         }
1206                                 }
1207                         }
1208
1209                         /* move used.prev up beyond those that are finished */
1210                         if (&desc->node == ioat_chan->used_desc.next)
1211                                 ioat_chan->used_desc.prev = NULL;
1212                         else
1213                                 ioat_chan->used_desc.prev = &desc->node;
1214                 }
1215                 break;
1216         }
1217
1218         spin_unlock_bh(&ioat_chan->desc_lock);
1219
1220         ioat_chan->last_completion = phys_complete;
1221         if (cookie != 0)
1222                 ioat_chan->completed_cookie = cookie;
1223
1224         spin_unlock_bh(&ioat_chan->cleanup_lock);
1225 }
1226
1227 /**
1228  * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
1229  * @chan: IOAT DMA channel handle
1230  * @cookie: DMA transaction identifier
1231  * @done: if not %NULL, updated with last completed transaction
1232  * @used: if not %NULL, updated with last used transaction
1233  */
1234 static enum dma_status
1235 ioat_dma_is_complete(struct dma_chan *chan, dma_cookie_t cookie,
1236                      dma_cookie_t *done, dma_cookie_t *used)
1237 {
1238         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1239         dma_cookie_t last_used;
1240         dma_cookie_t last_complete;
1241         enum dma_status ret;
1242
1243         last_used = chan->cookie;
1244         last_complete = ioat_chan->completed_cookie;
1245         ioat_chan->watchdog_tcp_cookie = cookie;
1246
1247         if (done)
1248                 *done = last_complete;
1249         if (used)
1250                 *used = last_used;
1251
1252         ret = dma_async_is_complete(cookie, last_complete, last_used);
1253         if (ret == DMA_SUCCESS)
1254                 return ret;
1255
1256         ioat_dma_memcpy_cleanup(ioat_chan);
1257
1258         last_used = chan->cookie;
1259         last_complete = ioat_chan->completed_cookie;
1260
1261         if (done)
1262                 *done = last_complete;
1263         if (used)
1264                 *used = last_used;
1265
1266         return dma_async_is_complete(cookie, last_complete, last_used);
1267 }
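
/*
 * Editor's note: illustrative polling sketch, not part of this file.  A
 * client that does not install a completion callback can poll its cookie
 * through the device_is_tx_complete() hook (wired to the function above),
 * as the self-test below also does for its final check.  Helper name is
 * invented; real callers would sleep rather than busy-wait.
 */
static int example_poll_for_completion(struct dma_chan *chan,
                                       dma_cookie_t cookie)
{
        enum dma_status status;

        do {
                status = chan->device->device_is_tx_complete(chan, cookie,
                                                             NULL, NULL);
                if (status == DMA_IN_PROGRESS)
                        cpu_relax();    /* busy-wait for illustration only */
        } while (status == DMA_IN_PROGRESS);

        return status == DMA_SUCCESS ? 0 : -EIO;
}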
1268
1269 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
1270 {
1271         struct ioat_desc_sw *desc;
1272
1273         spin_lock_bh(&ioat_chan->desc_lock);
1274
1275         desc = ioat_dma_get_next_descriptor(ioat_chan);
1276
1277         if (!desc) {
1278                 dev_err(to_dev(ioat_chan),
1279                         "Unable to start null desc - get next desc failed\n");
1280                 spin_unlock_bh(&ioat_chan->desc_lock);
1281                 return;
1282         }
1283
1284         desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
1285                                 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
1286                                 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
1287         /* set size to non-zero value (channel returns error when size is 0) */
1288         desc->hw->size = NULL_DESC_BUFFER_SIZE;
1289         desc->hw->src_addr = 0;
1290         desc->hw->dst_addr = 0;
1291         async_tx_ack(&desc->txd);
1292         switch (ioat_chan->device->version) {
1293         case IOAT_VER_1_2:
1294                 desc->hw->next = 0;
1295                 list_add_tail(&desc->node, &ioat_chan->used_desc);
1296
1297                 writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
1298                        ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
1299                 writel(((u64) desc->txd.phys) >> 32,
1300                        ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
1301
1302                 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
1303                         + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
1304                 break;
1305         case IOAT_VER_2_0:
1306         case IOAT_VER_3_0:
1307                 writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
1308                        ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
1309                 writel(((u64) desc->txd.phys) >> 32,
1310                        ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
1311
1312                 ioat_chan->dmacount++;
1313                 __ioat2_dma_memcpy_issue_pending(ioat_chan);
1314                 break;
1315         }
1316         spin_unlock_bh(&ioat_chan->desc_lock);
1317 }
1318
1319 /*
1320  * Perform an IOAT transaction to verify the HW works.
1321  */
1322 #define IOAT_TEST_SIZE 2000
1323
1324 static void ioat_dma_test_callback(void *dma_async_param)
1325 {
1326         struct completion *cmp = dma_async_param;
1327
1328         complete(cmp);
1329 }
1330
1331 /**
1332  * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
1333  * @device: device to be tested
1334  */
1335 static int ioat_dma_self_test(struct ioatdma_device *device)
1336 {
1337         int i;
1338         u8 *src;
1339         u8 *dest;
1340         struct dma_device *dma = &device->common;
1341         struct device *dev = &device->pdev->dev;
1342         struct dma_chan *dma_chan;
1343         struct dma_async_tx_descriptor *tx;
1344         dma_addr_t dma_dest, dma_src;
1345         dma_cookie_t cookie;
1346         int err = 0;
1347         struct completion cmp;
1348         unsigned long tmo;
1349         unsigned long flags;
1350
1351         src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1352         if (!src)
1353                 return -ENOMEM;
1354         dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1355         if (!dest) {
1356                 kfree(src);
1357                 return -ENOMEM;
1358         }
1359
1360         /* Fill in src buffer */
1361         for (i = 0; i < IOAT_TEST_SIZE; i++)
1362                 src[i] = (u8)i;
1363
1364         /* Start copy, using first DMA channel */
1365         dma_chan = container_of(dma->channels.next, struct dma_chan,
1366                                 device_node);
1367         if (dma->device_alloc_chan_resources(dma_chan) < 1) {
1368                 dev_err(dev, "selftest cannot allocate chan resource\n");
1369                 err = -ENODEV;
1370                 goto out;
1371         }
1372
1373         dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
1374         dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
1375         flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
1376         tx = dma->device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
1377                                          IOAT_TEST_SIZE, flags);
1378         if (!tx) {
1379                 dev_err(dev, "Self-test prep failed, disabling\n");
1380                 err = -ENODEV;
1381                 goto free_resources;
1382         }
1383
1384         async_tx_ack(tx);
1385         init_completion(&cmp);
1386         tx->callback = ioat_dma_test_callback;
1387         tx->callback_param = &cmp;
1388         cookie = tx->tx_submit(tx);
1389         if (cookie < 0) {
1390                 dev_err(dev, "Self-test setup failed, disabling\n");
1391                 err = -ENODEV;
1392                 goto free_resources;
1393         }
1394         dma->device_issue_pending(dma_chan);
1395
1396         tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1397
1398         if (tmo == 0 ||
1399             dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
1400                                         != DMA_SUCCESS) {
1401                 dev_err(dev, "Self-test copy timed out, disabling\n");
1402                 err = -ENODEV;
1403                 goto free_resources;
1404         }
1405         if (memcmp(src, dest, IOAT_TEST_SIZE)) {
1406                 dev_err(dev, "Self-test copy failed compare, disabling\n");
1407                 err = -ENODEV;
1408                 goto free_resources;
1409         }
1410
1411 free_resources:
1412         dma->device_free_chan_resources(dma_chan);
1413 out:
1414         kfree(src);
1415         kfree(dest);
1416         return err;
1417 }
1418
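/*
 * For reference only: an ordinary dmaengine client would drive the same
 * copy through the public API (illustrative sketch, error handling and
 * channel acquisition omitted; "chan" is assumed to be a channel obtained
 * through the dmaengine client interface):
 *
 *	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len, 0);
 *	tx->callback = done_fn;
 *	tx->callback_param = &cmp;
 *	cookie = tx->tx_submit(tx);
 *	chan->device->device_issue_pending(chan);
 *	wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 *
 * The self test above calls the first channel directly because the device
 * has not yet been registered with the dmaengine core at this point.
 */
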
1419 static char ioat_interrupt_style[32] = "msix";
1420 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
1421                     sizeof(ioat_interrupt_style), 0644);
1422 MODULE_PARM_DESC(ioat_interrupt_style,
1423                  "set ioat interrupt style: msix (default), "
1424                  "msix-single-vector, msi, intx");
1425
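/*
 * Example usage, assuming the driver is built as the "ioatdma" module:
 *
 *	modprobe ioatdma ioat_interrupt_style=msi
 *
 * or, when built in, via the kernel command line:
 *
 *	ioatdma.ioat_interrupt_style=msi
 *
 * With permissions 0644 the current value is also visible under
 * /sys/module/ioatdma/parameters/ioat_interrupt_style, but the style is
 * only evaluated when interrupts are set up at device init time.
 */
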
1426 /**
1427  * ioat_dma_setup_interrupts - setup interrupt handler
1428  * @device: ioat device
1429  */
1430 static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
1431 {
1432         struct ioat_dma_chan *ioat_chan;
1433         struct pci_dev *pdev = device->pdev;
1434         struct device *dev = &pdev->dev;
1435         struct msix_entry *msix;
1436         int i, j, msixcnt;
1437         int err = -EINVAL;
1438         u8 intrctrl = 0;
1439
1440         if (!strcmp(ioat_interrupt_style, "msix"))
1441                 goto msix;
1442         if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
1443                 goto msix_single_vector;
1444         if (!strcmp(ioat_interrupt_style, "msi"))
1445                 goto msi;
1446         if (!strcmp(ioat_interrupt_style, "intx"))
1447                 goto intx;
1448         dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
1449         goto err_no_irq;
1450
1451 msix:
1452         /* The number of MSI-X vectors should equal the number of channels */
1453         msixcnt = device->common.chancnt;
1454         for (i = 0; i < msixcnt; i++)
1455                 device->msix_entries[i].entry = i;
1456
1457         err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
1458         if (err < 0)
1459                 goto msi;
1460         if (err > 0)
1461                 goto msix_single_vector;
1462
1463         for (i = 0; i < msixcnt; i++) {
1464                 msix = &device->msix_entries[i];
1465                 ioat_chan = ioat_chan_by_index(device, i);
1466                 err = devm_request_irq(dev, msix->vector,
1467                                        ioat_dma_do_interrupt_msix, 0,
1468                                        "ioat-msix", ioat_chan);
1469                 if (err) {
1470                         for (j = 0; j < i; j++) {
1471                                 msix = &device->msix_entries[j];
1472                                 ioat_chan = ioat_chan_by_index(device, j);
1473                                 devm_free_irq(dev, msix->vector, ioat_chan);
1474                         }
1475                         goto msix_single_vector;
1476                 }
1477         }
1478         intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
1479         goto done;
1480
1481 msix_single_vector:
1482         msix = &device->msix_entries[0];
1483         msix->entry = 0;
1484         err = pci_enable_msix(pdev, device->msix_entries, 1);
1485         if (err)
1486                 goto msi;
1487
1488         err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
1489                                "ioat-msix", device);
1490         if (err) {
1491                 pci_disable_msix(pdev);
1492                 goto msi;
1493         }
1494         goto done;
1495
1496 msi:
1497         err = pci_enable_msi(pdev);
1498         if (err)
1499                 goto intx;
1500
1501         err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
1502                                "ioat-msi", device);
1503         if (err) {
1504                 pci_disable_msi(pdev);
1505                 goto intx;
1506         }
1507         /*
1508          * CB 1.2 devices need a bit set in configuration space to enable MSI
1509          */
1510         if (device->version == IOAT_VER_1_2) {
1511                 u32 dmactrl;
1512                 pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1513                 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1514                 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1515         }
1516         goto done;
1517
1518 intx:
1519         err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
1520                                IRQF_SHARED, "ioat-intx", device);
1521         if (err)
1522                 goto err_no_irq;
1523
1524 done:
1525         intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
1526         writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
1527         return 0;
1528
1529 err_no_irq:
1530         /* Disable all interrupt generation */
1531         writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1532         dev_err(dev, "no usable interrupts\n");
1533         return err;
1534 }
1535
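/*
 * Note on the fallback ladder above: with this API, pci_enable_msix()
 * returns 0 on success, a positive count when fewer vectors are available
 * than requested, and a negative errno on failure.  A positive return
 * therefore falls back to the single-vector MSI-X path, a negative return
 * falls back to MSI, and MSI in turn falls back to legacy INTx.
 */
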
1536 static void ioat_disable_interrupts(struct ioatdma_device *device)
1537 {
1538         /* Disable all interrupt generation */
1539         writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1540 }
1541
1542 struct ioatdma_device *
1543 ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase)
1544 {
1545         int err;
1546         struct device *dev = &pdev->dev;
1547         struct ioatdma_device *device;
1548         struct dma_device *dma;
1549
1550         device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
1551         if (!device)
1552                 return NULL;
1553         device->pdev = pdev;
1554         device->reg_base = iobase;
1555         device->version = readb(device->reg_base + IOAT_VER_OFFSET);
1556         dma = &device->common;
1557
1558         /* DMA coherent memory pool for DMA descriptor allocations */
1559         device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1560                                            sizeof(struct ioat_dma_descriptor),
1561                                            64, 0);
1562         if (!device->dma_pool) {
1563                 err = -ENOMEM;
1564                 goto err_dma_pool;
1565         }
1566
1567         device->completion_pool = pci_pool_create("completion_pool", pdev,
1568                                                   sizeof(u64), SMP_CACHE_BYTES,
1569                                                   SMP_CACHE_BYTES);
1570         if (!device->completion_pool) {
1571                 err = -ENOMEM;
1572                 goto err_completion_pool;
1573         }
1574
1575         INIT_LIST_HEAD(&dma->channels);
1576         ioat_dma_enumerate_channels(device);
1577
1578         dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
1579         dma->device_free_chan_resources = ioat_dma_free_chan_resources;
1580         dma->dev = &pdev->dev;
1581
1582         dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1583         dma->device_is_tx_complete = ioat_dma_is_complete;
1584         switch (device->version) {
1585         case IOAT_VER_1_2:
1586                 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1587                 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
1588                 break;
1589         case IOAT_VER_2_0:
1590         case IOAT_VER_3_0:
1591                 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
1592                 dma->device_issue_pending = ioat2_dma_memcpy_issue_pending;
1593                 break;
1594         }
1595
1596         dev_info(dev, "Intel(R) I/OAT DMA Engine found,"
1597                 " %d channels, device version 0x%02x, driver version %s\n",
1598                 dma->chancnt, device->version, IOAT_DMA_VERSION);
1599
1600         if (!dma->chancnt) {
1601                 dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
1602                         "zero channels detected\n");
1603                 goto err_setup_interrupts;
1604         }
1605
1606         err = ioat_dma_setup_interrupts(device);
1607         if (err)
1608                 goto err_setup_interrupts;
1609
1610         err = ioat_dma_self_test(device);
1611         if (err)
1612                 goto err_self_test;
1613
1614         err = dma_async_device_register(dma);
1615         if (err)
1616                 goto err_self_test;
1617
1618         ioat_set_tcp_copy_break(device);
1619
1620         if (device->version != IOAT_VER_3_0) {
1621                 INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
1622                 schedule_delayed_work(&device->work, WATCHDOG_DELAY);
1624         }
1625
1626         return device;
1627
1628 err_self_test:
1629         ioat_disable_interrupts(device);
1630 err_setup_interrupts:
1631         pci_pool_destroy(device->completion_pool);
1632 err_completion_pool:
1633         pci_pool_destroy(device->dma_pool);
1634 err_dma_pool:
1635         return NULL;
1636 }
1637
1638 void ioat_dma_remove(struct ioatdma_device *device)
1639 {
1640         struct dma_chan *chan, *_chan;
1642         struct dma_device *dma = &device->common;
1643
1644         if (device->version != IOAT_VER_3_0)
1645                 cancel_delayed_work(&device->work);
1646
1647         ioat_disable_interrupts(device);
1648
1649         dma_async_device_unregister(dma);
1650
1651         pci_pool_destroy(device->dma_pool);
1652         pci_pool_destroy(device->completion_pool);
1653
1654         list_for_each_entry_safe(chan, _chan, &dma->channels, device_node) {
1656                 list_del(&chan->device_node);
1657         }
1658 }
1659
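/*
 * Illustrative sketch of how a PCI front end would typically drive the two
 * entry points above (hypothetical names, simplified error handling; the
 * driver's real PCI glue also enables the device, sets the DMA mask and
 * requests the BAR before mapping it):
 *
 *	static int example_pci_probe(struct pci_dev *pdev,
 *				     const struct pci_device_id *id)
 *	{
 *		void __iomem *iobase = pci_iomap(pdev, 0, 0);
 *		struct ioatdma_device *device;
 *
 *		if (!iobase)
 *			return -ENOMEM;
 *		device = ioat_dma_probe(pdev, iobase);
 *		if (!device)
 *			return -ENODEV;
 *		pci_set_drvdata(pdev, device);
 *		return 0;
 *	}
 *
 *	static void example_pci_remove(struct pci_dev *pdev)
 *	{
 *		ioat_dma_remove(pci_get_drvdata(pdev));
 *	}
 */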