[linux-flexiantxendom0-3.2.10.git] drivers/net/ethernet/intel/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/pm_runtime.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
        __stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                        igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        .driver.pm = &igb_pm_ops,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

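        /*
         * Registers that exist once per queue are recognized below and the
         * first four ring instances are dumped side by side; anything else
         * is printed as a single 32-bit value.
         */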
        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
                regs[2], regs[3]);
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        u16 i, n;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                pr_info("Device Name     state            trans_start      "
                        "last_rx\n");
                pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
                        netdev->state, netdev->trans_start, netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        pr_info(" Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                struct igb_tx_buffer *buffer_info;
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
                pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
                        n, tx_ring->next_to_use, tx_ring->next_to_clean,
                        (u64)buffer_info->dma,
                        buffer_info->length,
                        buffer_info->next_to_watch,
                        (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
                        "[bi->dma       ] leng  ntw timestamp        "
                        "bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        const char *next_desc;
                        struct igb_tx_buffer *buffer_info;
                        tx_desc = IGB_TX_DESC(tx_ring, i);
                        buffer_info = &tx_ring->tx_buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        if (i == tx_ring->next_to_use &&
                            i == tx_ring->next_to_clean)
                                next_desc = " NTC/U";
                        else if (i == tx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == tx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        pr_info("T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %p %016llX %p%s\n", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb, next_desc);

                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, phys_to_virt(buffer_info->dma),
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        pr_info("Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info(" %5d %5X %5X\n",
                        n, rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
                        "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
                pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
                        "----------- [bi->skb] <-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        const char *next_desc;
                        struct igb_rx_buffer *buffer_info;
                        buffer_info = &rx_ring->rx_buffer_info[i];
                        rx_desc = IGB_RX_DESC(rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

                        if (i == rx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == rx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                pr_info("%s[0x%03X]     %016llX %016llX -------"
                                        "--------- %p%s\n", "RWB", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb, next_desc);
                        } else {
                                pr_info("%s[0x%03X]     %016llX %016llX %016llX"
                                        " %p%s\n", "R  ", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb, next_desc);

                                if (netif_msg_pktdata(adapter)) {
                                        print_hex_dump(KERN_INFO, "",
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
                                                IGB_RX_HDR_LEN, true);
                                        print_hex_dump(KERN_INFO, "",
                                          DUMP_PREFIX_ADDRESS,
                                          16, 1,
                                          phys_to_virt(
                                            buffer_info->page_dma +
                                            buffer_info->page_offset),
                                          PAGE_SIZE/2, true);
                                }
                        }
                }
        }

exit:
        return;
}

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML.  However we never
         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
         */
        if (hw->mac.type >= e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        pr_info("%s - version %s\n",
               igb_driver_string, igb_driver_version);

        pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
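/*
 * Q_IDX_82576 swaps the low bit of i into bit 3, so consecutive indices
 * map to queue offsets 0, 8, 1, 9, 2, 10, ... - the interleaved layout
 * described in igb_cache_ring_register() below.
 */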
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
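                /* Fall through: any remaining queues are mapped 1:1 below */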
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
        int orig_node = adapter->node;
        for (i = 0; i < adapter->num_tx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /* On i350, loopback VLAN packets have the tag byte-swapped. */
                if (adapter->hw.mac.type == e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        igb_cache_ring_register(adapter);

        return 0;

err:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_queues(adapter);

        return -ENOMEM;
}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset in IVAR, should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = array_rd32(E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        array_wr32(E1000_IVAR0, index, ivar);
}

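/* Sentinel queue index meaning "no ring of this type on this vector" */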
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows.  The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
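                /*
                 * Example: rx_queue 10 lands in row 10 & 0x7 = 2 at column
                 * offset (10 & 0x8) << 1 = 16; the matching Tx entry for the
                 * same queue sits another 8 bits higher in that row.
                 */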
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major.  So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
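                /*
                 * Example: rx_queue 5 lands in row 5 >> 1 = 2 at column
                 * offset (5 & 0x1) << 4 = 16; the matching Tx entry again
                 * sits 8 bits higher in the same row.
                 */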
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
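        /*
         * e.g. 4 RSS queues without queue pairing need 4 Rx + 4 Tx + 1
         * link-status vector = 9 MSI-X vectors; with pairing, 4 + 1 = 5.
         */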
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        return netif_set_real_num_rx_queues(adapter->netdev,
                                            adapter->num_rx_queues);
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
        int orig_node = adapter->node;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        return 0;

err_out:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

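        /*
         * With one vector per ring (e.g. 4 Rx + 4 Tx rings and 8 queue
         * vectors) every ring gets its own vector; otherwise Rx/Tx rings
         * with the same index share a vector.
         */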
        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        err = igb_set_interrupt_capability(adapter);
        if (err)
                return err;

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

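        /*
         * Try MSI-X first; if that fails, tear the scheme down and retry
         * with a single MSI vector, and finally fall back to a shared
         * legacy interrupt.
         */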
1283         if (adapter->msix_entries) {
1284                 err = igb_request_msix(adapter);
1285                 if (!err)
1286                         goto request_done;
1287                 /* fall back to MSI */
1288                 igb_clear_interrupt_scheme(adapter);
1289                 if (!pci_enable_msi(pdev))
1290                         adapter->flags |= IGB_FLAG_HAS_MSI;
1291                 igb_free_all_tx_resources(adapter);
1292                 igb_free_all_rx_resources(adapter);
1293                 adapter->num_tx_queues = 1;
1294                 adapter->num_rx_queues = 1;
1295                 adapter->num_q_vectors = 1;
1296                 err = igb_alloc_q_vectors(adapter);
1297                 if (err) {
1298                         dev_err(&pdev->dev,
1299                                 "Unable to allocate memory for vectors\n");
1300                         goto request_done;
1301                 }
1302                 err = igb_alloc_queues(adapter);
1303                 if (err) {
1304                         dev_err(&pdev->dev,
1305                                 "Unable to allocate memory for queues\n");
1306                         igb_free_q_vectors(adapter);
1307                         goto request_done;
1308                 }
1309                 igb_setup_all_tx_resources(adapter);
1310                 igb_setup_all_rx_resources(adapter);
1311         }
1312
1313         igb_assign_vector(adapter->q_vector[0], 0);
1314
1315         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1316                 err = request_irq(pdev->irq, igb_intr_msi, 0,
1317                                   netdev->name, adapter);
1318                 if (!err)
1319                         goto request_done;
1320
1321                 /* fall back to legacy interrupts */
1322                 igb_reset_interrupt_capability(adapter);
1323                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1324         }
1325
1326         err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1327                           netdev->name, adapter);
1328
1329         if (err)
1330                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1331                         err);
1332
1333 request_done:
1334         return err;
1335 }
1336
1337 static void igb_free_irq(struct igb_adapter *adapter)
1338 {
1339         if (adapter->msix_entries) {
1340                 int vector = 0, i;
1341
1342                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1343
1344                 for (i = 0; i < adapter->num_q_vectors; i++)
1345                         free_irq(adapter->msix_entries[vector++].vector,
1346                                  adapter->q_vector[i]);
1347         } else {
1348                 free_irq(adapter->pdev->irq, adapter);
1349         }
1350 }
1351
1352 /**
1353  * igb_irq_disable - Mask off interrupt generation on the NIC
1354  * @adapter: board private structure
1355  **/
1356 static void igb_irq_disable(struct igb_adapter *adapter)
1357 {
1358         struct e1000_hw *hw = &adapter->hw;
1359
1360         /*
1361          * we need to be careful when disabling interrupts.  The VFs are also
1362          * mapped into these registers, so clearing the bits can cause
1363          * issues for the VF drivers; we therefore only clear what we set
1364          */
1365         if (adapter->msix_entries) {
1366                 u32 regval = rd32(E1000_EIAM);
1367                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1368                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1369                 regval = rd32(E1000_EIAC);
1370                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1371         }
1372
1373         wr32(E1000_IAM, 0);
1374         wr32(E1000_IMC, ~0);
1375         wrfl();
1376         if (adapter->msix_entries) {
1377                 int i;
1378                 for (i = 0; i < adapter->num_q_vectors; i++)
1379                         synchronize_irq(adapter->msix_entries[i].vector);
1380         } else {
1381                 synchronize_irq(adapter->pdev->irq);
1382         }
1383 }
1384
1385 /**
1386  * igb_irq_enable - Enable default interrupt generation settings
1387  * @adapter: board private structure
1388  **/
1389 static void igb_irq_enable(struct igb_adapter *adapter)
1390 {
1391         struct e1000_hw *hw = &adapter->hw;
1392
1393         if (adapter->msix_entries) {
1394                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1395                 u32 regval = rd32(E1000_EIAC);
1396                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1397                 regval = rd32(E1000_EIAM);
1398                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1399                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1400                 if (adapter->vfs_allocated_count) {
1401                         wr32(E1000_MBVFIMR, 0xFF);
1402                         ims |= E1000_IMS_VMMB;
1403                 }
1404                 wr32(E1000_IMS, ims);
1405         } else {
1406                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1407                                 E1000_IMS_DRSTA);
1408                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1409                                 E1000_IMS_DRSTA);
1410         }
1411 }
1412
1413 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1414 {
1415         struct e1000_hw *hw = &adapter->hw;
1416         u16 vid = adapter->hw.mng_cookie.vlan_id;
1417         u16 old_vid = adapter->mng_vlan_id;
1418
1419         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1420                 /* add VID to filter table */
1421                 igb_vfta_set(hw, vid, true);
1422                 adapter->mng_vlan_id = vid;
1423         } else {
1424                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1425         }
1426
1427         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1428             (vid != old_vid) &&
1429             !test_bit(old_vid, adapter->active_vlans)) {
1430                 /* remove VID from filter table */
1431                 igb_vfta_set(hw, old_vid, false);
1432         }
1433 }
1434
1435 /**
1436  * igb_release_hw_control - release control of the h/w to f/w
1437  * @adapter: address of board private structure
1438  *
1439  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1440  * For ASF and Pass Through versions of f/w this means that the
1441  * driver is no longer loaded.
1442  *
1443  **/
1444 static void igb_release_hw_control(struct igb_adapter *adapter)
1445 {
1446         struct e1000_hw *hw = &adapter->hw;
1447         u32 ctrl_ext;
1448
1449         /* Let firmware take over control of h/w */
1450         ctrl_ext = rd32(E1000_CTRL_EXT);
1451         wr32(E1000_CTRL_EXT,
1452                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1453 }
1454
1455 /**
1456  * igb_get_hw_control - get control of the h/w from f/w
1457  * @adapter: address of board private structure
1458  *
1459  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1460  * For ASF and Pass Through versions of f/w this means that
1461  * the driver is loaded.
1462  *
1463  **/
1464 static void igb_get_hw_control(struct igb_adapter *adapter)
1465 {
1466         struct e1000_hw *hw = &adapter->hw;
1467         u32 ctrl_ext;
1468
1469         /* Let firmware know the driver has taken over */
1470         ctrl_ext = rd32(E1000_CTRL_EXT);
1471         wr32(E1000_CTRL_EXT,
1472                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1473 }
1474
1475 /**
1476  * igb_configure - configure the hardware for RX and TX
1477  * @adapter: private board structure
1478  **/
1479 static void igb_configure(struct igb_adapter *adapter)
1480 {
1481         struct net_device *netdev = adapter->netdev;
1482         int i;
1483
1484         igb_get_hw_control(adapter);
1485         igb_set_rx_mode(netdev);
1486
1487         igb_restore_vlan(adapter);
1488
1489         igb_setup_tctl(adapter);
1490         igb_setup_mrqc(adapter);
1491         igb_setup_rctl(adapter);
1492
1493         igb_configure_tx(adapter);
1494         igb_configure_rx(adapter);
1495
1496         igb_rx_fifo_flush_82575(&adapter->hw);
1497
1498         /* call igb_desc_unused which always leaves
1499          * at least 1 descriptor unused to make sure
1500          * next_to_use != next_to_clean */
1501         for (i = 0; i < adapter->num_rx_queues; i++) {
1502                 struct igb_ring *ring = adapter->rx_ring[i];
1503                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1504         }
1505 }
1506
1507 /**
1508  * igb_power_up_link - Power up the phy/serdes link
1509  * @adapter: address of board private structure
1510  **/
1511 void igb_power_up_link(struct igb_adapter *adapter)
1512 {
1513         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514                 igb_power_up_phy_copper(&adapter->hw);
1515         else
1516                 igb_power_up_serdes_link_82575(&adapter->hw);
1517         igb_reset_phy(&adapter->hw);
1518 }
1519
1520 /**
1521  * igb_power_down_link - Power down the phy/serdes link
1522  * @adapter: address of board private structure
1523  */
1524 static void igb_power_down_link(struct igb_adapter *adapter)
1525 {
1526         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1527                 igb_power_down_phy_copper_82575(&adapter->hw);
1528         else
1529                 igb_shutdown_serdes_link_82575(&adapter->hw);
1530 }
1531
1532 /**
1533  * igb_up - Open the interface and prepare it to handle traffic
1534  * @adapter: board private structure
1535  **/
1536 int igb_up(struct igb_adapter *adapter)
1537 {
1538         struct e1000_hw *hw = &adapter->hw;
1539         int i;
1540
1541         /* hardware has been reset, we need to reload some things */
1542         igb_configure(adapter);
1543
1544         clear_bit(__IGB_DOWN, &adapter->state);
1545
1546         for (i = 0; i < adapter->num_q_vectors; i++)
1547                 napi_enable(&(adapter->q_vector[i]->napi));
1548
1549         if (adapter->msix_entries)
1550                 igb_configure_msix(adapter);
1551         else
1552                 igb_assign_vector(adapter->q_vector[0], 0);
1553
1554         /* Clear any pending interrupts. */
1555         rd32(E1000_ICR);
1556         igb_irq_enable(adapter);
1557
1558         /* notify VFs that reset has been completed */
1559         if (adapter->vfs_allocated_count) {
1560                 u32 reg_data = rd32(E1000_CTRL_EXT);
1561                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1562                 wr32(E1000_CTRL_EXT, reg_data);
1563         }
1564
1565         netif_tx_start_all_queues(adapter->netdev);
1566
1567         /* start the watchdog. */
1568         hw->mac.get_link_status = 1;
1569         schedule_work(&adapter->watchdog_task);
1570
1571         return 0;
1572 }
1573
1574 void igb_down(struct igb_adapter *adapter)
1575 {
1576         struct net_device *netdev = adapter->netdev;
1577         struct e1000_hw *hw = &adapter->hw;
1578         u32 tctl, rctl;
1579         int i;
1580
1581         /* signal that we're down so the interrupt handler does not
1582          * reschedule our watchdog timer */
1583         set_bit(__IGB_DOWN, &adapter->state);
1584
1585         /* disable receives in the hardware */
1586         rctl = rd32(E1000_RCTL);
1587         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1588         /* flush and sleep below */
1589
1590         netif_tx_stop_all_queues(netdev);
1591
1592         /* disable transmits in the hardware */
1593         tctl = rd32(E1000_TCTL);
1594         tctl &= ~E1000_TCTL_EN;
1595         wr32(E1000_TCTL, tctl);
1596         /* flush both disables and wait for them to finish */
1597         wrfl();
1598         msleep(10);
1599
1600         for (i = 0; i < adapter->num_q_vectors; i++)
1601                 napi_disable(&(adapter->q_vector[i]->napi));
1602
1603         igb_irq_disable(adapter);
1604
1605         del_timer_sync(&adapter->watchdog_timer);
1606         del_timer_sync(&adapter->phy_info_timer);
1607
1608         netif_carrier_off(netdev);
1609
1610         /* record the stats before reset */
1611         spin_lock(&adapter->stats64_lock);
1612         igb_update_stats(adapter, &adapter->stats64);
1613         spin_unlock(&adapter->stats64_lock);
1614
1615         adapter->link_speed = 0;
1616         adapter->link_duplex = 0;
1617
1618         if (!pci_channel_offline(adapter->pdev))
1619                 igb_reset(adapter);
1620         igb_clean_all_tx_rings(adapter);
1621         igb_clean_all_rx_rings(adapter);
1622 #ifdef CONFIG_IGB_DCA
1623
1624         /* since we reset the hardware, DCA settings were cleared */
1625         igb_setup_dca(adapter);
1626 #endif
1627 }
1628
1629 void igb_reinit_locked(struct igb_adapter *adapter)
1630 {
1631         WARN_ON(in_interrupt());
1632         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1633                 msleep(1);
1634         igb_down(adapter);
1635         igb_up(adapter);
1636         clear_bit(__IGB_RESETTING, &adapter->state);
1637 }
1638
1639 void igb_reset(struct igb_adapter *adapter)
1640 {
1641         struct pci_dev *pdev = adapter->pdev;
1642         struct e1000_hw *hw = &adapter->hw;
1643         struct e1000_mac_info *mac = &hw->mac;
1644         struct e1000_fc_info *fc = &hw->fc;
1645         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1646         u16 hwm;
1647
1648         /* Repartition PBA for an MTU greater than 9k.
1649          * CTRL.RST is required for the change to take effect.
1650          */
1651         switch (mac->type) {
1652         case e1000_i350:
1653         case e1000_82580:
1654                 pba = rd32(E1000_RXPBS);
1655                 pba = igb_rxpbs_adjust_82580(pba);
1656                 break;
1657         case e1000_82576:
1658                 pba = rd32(E1000_RXPBS);
1659                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1660                 break;
1661         case e1000_82575:
1662         default:
1663                 pba = E1000_PBA_34K;
1664                 break;
1665         }
1666
1667         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1668             (mac->type < e1000_82576)) {
1669                 /* adjust PBA for jumbo frames */
1670                 wr32(E1000_PBA, pba);
1671
1672                 /* To maintain wire speed transmits, the Tx FIFO should be
1673                  * large enough to accommodate two full transmit packets,
1674                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1675                  * the Rx FIFO should be large enough to accommodate at least
1676                  * one full receive packet and is similarly rounded up and
1677                  * expressed in KB. */
1678                 pba = rd32(E1000_PBA);
1679                 /* upper 16 bits hold the Tx packet buffer allocation size in KB */
1680                 tx_space = pba >> 16;
1681                 /* lower 16 bits hold the Rx packet buffer allocation size in KB */
1682                 pba &= 0xffff;
1683                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1684                  * packet; don't count the Ethernet FCS because hardware appends it */
1685                 min_tx_space = (adapter->max_frame_size +
1686                                 sizeof(union e1000_adv_tx_desc) -
1687                                 ETH_FCS_LEN) * 2;
1688                 min_tx_space = ALIGN(min_tx_space, 1024);
1689                 min_tx_space >>= 10;
1690                 /* software strips receive CRC, so leave room for it */
1691                 min_rx_space = adapter->max_frame_size;
1692                 min_rx_space = ALIGN(min_rx_space, 1024);
1693                 min_rx_space >>= 10;
1694
1695                 /* If current Tx allocation is less than the min Tx FIFO size,
1696                  * and the min Tx FIFO size is less than the current Rx FIFO
1697                  * allocation, take space away from current Rx allocation */
1698                 if (tx_space < min_tx_space &&
1699                     ((min_tx_space - tx_space) < pba)) {
1700                         pba = pba - (min_tx_space - tx_space);
1701
1702                         /* if short on rx space, rx wins and must trump tx
1703                          * adjustment */
1704                         if (pba < min_rx_space)
1705                                 pba = min_rx_space;
1706                 }
1707                 wr32(E1000_PBA, pba);
1708         }
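
	/* Worked example of the FIFO sizing above (illustrative only,
	 * assuming a 9022-byte max frame): min_tx_space =
	 * (9022 + 16 - 4) * 2 = 18068 bytes, rounded up to 18432 and
	 * expressed in KB as 18; min_rx_space = ALIGN(9022, 1024) >> 10
	 * = 9 KB.
	 */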
1709
1710         /* flow control settings */
1711         /* The high water mark must be low enough to fit one full frame
1712          * (or the size used for early receive) above it in the Rx FIFO.
1713          * Set it to the lower of:
1714          * - 90% of the Rx FIFO size, or
1715          * - the full Rx FIFO size minus one full frame */
1716         hwm = min(((pba << 10) * 9 / 10),
1717                         ((pba << 10) - 2 * adapter->max_frame_size));
1718
1719         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1720         fc->low_water = fc->high_water - 16;
1721         fc->pause_time = 0xFFFF;
1722         fc->send_xon = 1;
1723         fc->current_mode = fc->requested_mode;
1724
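	/* Worked example of the watermarks above (illustrative only,
	 * assuming pba = 34 KB and a 1522-byte max frame):
	 * pba << 10 = 34816 bytes; 90% of that is 31334, while
	 * 34816 - 2 * 1522 = 31772, so hwm = 31334.  Masking to 16-byte
	 * granularity then gives high_water = 31328 and low_water = 31312.
	 */
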
1725         /* disable receive for all VFs and wait one second */
1726         if (adapter->vfs_allocated_count) {
1727                 int i;
1728                 for (i = 0; i < adapter->vfs_allocated_count; i++)
1729                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1730
1731                 /* ping all the active vfs to let them know we are going down */
1732                 igb_ping_all_vfs(adapter);
1733
1734                 /* disable transmits and receives */
1735                 wr32(E1000_VFRE, 0);
1736                 wr32(E1000_VFTE, 0);
1737         }
1738
1739         /* Allow time for pending master requests to run */
1740         hw->mac.ops.reset_hw(hw);
1741         wr32(E1000_WUC, 0);
1742
1743         if (hw->mac.ops.init_hw(hw))
1744                 dev_err(&pdev->dev, "Hardware Error\n");
1745
1746         igb_init_dmac(adapter, pba);
1747         if (!netif_running(adapter->netdev))
1748                 igb_power_down_link(adapter);
1749
1750         igb_update_mng_vlan(adapter);
1751
1752         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1753         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1754
1755         igb_get_phy_info(hw);
1756 }
1757
1758 static netdev_features_t igb_fix_features(struct net_device *netdev,
1759         netdev_features_t features)
1760 {
1761         /*
1762          * Since there is no support for separate rx/tx vlan accel
1763          * enable/disable, make sure the Tx flag is always in the same state as Rx.
1764          */
1765         if (features & NETIF_F_HW_VLAN_RX)
1766                 features |= NETIF_F_HW_VLAN_TX;
1767         else
1768                 features &= ~NETIF_F_HW_VLAN_TX;
1769
1770         return features;
1771 }
1772
1773 static int igb_set_features(struct net_device *netdev,
1774         netdev_features_t features)
1775 {
1776         netdev_features_t changed = netdev->features ^ features;
1777         struct igb_adapter *adapter = netdev_priv(netdev);
1778
1779         if (changed & NETIF_F_HW_VLAN_RX)
1780                 igb_vlan_mode(netdev, features);
1781
1782         if (!(changed & NETIF_F_RXALL))
1783                 return 0;
1784
1785         netdev->features = features;
1786
1787         if (netif_running(netdev))
1788                 igb_reinit_locked(adapter);
1789         else
1790                 igb_reset(adapter);
1791
1792         return 0;
1793 }
1794
1795 static const struct net_device_ops igb_netdev_ops = {
1796         .ndo_open               = igb_open,
1797         .ndo_stop               = igb_close,
1798         .ndo_start_xmit         = igb_xmit_frame,
1799         .ndo_get_stats64        = igb_get_stats64,
1800         .ndo_set_rx_mode        = igb_set_rx_mode,
1801         .ndo_set_mac_address    = igb_set_mac,
1802         .ndo_change_mtu         = igb_change_mtu,
1803         .ndo_do_ioctl           = igb_ioctl,
1804         .ndo_tx_timeout         = igb_tx_timeout,
1805         .ndo_validate_addr      = eth_validate_addr,
1806         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1807         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1808         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1809         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1810         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1811         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1812 #ifdef CONFIG_NET_POLL_CONTROLLER
1813         .ndo_poll_controller    = igb_netpoll,
1814 #endif
1815         .ndo_fix_features       = igb_fix_features,
1816         .ndo_set_features       = igb_set_features,
1817 };
1818
1819 /**
1820  * igb_probe - Device Initialization Routine
1821  * @pdev: PCI device information struct
1822  * @ent: entry in igb_pci_tbl
1823  *
1824  * Returns 0 on success, negative on failure
1825  *
1826  * igb_probe initializes an adapter identified by a pci_dev structure.
1827  * The OS initialization, configuring of the adapter private structure,
1828  * and a hardware reset occur.
1829  **/
1830 static int __devinit igb_probe(struct pci_dev *pdev,
1831                                const struct pci_device_id *ent)
1832 {
1833         struct net_device *netdev;
1834         struct igb_adapter *adapter;
1835         struct e1000_hw *hw;
1836         u16 eeprom_data = 0;
1837         s32 ret_val;
1838         static int global_quad_port_a; /* global quad port a indication */
1839         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1840         unsigned long mmio_start, mmio_len;
1841         int err, pci_using_dac;
1842         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1843         u8 part_str[E1000_PBANUM_LENGTH];
1844
1845         /* Catch broken hardware that put the wrong VF device ID in
1846          * the PCIe SR-IOV capability.
1847          */
1848         if (pdev->is_virtfn) {
1849                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1850                      pci_name(pdev), pdev->vendor, pdev->device);
1851                 return -EINVAL;
1852         }
1853
1854         err = pci_enable_device_mem(pdev);
1855         if (err)
1856                 return err;
1857
1858         pci_using_dac = 0;
1859         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1860         if (!err) {
1861                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1862                 if (!err)
1863                         pci_using_dac = 1;
1864         } else {
1865                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1866                 if (err) {
1867                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1868                         if (err) {
1869                                 dev_err(&pdev->dev, "No usable DMA "
1870                                         "configuration, aborting\n");
1871                                 goto err_dma;
1872                         }
1873                 }
1874         }
1875
1876         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1877                                            IORESOURCE_MEM),
1878                                            igb_driver_name);
1879         if (err)
1880                 goto err_pci_reg;
1881
1882         pci_enable_pcie_error_reporting(pdev);
1883
1884         pci_set_master(pdev);
1885         pci_save_state(pdev);
1886
1887         err = -ENOMEM;
1888         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1889                                    IGB_MAX_TX_QUEUES);
1890         if (!netdev)
1891                 goto err_alloc_etherdev;
1892
1893         SET_NETDEV_DEV(netdev, &pdev->dev);
1894
1895         pci_set_drvdata(pdev, netdev);
1896         adapter = netdev_priv(netdev);
1897         adapter->netdev = netdev;
1898         adapter->pdev = pdev;
1899         hw = &adapter->hw;
1900         hw->back = adapter;
1901         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1902
1903         mmio_start = pci_resource_start(pdev, 0);
1904         mmio_len = pci_resource_len(pdev, 0);
1905
1906         err = -EIO;
1907         hw->hw_addr = ioremap(mmio_start, mmio_len);
1908         if (!hw->hw_addr)
1909                 goto err_ioremap;
1910
1911         netdev->netdev_ops = &igb_netdev_ops;
1912         igb_set_ethtool_ops(netdev);
1913         netdev->watchdog_timeo = 5 * HZ;
1914
1915         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1916
1917         netdev->mem_start = mmio_start;
1918         netdev->mem_end = mmio_start + mmio_len;
1919
1920         /* PCI config space info */
1921         hw->vendor_id = pdev->vendor;
1922         hw->device_id = pdev->device;
1923         hw->revision_id = pdev->revision;
1924         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1925         hw->subsystem_device_id = pdev->subsystem_device;
1926
1927         /* Copy the default MAC, PHY and NVM function pointers */
1928         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1929         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1930         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1931         /* Initialize skew-specific constants */
1932         err = ei->get_invariants(hw);
1933         if (err)
1934                 goto err_sw_init;
1935
1936         /* setup the private structure */
1937         err = igb_sw_init(adapter);
1938         if (err)
1939                 goto err_sw_init;
1940
1941         igb_get_bus_info_pcie(hw);
1942
1943         hw->phy.autoneg_wait_to_complete = false;
1944
1945         /* Copper options */
1946         if (hw->phy.media_type == e1000_media_type_copper) {
1947                 hw->phy.mdix = AUTO_ALL_MODES;
1948                 hw->phy.disable_polarity_correction = false;
1949                 hw->phy.ms_type = e1000_ms_hw_default;
1950         }
1951
1952         if (igb_check_reset_block(hw))
1953                 dev_info(&pdev->dev,
1954                         "PHY reset is blocked due to SOL/IDER session.\n");
1955
1956         /*
1957          * features is initialized to 0 in allocation; it might have bits
1958          * set by igb_sw_init, so we should use an OR instead of an
1959          * assignment.
1960          */
1961         netdev->features |= NETIF_F_SG |
1962                             NETIF_F_IP_CSUM |
1963                             NETIF_F_IPV6_CSUM |
1964                             NETIF_F_TSO |
1965                             NETIF_F_TSO6 |
1966                             NETIF_F_RXHASH |
1967                             NETIF_F_RXCSUM |
1968                             NETIF_F_HW_VLAN_RX |
1969                             NETIF_F_HW_VLAN_TX;
1970
1971         /* copy netdev features into list of user selectable features */
1972         netdev->hw_features |= netdev->features;
1973         netdev->hw_features |= NETIF_F_RXALL;
1974
1975         /* set this bit last since it cannot be part of hw_features */
1976         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1977
1978         netdev->vlan_features |= NETIF_F_TSO |
1979                                  NETIF_F_TSO6 |
1980                                  NETIF_F_IP_CSUM |
1981                                  NETIF_F_IPV6_CSUM |
1982                                  NETIF_F_SG;
1983
1984         netdev->priv_flags |= IFF_SUPP_NOFCS;
1985
1986         if (pci_using_dac) {
1987                 netdev->features |= NETIF_F_HIGHDMA;
1988                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1989         }
1990
1991         if (hw->mac.type >= e1000_82576) {
1992                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1993                 netdev->features |= NETIF_F_SCTP_CSUM;
1994         }
1995
1996         netdev->priv_flags |= IFF_UNICAST_FLT;
1997
1998         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1999
2000         /* before reading the NVM, reset the controller to put the device in a
2001          * known good starting state */
2002         hw->mac.ops.reset_hw(hw);
2003
2004         /* make sure the NVM is good */
2005         if (hw->nvm.ops.validate(hw) < 0) {
2006                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2007                 err = -EIO;
2008                 goto err_eeprom;
2009         }
2010
2011         /* copy the MAC address out of the NVM */
2012         if (hw->mac.ops.read_mac_addr(hw))
2013                 dev_err(&pdev->dev, "NVM Read Error\n");
2014
2015         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2016         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2017
2018         if (!is_valid_ether_addr(netdev->perm_addr)) {
2019                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2020                 err = -EIO;
2021                 goto err_eeprom;
2022         }
2023
2024         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2025                     (unsigned long) adapter);
2026         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2027                     (unsigned long) adapter);
2028
2029         INIT_WORK(&adapter->reset_task, igb_reset_task);
2030         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2031
2032         /* Initialize link properties that are user-changeable */
2033         adapter->fc_autoneg = true;
2034         hw->mac.autoneg = true;
2035         hw->phy.autoneg_advertised = 0x2f;
2036
2037         hw->fc.requested_mode = e1000_fc_default;
2038         hw->fc.current_mode = e1000_fc_default;
2039
2040         igb_validate_mdi_setting(hw);
2041
2042         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2043          * enable the ACPI Magic Packet filter
2044          */
2045
2046         if (hw->bus.func == 0)
2047                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2048         else if (hw->mac.type >= e1000_82580)
2049                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2050                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2051                                  &eeprom_data);
2052         else if (hw->bus.func == 1)
2053                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2054
2055         if (eeprom_data & eeprom_apme_mask)
2056                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2057
2058         /* now that we have the eeprom settings, apply the special cases where
2059          * the eeprom may be wrong or the board simply won't support wake on
2060          * lan on a particular port */
2061         switch (pdev->device) {
2062         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2063                 adapter->eeprom_wol = 0;
2064                 break;
2065         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2066         case E1000_DEV_ID_82576_FIBER:
2067         case E1000_DEV_ID_82576_SERDES:
2068                 /* Wake events only supported on port A for dual fiber
2069                  * regardless of eeprom setting */
2070                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2071                         adapter->eeprom_wol = 0;
2072                 break;
2073         case E1000_DEV_ID_82576_QUAD_COPPER:
2074         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2075                 /* if quad port adapter, disable WoL on all but port A */
2076                 if (global_quad_port_a != 0)
2077                         adapter->eeprom_wol = 0;
2078                 else
2079                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2080                 /* Reset for multiple quad port adapters */
2081                 if (++global_quad_port_a == 4)
2082                         global_quad_port_a = 0;
2083                 break;
2084         }
2085
2086         /* initialize the wol settings based on the eeprom settings */
2087         adapter->wol = adapter->eeprom_wol;
2088         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2089
2090         /* reset the hardware with the new settings */
2091         igb_reset(adapter);
2092
2093         /* let the f/w know that the h/w is now under the control of the
2094          * driver. */
2095         igb_get_hw_control(adapter);
2096
2097         strcpy(netdev->name, "eth%d");
2098         err = register_netdev(netdev);
2099         if (err)
2100                 goto err_register;
2101
2102         /* carrier off reporting is important to ethtool even BEFORE open */
2103         netif_carrier_off(netdev);
2104
2105 #ifdef CONFIG_IGB_DCA
2106         if (dca_add_requester(&pdev->dev) == 0) {
2107                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2108                 dev_info(&pdev->dev, "DCA enabled\n");
2109                 igb_setup_dca(adapter);
2110         }
2111
2112 #endif
2113         /* do hw tstamp init after resetting */
2114         igb_init_hw_timer(adapter);
2115
2116         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2117         /* print bus type/speed/width info */
2118         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2119                  netdev->name,
2120                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2121                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2122                                                             "unknown"),
2123                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2124                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2125                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2126                    "unknown"),
2127                  netdev->dev_addr);
2128
2129         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2130         if (ret_val)
2131                 strcpy(part_str, "Unknown");
2132         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2133         dev_info(&pdev->dev,
2134                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2135                 adapter->msix_entries ? "MSI-X" :
2136                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2137                 adapter->num_rx_queues, adapter->num_tx_queues);
2138         switch (hw->mac.type) {
2139         case e1000_i350:
2140                 igb_set_eee_i350(hw);
2141                 break;
2142         default:
2143                 break;
2144         }
2145
2146         pm_runtime_put_noidle(&pdev->dev);
2147         return 0;
2148
2149 err_register:
2150         igb_release_hw_control(adapter);
2151 err_eeprom:
2152         if (!igb_check_reset_block(hw))
2153                 igb_reset_phy(hw);
2154
2155         if (hw->flash_address)
2156                 iounmap(hw->flash_address);
2157 err_sw_init:
2158         igb_clear_interrupt_scheme(adapter);
2159         iounmap(hw->hw_addr);
2160 err_ioremap:
2161         free_netdev(netdev);
2162 err_alloc_etherdev:
2163         pci_release_selected_regions(pdev,
2164                                      pci_select_bars(pdev, IORESOURCE_MEM));
2165 err_pci_reg:
2166 err_dma:
2167         pci_disable_device(pdev);
2168         return err;
2169 }
2170
2171 /**
2172  * igb_remove - Device Removal Routine
2173  * @pdev: PCI device information struct
2174  *
2175  * igb_remove is called by the PCI subsystem to alert the driver
2176  * that it should release a PCI device.  This could be caused by a
2177  * Hot-Plug event, or because the driver is going to be removed from
2178  * memory.
2179  **/
2180 static void __devexit igb_remove(struct pci_dev *pdev)
2181 {
2182         struct net_device *netdev = pci_get_drvdata(pdev);
2183         struct igb_adapter *adapter = netdev_priv(netdev);
2184         struct e1000_hw *hw = &adapter->hw;
2185
2186         pm_runtime_get_noresume(&pdev->dev);
2187
2188         /*
2189          * The watchdog timer may be rescheduled, so explicitly
2190          * prevent it from being rescheduled.
2191          */
2192         set_bit(__IGB_DOWN, &adapter->state);
2193         del_timer_sync(&adapter->watchdog_timer);
2194         del_timer_sync(&adapter->phy_info_timer);
2195
2196         cancel_work_sync(&adapter->reset_task);
2197         cancel_work_sync(&adapter->watchdog_task);
2198
2199 #ifdef CONFIG_IGB_DCA
2200         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2201                 dev_info(&pdev->dev, "DCA disabled\n");
2202                 dca_remove_requester(&pdev->dev);
2203                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2204                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2205         }
2206 #endif
2207
2208         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2209          * would have already happened in close and is redundant. */
2210         igb_release_hw_control(adapter);
2211
2212         unregister_netdev(netdev);
2213
2214         igb_clear_interrupt_scheme(adapter);
2215
2216 #ifdef CONFIG_PCI_IOV
2217         /* reclaim resources allocated to VFs */
2218         if (adapter->vf_data) {
2219                 /* disable iov and allow time for transactions to clear */
2220                 if (!igb_check_vf_assignment(adapter)) {
2221                         pci_disable_sriov(pdev);
2222                         msleep(500);
2223                 } else {
2224                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2225                 }
2226
2227                 kfree(adapter->vf_data);
2228                 adapter->vf_data = NULL;
2229                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2230                 wrfl();
2231                 msleep(100);
2232                 dev_info(&pdev->dev, "IOV Disabled\n");
2233         }
2234 #endif
2235
2236         iounmap(hw->hw_addr);
2237         if (hw->flash_address)
2238                 iounmap(hw->flash_address);
2239         pci_release_selected_regions(pdev,
2240                                      pci_select_bars(pdev, IORESOURCE_MEM));
2241
2242         kfree(adapter->shadow_vfta);
2243         free_netdev(netdev);
2244
2245         pci_disable_pcie_error_reporting(pdev);
2246
2247         pci_disable_device(pdev);
2248 }
2249
2250 /**
2251  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2252  * @adapter: board private structure to initialize
2253  *
2254  * This function initializes the vf specific data storage and then attempts to
2255  * allocate the VFs.  The reason for this ordering is that it is much
2256  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2257  * the memory for the VFs.
2258  **/
2259 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2260 {
2261 #ifdef CONFIG_PCI_IOV
2262         struct pci_dev *pdev = adapter->pdev;
2263         int old_vfs = igb_find_enabled_vfs(adapter);
2264         int i;
2265
2266         if (old_vfs) {
2267                 dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2268                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2269                 adapter->vfs_allocated_count = old_vfs;
2270         }
2271
2272         if (!adapter->vfs_allocated_count)
2273                 return;
2274
2275         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2276                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2277         /* if allocation failed then we do not support SR-IOV */
2278         if (!adapter->vf_data) {
2279                 adapter->vfs_allocated_count = 0;
2280                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2281                         "Data Storage\n");
2282                 goto out;
2283         }
2284
2285         if (!old_vfs) {
2286                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2287                         goto err_out;
2288         }
2289         dev_info(&pdev->dev, "%d VFs allocated\n",
2290                  adapter->vfs_allocated_count);
2291         for (i = 0; i < adapter->vfs_allocated_count; i++)
2292                 igb_vf_configure(adapter, i);
2293
2294         /* DMA Coalescing is not supported in IOV mode. */
2295         adapter->flags &= ~IGB_FLAG_DMAC;
2296         goto out;
2297 err_out:
2298         kfree(adapter->vf_data);
2299         adapter->vf_data = NULL;
2300         adapter->vfs_allocated_count = 0;
2301 out:
2302         return;
2303 #endif /* CONFIG_PCI_IOV */
2304 }
2305
2306 /**
2307  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2308  * @adapter: board private structure to initialize
2309  *
2310  * igb_init_hw_timer initializes the function pointer and values for the hw
2311  * timer found in hardware.
2312  **/
2313 static void igb_init_hw_timer(struct igb_adapter *adapter)
2314 {
2315         struct e1000_hw *hw = &adapter->hw;
2316
2317         switch (hw->mac.type) {
2318         case e1000_i350:
2319         case e1000_82580:
2320                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2321                 adapter->cycles.read = igb_read_clock;
2322                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2323                 adapter->cycles.mult = 1;
2324                 /*
2325                  * The 82580 timesync updates the system timer in 8ns increments
2326                  * and the value cannot be shifted.  Instead we need to shift
2327                  * the registers to generate a 64bit timer value.  As a result
2328                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2329                  * 24 in order to generate a larger value for synchronization.
2330                  */
2331                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2332                 /* disable system timer temporarily by setting bit 31 */
2333                 wr32(E1000_TSAUXC, 0x80000000);
2334                 wrfl();
2335
2336                 /* Set registers so that rollover occurs soon to test this. */
2337                 wr32(E1000_SYSTIMR, 0x00000000);
2338                 wr32(E1000_SYSTIML, 0x80000000);
2339                 wr32(E1000_SYSTIMH, 0x000000FF);
2340                 wrfl();
2341
2342                 /* enable system timer by clearing bit 31 */
2343                 wr32(E1000_TSAUXC, 0x0);
2344                 wrfl();
2345
2346                 timecounter_init(&adapter->clock,
2347                                  &adapter->cycles,
2348                                  ktime_to_ns(ktime_get_real()));
2349                 /*
2350                  * Synchronize our NIC clock against system wall clock. NIC
2351                  * time stamp reading requires ~3us per sample, and each sample
2352                  * was pretty stable even under load, so only 10
2353                  * samples are required for each offset comparison.
2354                  */
2355                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2356                 adapter->compare.source = &adapter->clock;
2357                 adapter->compare.target = ktime_get_real;
2358                 adapter->compare.num_samples = 10;
2359                 timecompare_update(&adapter->compare, 0);
2360                 break;
2361         case e1000_82576:
2362                 /*
2363                  * Initialize hardware timer: we keep it running just in case
2364                  * some program needs it later on.
2365                  */
2366                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2367                 adapter->cycles.read = igb_read_clock;
2368                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2369                 adapter->cycles.mult = 1;
2370                 /*
2371                  * Scale the NIC clock cycle by a large factor so that
2372                  * relatively small clock corrections can be added or
2373                  * subtracted at each clock tick. The drawbacks of a large
2374                  * factor are a) that the clock register overflows more quickly
2375                  * (not such a big deal) and b) that the increment per tick has
2376                  * to fit into 24 bits.  As a result we need to use a shift of
2377                  * 19 so we can fit a value of 16 into the TIMINCA register.
2378                  */
2379                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2380                 wr32(E1000_TIMINCA,
2381                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2382                                 (16 << IGB_82576_TSYNC_SHIFT));
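
		/*
		 * Illustrative check of the math above: the increment is
		 * 16 << 19 counter units per 16ns tick, and with mult = 1
		 * and shift = 19 the timecounter converts units back to ns
		 * as (units * mult) >> shift, so each tick contributes
		 * exactly 16ns of elapsed time.
		 */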
2383
2384                 /* Set registers so that rollover occurs soon to test this. */
2385                 wr32(E1000_SYSTIML, 0x00000000);
2386                 wr32(E1000_SYSTIMH, 0xFF800000);
2387                 wrfl();
2388
2389                 timecounter_init(&adapter->clock,
2390                                  &adapter->cycles,
2391                                  ktime_to_ns(ktime_get_real()));
2392                 /*
2393                  * Synchronize our NIC clock against system wall clock. NIC
2394                  * time stamp reading requires ~3us per sample, and each sample
2395                  * was pretty stable even under load, so only 10
2396                  * samples are required for each offset comparison.
2397                  */
2398                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2399                 adapter->compare.source = &adapter->clock;
2400                 adapter->compare.target = ktime_get_real;
2401                 adapter->compare.num_samples = 10;
2402                 timecompare_update(&adapter->compare, 0);
2403                 break;
2404         case e1000_82575:
2405                 /* 82575 does not support timesync */
2406         default:
2407                 break;
2408         }
2410 }
2411
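/*
 * A minimal sketch (not driver code) of how the clock and compare
 * structures initialized above are consumed when a hardware timestamp
 * is read back (cf. igb_systim_to_hwtstamp() elsewhere in this file):
 *
 *	u64 ns = timecounter_cyc2time(&adapter->clock, regval);
 *	shhwtstamps->hwtstamp = ns_to_ktime(ns);
 *	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
 */
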
2412 /**
2413  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2414  * @adapter: board private structure to initialize
2415  *
2416  * igb_sw_init initializes the Adapter private data structure.
2417  * Fields are initialized based on PCI device information and
2418  * OS network device settings (MTU size).
2419  **/
2420 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2421 {
2422         struct e1000_hw *hw = &adapter->hw;
2423         struct net_device *netdev = adapter->netdev;
2424         struct pci_dev *pdev = adapter->pdev;
2425
2426         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2427
2428         /* set default ring sizes */
2429         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2430         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2431
2432         /* set default ITR values */
2433         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2434         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2435
2436         /* set default work limits */
2437         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2438
2439         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2440                                   VLAN_HLEN;
2441         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2442
2443         adapter->node = -1;
2444
2445         spin_lock_init(&adapter->stats64_lock);
2446 #ifdef CONFIG_PCI_IOV
2447         switch (hw->mac.type) {
2448         case e1000_82576:
2449         case e1000_i350:
2450                 if (max_vfs > 7) {
2451                         dev_warn(&pdev->dev,
2452                                  "Maximum of 7 VFs per PF, using max\n");
2453                         adapter->vfs_allocated_count = 7;
2454                 } else
2455                         adapter->vfs_allocated_count = max_vfs;
2456                 break;
2457         default:
2458                 break;
2459         }
2460 #endif /* CONFIG_PCI_IOV */
2461         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2462         /* i350 cannot do RSS and SR-IOV at the same time */
2463         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2464                 adapter->rss_queues = 1;
2465
2466         /*
2467          * if rss_queues > 4, or vfs are going to be allocated with
2468          * rss_queues > 1, then we should combine the queues into a
2469          * queue pair in order to conserve interrupts due to limited supply
2470          */
2471         if ((adapter->rss_queues > 4) ||
2472             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2473                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
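
	/* Example (illustrative): 8 unpaired RSS queues would require
	 * 8 Tx + 8 Rx queue vectors plus one for "other" events, more
	 * MSI-X vectors than the driver will request; pairing one Tx and
	 * one Rx ring per q_vector halves the queue vector count.
	 */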
2474
2475         /* Setup and initialize a copy of the hw vlan table array */
2476         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2477                                 E1000_VLAN_FILTER_TBL_SIZE,
2478                                 GFP_ATOMIC);
2479
2480         /* This call may decrease the number of queues */
2481         if (igb_init_interrupt_scheme(adapter)) {
2482                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2483                 return -ENOMEM;
2484         }
2485
2486         igb_probe_vfs(adapter);
2487
2488         /* Explicitly disable IRQ since the NIC can be in any state. */
2489         igb_irq_disable(adapter);
2490
2491         if (hw->mac.type == e1000_i350)
2492                 adapter->flags &= ~IGB_FLAG_DMAC;
2493
2494         set_bit(__IGB_DOWN, &adapter->state);
2495         return 0;
2496 }
2497
2498 /**
2499  * igb_open - Called when a network interface is made active
2500  * @netdev: network interface device structure
2501  *
2502  * Returns 0 on success, negative value on failure
2503  *
2504  * The open entry point is called when a network interface is made
2505  * active by the system (IFF_UP).  At this point all resources needed
2506  * for transmit and receive operations are allocated, the interrupt
2507  * handler is registered with the OS, the watchdog timer is started,
2508  * and the stack is notified that the interface is ready.
2509  **/
2510 static int __igb_open(struct net_device *netdev, bool resuming)
2511 {
2512         struct igb_adapter *adapter = netdev_priv(netdev);
2513         struct e1000_hw *hw = &adapter->hw;
2514         struct pci_dev *pdev = adapter->pdev;
2515         int err;
2516         int i;
2517
2518         /* disallow open during test */
2519         if (test_bit(__IGB_TESTING, &adapter->state)) {
2520                 WARN_ON(resuming);
2521                 return -EBUSY;
2522         }
2523
2524         if (!resuming)
2525                 pm_runtime_get_sync(&pdev->dev);
2526
2527         netif_carrier_off(netdev);
2528
2529         /* allocate transmit descriptors */
2530         err = igb_setup_all_tx_resources(adapter);
2531         if (err)
2532                 goto err_setup_tx;
2533
2534         /* allocate receive descriptors */
2535         err = igb_setup_all_rx_resources(adapter);
2536         if (err)
2537                 goto err_setup_rx;
2538
2539         igb_power_up_link(adapter);
2540
2541         /* before we allocate an interrupt, we must be ready to handle it.
2542          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2543          * as soon as we call pci_request_irq, so we have to set up our
2544          * clean_rx handler before we do so.  */
2545         igb_configure(adapter);
2546
2547         err = igb_request_irq(adapter);
2548         if (err)
2549                 goto err_req_irq;
2550
2551         /* From here on the code is the same as igb_up() */
2552         clear_bit(__IGB_DOWN, &adapter->state);
2553
2554         for (i = 0; i < adapter->num_q_vectors; i++)
2555                 napi_enable(&(adapter->q_vector[i]->napi));
2556
2557         /* Clear any pending interrupts. */
2558         rd32(E1000_ICR);
2559
2560         igb_irq_enable(adapter);
2561
2562         /* notify VFs that reset has been completed */
2563         if (adapter->vfs_allocated_count) {
2564                 u32 reg_data = rd32(E1000_CTRL_EXT);
2565                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2566                 wr32(E1000_CTRL_EXT, reg_data);
2567         }
2568
2569         netif_tx_start_all_queues(netdev);
2570
2571         if (!resuming)
2572                 pm_runtime_put(&pdev->dev);
2573
2574         /* start the watchdog. */
2575         hw->mac.get_link_status = 1;
2576         schedule_work(&adapter->watchdog_task);
2577
2578         return 0;
2579
2580 err_req_irq:
2581         igb_release_hw_control(adapter);
2582         igb_power_down_link(adapter);
2583         igb_free_all_rx_resources(adapter);
2584 err_setup_rx:
2585         igb_free_all_tx_resources(adapter);
2586 err_setup_tx:
2587         igb_reset(adapter);
2588         if (!resuming)
2589                 pm_runtime_put(&pdev->dev);
2590
2591         return err;
2592 }
2593
2594 static int igb_open(struct net_device *netdev)
2595 {
2596         return __igb_open(netdev, false);
2597 }
2598
2599 /**
2600  * igb_close - Disables a network interface
2601  * @netdev: network interface device structure
2602  *
2603  * Returns 0, this is not allowed to fail
2604  *
2605  * The close entry point is called when an interface is de-activated
2606  * by the OS.  The hardware is still under the driver's control, but
2607  * needs to be disabled.  A global MAC reset is issued to stop the
2608  * hardware, and all transmit and receive resources are freed.
2609  **/
2610 static int __igb_close(struct net_device *netdev, bool suspending)
2611 {
2612         struct igb_adapter *adapter = netdev_priv(netdev);
2613         struct pci_dev *pdev = adapter->pdev;
2614
2615         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2616
2617         if (!suspending)
2618                 pm_runtime_get_sync(&pdev->dev);
2619
2620         igb_down(adapter);
2621         igb_free_irq(adapter);
2622
2623         igb_free_all_tx_resources(adapter);
2624         igb_free_all_rx_resources(adapter);
2625
2626         if (!suspending)
2627                 pm_runtime_put_sync(&pdev->dev);
2628         return 0;
2629 }
2630
2631 static int igb_close(struct net_device *netdev)
2632 {
2633         return __igb_close(netdev, false);
2634 }
2635
2636 /**
2637  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2638  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2639  *
2640  * Return 0 on success, negative on failure
2641  **/
2642 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2643 {
2644         struct device *dev = tx_ring->dev;
2645         int orig_node = dev_to_node(dev);
2646         int size;
2647
2648         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2649         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2650         if (!tx_ring->tx_buffer_info)
2651                 tx_ring->tx_buffer_info = vzalloc(size);
2652         if (!tx_ring->tx_buffer_info)
2653                 goto err;
2654
2655         /* round up to nearest 4K */
2656         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2657         tx_ring->size = ALIGN(tx_ring->size, 4096);
2658
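	/* Try to place the descriptor ring on the ring's preferred NUMA
	 * node by temporarily overriding the device node; if that fails,
	 * the allocation is retried on the original node below.
	 */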
2659         set_dev_node(dev, tx_ring->numa_node);
2660         tx_ring->desc = dma_alloc_coherent(dev,
2661                                            tx_ring->size,
2662                                            &tx_ring->dma,
2663                                            GFP_KERNEL);
2664         set_dev_node(dev, orig_node);
2665         if (!tx_ring->desc)
2666                 tx_ring->desc = dma_alloc_coherent(dev,
2667                                                    tx_ring->size,
2668                                                    &tx_ring->dma,
2669                                                    GFP_KERNEL);
2670
2671         if (!tx_ring->desc)
2672                 goto err;
2673
2674         tx_ring->next_to_use = 0;
2675         tx_ring->next_to_clean = 0;
2676
2677         return 0;
2678
2679 err:
2680         vfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;
2681         dev_err(dev,
2682                 "Unable to allocate memory for the transmit descriptor ring\n");
2683         return -ENOMEM;
2684 }
2685
2686 /**
2687  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2688  *                                (Descriptors) for all queues
2689  * @adapter: board private structure
2690  *
2691  * Return 0 on success, negative on failure
2692  **/
2693 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2694 {
2695         struct pci_dev *pdev = adapter->pdev;
2696         int i, err = 0;
2697
2698         for (i = 0; i < adapter->num_tx_queues; i++) {
2699                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2700                 if (err) {
2701                         dev_err(&pdev->dev,
2702                                 "Allocation for Tx Queue %u failed\n", i);
2703                         for (i--; i >= 0; i--)
2704                                 igb_free_tx_resources(adapter->tx_ring[i]);
2705                         break;
2706                 }
2707         }
2708
2709         return err;
2710 }
2711
2712 /**
2713  * igb_setup_tctl - configure the transmit control registers
2714  * @adapter: Board private structure
2715  **/
2716 void igb_setup_tctl(struct igb_adapter *adapter)
2717 {
2718         struct e1000_hw *hw = &adapter->hw;
2719         u32 tctl;
2720
2721         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2722         wr32(E1000_TXDCTL(0), 0);
2723
2724         /* Program the Transmit Control Register */
2725         tctl = rd32(E1000_TCTL);
2726         tctl &= ~E1000_TCTL_CT;
2727         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2728                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2729
2730         igb_config_collision_dist(hw);
2731
2732         /* Enable transmits */
2733         tctl |= E1000_TCTL_EN;
2734
2735         wr32(E1000_TCTL, tctl);
2736 }
2737
2738 /**
2739  * igb_configure_tx_ring - Configure transmit ring after Reset
2740  * @adapter: board private structure
2741  * @ring: tx ring to configure
2742  *
2743  * Configure a transmit ring after a reset.
2744  **/
2745 void igb_configure_tx_ring(struct igb_adapter *adapter,
2746                            struct igb_ring *ring)
2747 {
2748         struct e1000_hw *hw = &adapter->hw;
2749         u32 txdctl = 0;
2750         u64 tdba = ring->dma;
2751         int reg_idx = ring->reg_idx;
2752
2753         /* disable the queue */
2754         wr32(E1000_TXDCTL(reg_idx), 0);
2755         wrfl();
2756         mdelay(10);
2757
2758         wr32(E1000_TDLEN(reg_idx),
2759                         ring->count * sizeof(union e1000_adv_tx_desc));
2760         wr32(E1000_TDBAL(reg_idx),
2761                         tdba & 0x00000000ffffffffULL);
2762         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2763
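	/* Cache the mapped TDT address so the transmit hot path can
	 * advance the hardware tail with a plain writel() on ring->tail.
	 */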
2764         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2765         wr32(E1000_TDH(reg_idx), 0);
2766         writel(0, ring->tail);
2767
2768         txdctl |= IGB_TX_PTHRESH;
2769         txdctl |= IGB_TX_HTHRESH << 8;
2770         txdctl |= IGB_TX_WTHRESH << 16;
2771
2772         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2773         wr32(E1000_TXDCTL(reg_idx), txdctl);
2774
2775         netdev_tx_reset_queue(txring_txq(ring));
2776 }
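
/* Example of the base-address split above (illustrative values): for a
 * descriptor ring at DMA address 0x0000000123456000, TDBAL is written
 * with 0x23456000 (the low 32 bits) and TDBAH with 0x00000001 (the
 * high 32 bits), giving the hardware the full 64-bit ring base.
 */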
2777
2778 /**
2779  * igb_configure_tx - Configure transmit Unit after Reset
2780  * @adapter: board private structure
2781  *
2782  * Configure the Tx unit of the MAC after a reset.
2783  **/
2784 static void igb_configure_tx(struct igb_adapter *adapter)
2785 {
2786         int i;
2787
2788         for (i = 0; i < adapter->num_tx_queues; i++)
2789                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2790 }
2791
2792 /**
2793  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2794  * @rx_ring:    rx descriptor ring (for a specific queue) to set up
2795  *
2796  * Returns 0 on success, negative on failure
2797  **/
2798 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2799 {
2800         struct device *dev = rx_ring->dev;
2801         int orig_node = dev_to_node(dev);
2802         int size, desc_len;
2803
2804         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2805         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2806         if (!rx_ring->rx_buffer_info)
2807                 rx_ring->rx_buffer_info = vzalloc(size);
2808         if (!rx_ring->rx_buffer_info)
2809                 goto err;
2810
2811         desc_len = sizeof(union e1000_adv_rx_desc);
2812
2813         /* Round up to nearest 4K */
2814         rx_ring->size = rx_ring->count * desc_len;
2815         rx_ring->size = ALIGN(rx_ring->size, 4096);
2816
2817         set_dev_node(dev, rx_ring->numa_node);
2818         rx_ring->desc = dma_alloc_coherent(dev,
2819                                            rx_ring->size,
2820                                            &rx_ring->dma,
2821                                            GFP_KERNEL);
2822         set_dev_node(dev, orig_node);
2823         if (!rx_ring->desc)
2824                 rx_ring->desc = dma_alloc_coherent(dev,
2825                                                    rx_ring->size,
2826                                                    &rx_ring->dma,
2827                                                    GFP_KERNEL);
2828
2829         if (!rx_ring->desc)
2830                 goto err;
2831
2832         rx_ring->next_to_clean = 0;
2833         rx_ring->next_to_use = 0;
2834
2835         return 0;
2836
2837 err:
2838         vfree(rx_ring->rx_buffer_info);
2839         rx_ring->rx_buffer_info = NULL;
2840         dev_err(dev,
2841                 "Unable to allocate memory for the receive descriptor ring\n");
2842         return -ENOMEM;
2843 }
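
/* Note on the allocation pattern above: both the rx_buffer_info array
 * and the descriptor ring are first requested from the ring's own NUMA
 * node (vzalloc_node, and dma_alloc_coherent with the device node
 * temporarily overridden), and only fall back to an any-node allocation
 * if the node-local attempt fails, preferring locality but not
 * requiring it.
 */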
2844
2845 /**
2846  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2847  *                                (Descriptors) for all queues
2848  * @adapter: board private structure
2849  *
2850  * Return 0 on success, negative on failure
2851  **/
2852 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2853 {
2854         struct pci_dev *pdev = adapter->pdev;
2855         int i, err = 0;
2856
2857         for (i = 0; i < adapter->num_rx_queues; i++) {
2858                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2859                 if (err) {
2860                         dev_err(&pdev->dev,
2861                                 "Allocation for Rx Queue %u failed\n", i);
2862                         for (i--; i >= 0; i--)
2863                                 igb_free_rx_resources(adapter->rx_ring[i]);
2864                         break;
2865                 }
2866         }
2867
2868         return err;
2869 }
2870
2871 /**
2872  * igb_setup_mrqc - configure the multiple receive queue control registers
2873  * @adapter: Board private structure
2874  **/
2875 static void igb_setup_mrqc(struct igb_adapter *adapter)
2876 {
2877         struct e1000_hw *hw = &adapter->hw;
2878         u32 mrqc, rxcsum;
2879         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2880         union e1000_reta {
2881                 u32 dword;
2882                 u8  bytes[4];
2883         } reta;
2884         static const u8 rsshash[40] = {
2885                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2886                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2887                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2888                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2889
2890         /* Fill out hash function seeds */
2891         for (j = 0; j < 10; j++) {
2892                 u32 rsskey = rsshash[(j * 4)];
2893                 rsskey |= rsshash[(j * 4) + 1] << 8;
2894                 rsskey |= rsshash[(j * 4) + 2] << 16;
2895                 rsskey |= rsshash[(j * 4) + 3] << 24;
2896                 array_wr32(E1000_RSSRK(0), j, rsskey);
2897         }
2898
2899         num_rx_queues = adapter->rss_queues;
2900
2901         if (adapter->vfs_allocated_count) {
2902                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2903                 switch (hw->mac.type) {
2904                 case e1000_i350:
2905                 case e1000_82580:
2906                         num_rx_queues = 1;
2907                         shift = 0;
2908                         break;
2909                 case e1000_82576:
2910                         shift = 3;
2911                         num_rx_queues = 2;
2912                         break;
2913                 case e1000_82575:
2914                         shift = 2;
2915                         shift2 = 6;     /* fall through */
2916                 default:
2917                         break;
2918                 }
2919         } else {
2920                 if (hw->mac.type == e1000_82575)
2921                         shift = 6;
2922         }
2923
2924         for (j = 0; j < (32 * 4); j++) {
2925                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2926                 if (shift2)
2927                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2928                 if ((j & 3) == 3)
2929                         wr32(E1000_RETA(j >> 2), reta.dword);
2930         }
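
        /* Illustrative RETA contents: with num_rx_queues = 4 and shift = 0,
         * the 128 entries written above cycle 0, 1, 2, 3, 0, ... so RSS
         * hash results spread evenly across the queues; in the 82576 VMDq
         * case (num_rx_queues = 2, shift = 3) each entry is 0x00 or 0x08,
         * selecting one of the default pool's two queues.
         */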
2931
2932         /*
2933          * Disable raw packet checksumming so that RSS hash is placed in
2934          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2935          * offloads as they are enabled by default
2936          */
2937         rxcsum = rd32(E1000_RXCSUM);
2938         rxcsum |= E1000_RXCSUM_PCSD;
2939
2940         if (adapter->hw.mac.type >= e1000_82576)
2941                 /* Enable Receive Checksum Offload for SCTP */
2942                 rxcsum |= E1000_RXCSUM_CRCOFL;
2943
2944         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2945         wr32(E1000_RXCSUM, rxcsum);
2946
2947         /* If VMDq is enabled then we set the appropriate mode for that, else
2948          * we default to RSS so that an RSS hash is calculated per packet even
2949          * if we are only using one queue */
2950         if (adapter->vfs_allocated_count) {
2951                 if (hw->mac.type > e1000_82575) {
2952                         /* Set the default pool for the PF's first queue */
2953                         u32 vtctl = rd32(E1000_VT_CTL);
2954                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2955                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2956                         vtctl |= adapter->vfs_allocated_count <<
2957                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2958                         wr32(E1000_VT_CTL, vtctl);
2959                 }
2960                 if (adapter->rss_queues > 1)
2961                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2962                 else
2963                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2964         } else {
2965                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2966         }
2967         igb_vmm_control(adapter);
2968
2969         /*
2970          * Generate RSS hash based on TCP port numbers and/or
2971          * IPv4/v6 src and dst addresses since UDP cannot be
2972          * hashed reliably due to IP fragmentation
2973          */
2974         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2975                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2976                 E1000_MRQC_RSS_FIELD_IPV6 |
2977                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2978                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2979
2980         wr32(E1000_MRQC, mrqc);
2981 }
2982
2983 /**
2984  * igb_setup_rctl - configure the receive control registers
2985  * @adapter: Board private structure
2986  **/
2987 void igb_setup_rctl(struct igb_adapter *adapter)
2988 {
2989         struct e1000_hw *hw = &adapter->hw;
2990         u32 rctl;
2991
2992         rctl = rd32(E1000_RCTL);
2993
2994         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2995         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2996
2997         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2998                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2999
3000         /*
3001          * enable stripping of CRC. It's unlikely this will break BMC
3002          * redirection as it did with e1000. Newer features require
3003          * that the HW strips the CRC.
3004          */
3005         rctl |= E1000_RCTL_SECRC;
3006
3007         /* disable store bad packets and clear size bits. */
3008         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3009
3010         /* enable LPE to prevent packets larger than max_frame_size */
3011         rctl |= E1000_RCTL_LPE;
3012
3013         /* disable queue 0 to prevent tail write w/o re-config */
3014         wr32(E1000_RXDCTL(0), 0);
3015
3016         /* Attention!!!  For SR-IOV PF driver operations you must enable
3017          * queue drop for all VF and PF queues to prevent head of line blocking
3018          * if an un-trusted VF does not provide descriptors to hardware.
3019          */
3020         if (adapter->vfs_allocated_count) {
3021                 /* set all queue drop enable bits */
3022                 wr32(E1000_QDE, ALL_QUEUES);
3023         }
3024
3025         /* This is useful for sniffing bad packets. */
3026         if (adapter->netdev->features & NETIF_F_RXALL) {
3027                 /* UPE and MPE will be handled by normal PROMISC logic
3028                  * in igb_set_rx_mode */
3029                 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3030                          E1000_RCTL_BAM | /* RX All Bcast Pkts */
3031                          E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3032
3033                 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3034                           E1000_RCTL_DPF | /* Allow filtered pause */
3035                           E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3036                 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3037                  * and that breaks VLANs.
3038                  */
3039         }
3040
3041         wr32(E1000_RCTL, rctl);
3042 }
3043
3044 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3045                                    int vfn)
3046 {
3047         struct e1000_hw *hw = &adapter->hw;
3048         u32 vmolr;
3049
3050         /* if this is a VF rather than the PF, check whether the VF has
3051          * VLANs enabled and, if so, grow the size to allow for a vlan tag */
3052         if (vfn < adapter->vfs_allocated_count &&
3053             adapter->vf_data[vfn].vlans_enabled)
3054                 size += VLAN_TAG_SIZE;
3055
3056         vmolr = rd32(E1000_VMOLR(vfn));
3057         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3058         vmolr |= size | E1000_VMOLR_LPE;
3059         wr32(E1000_VMOLR(vfn), vmolr);
3060
3061         return 0;
3062 }
3063
3064 /**
3065  * igb_rlpml_set - set maximum receive packet size
3066  * @adapter: board private structure
3067  *
3068  * Configure maximum receivable packet size.
3069  **/
3070 static void igb_rlpml_set(struct igb_adapter *adapter)
3071 {
3072         u32 max_frame_size = adapter->max_frame_size;
3073         struct e1000_hw *hw = &adapter->hw;
3074         u16 pf_id = adapter->vfs_allocated_count;
3075
3076         if (pf_id) {
3077                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3078                 /*
3079                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3080                  * to our max jumbo frame size, in case we need to enable
3081                  * jumbo frames on one of the rings later.
3082                  * This will not pass over-length frames into the default
3083                  * queue because it's gated by the VMOLR.RLPML.
3084                  */
3085                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3086         }
3087
3088         wr32(E1000_RLPML, max_frame_size);
3089 }
3090
3091 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3092                                  int vfn, bool aupe)
3093 {
3094         struct e1000_hw *hw = &adapter->hw;
3095         u32 vmolr;
3096
3097         /*
3098          * This register exists only on 82576 and newer, so on older
3099          * hardware we should exit and do nothing
3100          */
3101         if (hw->mac.type < e1000_82576)
3102                 return;
3103
3104         vmolr = rd32(E1000_VMOLR(vfn));
3105         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3106         if (aupe)
3107                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3108         else
3109                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3110
3111         /* clear all bits that might not be set */
3112         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3113
3114         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3115                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3116         /*
3117          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3118          * multicast packets
3119          */
3120         if (vfn <= adapter->vfs_allocated_count)
3121                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3122
3123         wr32(E1000_VMOLR(vfn), vmolr);
3124 }
3125
3126 /**
3127  * igb_configure_rx_ring - Configure a receive ring after Reset
3128  * @adapter: board private structure
3129  * @ring: receive ring to be configured
3130  *
3131  * Configure the Rx unit of the MAC after a reset.
3132  **/
3133 void igb_configure_rx_ring(struct igb_adapter *adapter,
3134                            struct igb_ring *ring)
3135 {
3136         struct e1000_hw *hw = &adapter->hw;
3137         u64 rdba = ring->dma;
3138         int reg_idx = ring->reg_idx;
3139         u32 srrctl = 0, rxdctl = 0;
3140
3141         /* disable the queue */
3142         wr32(E1000_RXDCTL(reg_idx), 0);
3143
3144         /* Set DMA base address registers */
3145         wr32(E1000_RDBAL(reg_idx),
3146              rdba & 0x00000000ffffffffULL);
3147         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3148         wr32(E1000_RDLEN(reg_idx),
3149                        ring->count * sizeof(union e1000_adv_rx_desc));
3150
3151         /* initialize head and tail */
3152         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3153         wr32(E1000_RDH(reg_idx), 0);
3154         writel(0, ring->tail);
3155
3156         /* set descriptor configuration */
3157         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3158 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3159         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3160 #else
3161         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3162 #endif
3163         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3164         if (hw->mac.type >= e1000_82580)
3165                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3166         /* Only set Drop Enable if we are supporting multiple queues */
3167         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3168                 srrctl |= E1000_SRRCTL_DROP_EN;
3169
3170         wr32(E1000_SRRCTL(reg_idx), srrctl);
3171
3172         /* set filtering for VMDQ pools */
3173         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3174
3175         rxdctl |= IGB_RX_PTHRESH;
3176         rxdctl |= IGB_RX_HTHRESH << 8;
3177         rxdctl |= IGB_RX_WTHRESH << 16;
3178
3179         /* enable receive descriptor fetching */
3180         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3181         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3182 }
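
/* Buffer sizing example for the SRRCTL value above, assuming 4K pages
 * and the usual 1 KB granularity of the BSIZEPKT field (an assumption
 * about the hardware encoding, not taken from this file): half a page
 * is 2048 bytes, so each Rx descriptor gets a 2 KB packet buffer in
 * addition to the IGB_RX_HDR_LEN header buffer.
 */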
3183
3184 /**
3185  * igb_configure_rx - Configure receive Unit after Reset
3186  * @adapter: board private structure
3187  *
3188  * Configure the Rx unit of the MAC after a reset.
3189  **/
3190 static void igb_configure_rx(struct igb_adapter *adapter)
3191 {
3192         int i;
3193
3194         /* set UTA to appropriate mode */
3195         igb_set_uta(adapter);
3196
3197         /* set the correct pool for the PF default MAC address in entry 0 */
3198         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3199                          adapter->vfs_allocated_count);
3200
3201         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3202          * the Base and Length of the Rx Descriptor Ring */
3203         for (i = 0; i < adapter->num_rx_queues; i++)
3204                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3205 }
3206
3207 /**
3208  * igb_free_tx_resources - Free Tx Resources per Queue
3209  * @tx_ring: Tx descriptor ring for a specific queue
3210  *
3211  * Free all transmit software resources
3212  **/
3213 void igb_free_tx_resources(struct igb_ring *tx_ring)
3214 {
3215         igb_clean_tx_ring(tx_ring);
3216
3217         vfree(tx_ring->tx_buffer_info);
3218         tx_ring->tx_buffer_info = NULL;
3219
3220         /* if not set, then don't free */
3221         if (!tx_ring->desc)
3222                 return;
3223
3224         dma_free_coherent(tx_ring->dev, tx_ring->size,
3225                           tx_ring->desc, tx_ring->dma);
3226
3227         tx_ring->desc = NULL;
3228 }
3229
3230 /**
3231  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3232  * @adapter: board private structure
3233  *
3234  * Free all transmit software resources
3235  **/
3236 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3237 {
3238         int i;
3239
3240         for (i = 0; i < adapter->num_tx_queues; i++)
3241                 igb_free_tx_resources(adapter->tx_ring[i]);
3242 }
3243
3244 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3245                                     struct igb_tx_buffer *tx_buffer)
3246 {
3247         if (tx_buffer->skb) {
3248                 dev_kfree_skb_any(tx_buffer->skb);
3249                 if (tx_buffer->dma)
3250                         dma_unmap_single(ring->dev,
3251                                          tx_buffer->dma,
3252                                          tx_buffer->length,
3253                                          DMA_TO_DEVICE);
3254         } else if (tx_buffer->dma) {
3255                 dma_unmap_page(ring->dev,
3256                                tx_buffer->dma,
3257                                tx_buffer->length,
3258                                DMA_TO_DEVICE);
3259         }
3260         tx_buffer->next_to_watch = NULL;
3261         tx_buffer->skb = NULL;
3262         tx_buffer->dma = 0;
3263         /* buffer_info must be completely set up in the transmit path */
3264 }
3265
3266 /**
3267  * igb_clean_tx_ring - Free Tx Buffers
3268  * @tx_ring: ring to be cleaned
3269  **/
3270 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3271 {
3272         struct igb_tx_buffer *buffer_info;
3273         unsigned long size;
3274         u16 i;
3275
3276         if (!tx_ring->tx_buffer_info)
3277                 return;
3278
3279         /* Free all the Tx ring sk_buffs */
3280         for (i = 0; i < tx_ring->count; i++) {
3281                 buffer_info = &tx_ring->tx_buffer_info[i];
3282                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3283         }
3284
3285         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3286         memset(tx_ring->tx_buffer_info, 0, size);
3287
3288         /* Zero out the descriptor ring */
3289         memset(tx_ring->desc, 0, tx_ring->size);
3290
3291         tx_ring->next_to_use = 0;
3292         tx_ring->next_to_clean = 0;
3293 }
3294
3295 /**
3296  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3297  * @adapter: board private structure
3298  **/
3299 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3300 {
3301         int i;
3302
3303         for (i = 0; i < adapter->num_tx_queues; i++)
3304                 igb_clean_tx_ring(adapter->tx_ring[i]);
3305 }
3306
3307 /**
3308  * igb_free_rx_resources - Free Rx Resources
3309  * @rx_ring: ring to clean the resources from
3310  *
3311  * Free all receive software resources
3312  **/
3313 void igb_free_rx_resources(struct igb_ring *rx_ring)
3314 {
3315         igb_clean_rx_ring(rx_ring);
3316
3317         vfree(rx_ring->rx_buffer_info);
3318         rx_ring->rx_buffer_info = NULL;
3319
3320         /* if not set, then don't free */
3321         if (!rx_ring->desc)
3322                 return;
3323
3324         dma_free_coherent(rx_ring->dev, rx_ring->size,
3325                           rx_ring->desc, rx_ring->dma);
3326
3327         rx_ring->desc = NULL;
3328 }
3329
3330 /**
3331  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3332  * @adapter: board private structure
3333  *
3334  * Free all receive software resources
3335  **/
3336 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3337 {
3338         int i;
3339
3340         for (i = 0; i < adapter->num_rx_queues; i++)
3341                 igb_free_rx_resources(adapter->rx_ring[i]);
3342 }
3343
3344 /**
3345  * igb_clean_rx_ring - Free Rx Buffers per Queue
3346  * @rx_ring: ring to free buffers from
3347  **/
3348 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3349 {
3350         unsigned long size;
3351         u16 i;
3352
3353         if (!rx_ring->rx_buffer_info)
3354                 return;
3355
3356         /* Free all the Rx ring sk_buffs */
3357         for (i = 0; i < rx_ring->count; i++) {
3358                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3359                 if (buffer_info->dma) {
3360                         dma_unmap_single(rx_ring->dev,
3361                                          buffer_info->dma,
3362                                          IGB_RX_HDR_LEN,
3363                                          DMA_FROM_DEVICE);
3364                         buffer_info->dma = 0;
3365                 }
3366
3367                 if (buffer_info->skb) {
3368                         dev_kfree_skb(buffer_info->skb);
3369                         buffer_info->skb = NULL;
3370                 }
3371                 if (buffer_info->page_dma) {
3372                         dma_unmap_page(rx_ring->dev,
3373                                        buffer_info->page_dma,
3374                                        PAGE_SIZE / 2,
3375                                        DMA_FROM_DEVICE);
3376                         buffer_info->page_dma = 0;
3377                 }
3378                 if (buffer_info->page) {
3379                         put_page(buffer_info->page);
3380                         buffer_info->page = NULL;
3381                         buffer_info->page_offset = 0;
3382                 }
3383         }
3384
3385         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3386         memset(rx_ring->rx_buffer_info, 0, size);
3387
3388         /* Zero out the descriptor ring */
3389         memset(rx_ring->desc, 0, rx_ring->size);
3390
3391         rx_ring->next_to_clean = 0;
3392         rx_ring->next_to_use = 0;
3393 }
3394
3395 /**
3396  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3397  * @adapter: board private structure
3398  **/
3399 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3400 {
3401         int i;
3402
3403         for (i = 0; i < adapter->num_rx_queues; i++)
3404                 igb_clean_rx_ring(adapter->rx_ring[i]);
3405 }
3406
3407 /**
3408  * igb_set_mac - Change the Ethernet Address of the NIC
3409  * @netdev: network interface device structure
3410  * @p: pointer to an address structure
3411  *
3412  * Returns 0 on success, negative on failure
3413  **/
3414 static int igb_set_mac(struct net_device *netdev, void *p)
3415 {
3416         struct igb_adapter *adapter = netdev_priv(netdev);
3417         struct e1000_hw *hw = &adapter->hw;
3418         struct sockaddr *addr = p;
3419
3420         if (!is_valid_ether_addr(addr->sa_data))
3421                 return -EADDRNOTAVAIL;
3422
3423         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3424         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3425
3426         /* set the correct pool for the new PF MAC address in entry 0 */
3427         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3428                          adapter->vfs_allocated_count);
3429
3430         return 0;
3431 }
3432
3433 /**
3434  * igb_write_mc_addr_list - write multicast addresses to MTA
3435  * @netdev: network interface device structure
3436  *
3437  * Writes multicast address list to the MTA hash table.
3438  * Returns: -ENOMEM on failure
3439  *                0 on no addresses written
3440  *                X on writing X addresses to MTA
3441  **/
3442 static int igb_write_mc_addr_list(struct net_device *netdev)
3443 {
3444         struct igb_adapter *adapter = netdev_priv(netdev);
3445         struct e1000_hw *hw = &adapter->hw;
3446         struct netdev_hw_addr *ha;
3447         u8  *mta_list;
3448         int i;
3449
3450         if (netdev_mc_empty(netdev)) {
3451                 /* nothing to program, so clear mc list */
3452                 igb_update_mc_addr_list(hw, NULL, 0);
3453                 igb_restore_vf_multicasts(adapter);
3454                 return 0;
3455         }
3456
3457         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3458         if (!mta_list)
3459                 return -ENOMEM;
3460
3461         /* The shared function expects a packed array of only addresses. */
3462         i = 0;
3463         netdev_for_each_mc_addr(ha, netdev)
3464                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3465
3466         igb_update_mc_addr_list(hw, mta_list, i);
3467         kfree(mta_list);
3468
3469         return netdev_mc_count(netdev);
3470 }
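
/* Layout of the mta_list built above (illustrative): the addresses are
 * packed back to back with no padding, so bytes 0-5 hold the first
 * multicast address, bytes 6-11 the second, and so on, which is the
 * flat array format igb_update_mc_addr_list() expects.
 */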
3471
3472 /**
3473  * igb_write_uc_addr_list - write unicast addresses to RAR table
3474  * @netdev: network interface device structure
3475  *
3476  * Writes unicast address list to the RAR table.
3477  * Returns: -ENOMEM on failure/insufficient address space
3478  *                0 on no addresses written
3479  *                X on writing X addresses to the RAR table
3480  **/
3481 static int igb_write_uc_addr_list(struct net_device *netdev)
3482 {
3483         struct igb_adapter *adapter = netdev_priv(netdev);
3484         struct e1000_hw *hw = &adapter->hw;
3485         unsigned int vfn = adapter->vfs_allocated_count;
3486         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3487         int count = 0;
3488
3489         /* return ENOMEM indicating insufficient memory for addresses */
3490         if (netdev_uc_count(netdev) > rar_entries)
3491                 return -ENOMEM;
3492
3493         if (!netdev_uc_empty(netdev) && rar_entries) {
3494                 struct netdev_hw_addr *ha;
3495
3496                 netdev_for_each_uc_addr(ha, netdev) {
3497                         if (!rar_entries)
3498                                 break;
3499                         igb_rar_set_qsel(adapter, ha->addr,
3500                                          rar_entries--,
3501                                          vfn);
3502                         count++;
3503                 }
3504         }
3505         /* clear unused RAR entries, in reverse order to avoid write combining */
3506         for (; rar_entries > 0 ; rar_entries--) {
3507                 wr32(E1000_RAH(rar_entries), 0);
3508                 wr32(E1000_RAL(rar_entries), 0);
3509         }
3510         wrfl();
3511
3512         return count;
3513 }
3514
3515 /**
3516  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3517  * @netdev: network interface device structure
3518  *
3519  * The set_rx_mode entry point is called whenever the unicast or multicast
3520  * address lists or the network interface flags are updated.  This routine is
3521  * responsible for configuring the hardware for proper unicast, multicast,
3522  * promiscuous mode, and all-multi behavior.
3523  **/
3524 static void igb_set_rx_mode(struct net_device *netdev)
3525 {
3526         struct igb_adapter *adapter = netdev_priv(netdev);
3527         struct e1000_hw *hw = &adapter->hw;
3528         unsigned int vfn = adapter->vfs_allocated_count;
3529         u32 rctl, vmolr = 0;
3530         int count;
3531
3532         /* Check for Promiscuous and All Multicast modes */
3533         rctl = rd32(E1000_RCTL);
3534
3535         /* clear the effected bits */
3536         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3537
3538         if (netdev->flags & IFF_PROMISC) {
3539                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3540                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3541         } else {
3542                 if (netdev->flags & IFF_ALLMULTI) {
3543                         rctl |= E1000_RCTL_MPE;
3544                         vmolr |= E1000_VMOLR_MPME;
3545                 } else {
3546                         /*
3547                          * Write addresses to the MTA, if the attempt fails
3548                          * then we should just turn on promiscuous mode so
3549                          * that we can at least receive multicast traffic
3550                          */
3551                         count = igb_write_mc_addr_list(netdev);
3552                         if (count < 0) {
3553                                 rctl |= E1000_RCTL_MPE;
3554                                 vmolr |= E1000_VMOLR_MPME;
3555                         } else if (count) {
3556                                 vmolr |= E1000_VMOLR_ROMPE;
3557                         }
3558                 }
3559                 /*
3560                  * Write addresses to available RAR registers, if there is not
3561                  * sufficient space to store all the addresses then enable
3562                  * unicast promiscuous mode
3563                  */
3564                 count = igb_write_uc_addr_list(netdev);
3565                 if (count < 0) {
3566                         rctl |= E1000_RCTL_UPE;
3567                         vmolr |= E1000_VMOLR_ROPE;
3568                 }
3569                 rctl |= E1000_RCTL_VFE;
3570         }
3571         wr32(E1000_RCTL, rctl);
3572
3573         /*
3574          * In order to support SR-IOV and eventually VMDq it is necessary to set
3575          * the VMOLR to enable the appropriate modes.  Without this workaround
3576          * we will have issues with VLAN tag stripping not being done for frames
3577          * that are only arriving because we are the default pool
3578          */
3579         if (hw->mac.type < e1000_82576)
3580                 return;
3581
3582         vmolr |= rd32(E1000_VMOLR(vfn)) &
3583                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3584         wr32(E1000_VMOLR(vfn), vmolr);
3585         igb_restore_vf_multicasts(adapter);
3586 }
3587
3588 static void igb_check_wvbr(struct igb_adapter *adapter)
3589 {
3590         struct e1000_hw *hw = &adapter->hw;
3591         u32 wvbr = 0;
3592
3593         switch (hw->mac.type) {
3594         case e1000_82576:
3595         case e1000_i350:
3596                 if (!(wvbr = rd32(E1000_WVBR)))
3597                         return;
3598                 break;
3599         default:
3600                 break;
3601         }
3602
3603         adapter->wvbr |= wvbr;
3604 }
3605
3606 #define IGB_STAGGERED_QUEUE_OFFSET 8
3607
3608 static void igb_spoof_check(struct igb_adapter *adapter)
3609 {
3610         int j;
3611
3612         if (!adapter->wvbr)
3613                 return;
3614
3615         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3616                 if (adapter->wvbr & (1 << j) ||
3617                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3618                         dev_warn(&adapter->pdev->dev,
3619                                 "Spoof event(s) detected on VF %d\n", j);
3620                         adapter->wvbr &=
3621                                 ~((1 << j) |
3622                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3623                 }
3624         }
3625 }
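
/* Example of the WVBR decoding above (illustrative value): wvbr =
 * 0x00000101 has bit 0 and bit 8 (0 + IGB_STAGGERED_QUEUE_OFFSET) set,
 * so a spoof event is reported for VF 0 and both of that VF's bits are
 * cleared from adapter->wvbr.
 */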
3626
3627 /* Need to wait a few seconds after link up to get diagnostic information from
3628  * the phy */
3629 static void igb_update_phy_info(unsigned long data)
3630 {
3631         struct igb_adapter *adapter = (struct igb_adapter *) data;
3632         igb_get_phy_info(&adapter->hw);
3633 }
3634
3635 /**
3636  * igb_has_link - check shared code for link and determine up/down
3637  * @adapter: pointer to driver private info
3638  **/
3639 bool igb_has_link(struct igb_adapter *adapter)
3640 {
3641         struct e1000_hw *hw = &adapter->hw;
3642         bool link_active = false;
3643         s32 ret_val = 0;
3644
3645         /* get_link_status is set on LSC (link status) interrupt or
3646          * rx sequence error interrupt.  get_link_status will stay
3647          * set until e1000_check_for_link establishes link
3648          * for copper adapters ONLY
3649          */
3650         switch (hw->phy.media_type) {
3651         case e1000_media_type_copper:
3652                 if (hw->mac.get_link_status) {
3653                         ret_val = hw->mac.ops.check_for_link(hw);
3654                         link_active = !hw->mac.get_link_status;
3655                 } else {
3656                         link_active = true;
3657                 }
3658                 break;
3659         case e1000_media_type_internal_serdes:
3660                 ret_val = hw->mac.ops.check_for_link(hw);
3661                 link_active = hw->mac.serdes_has_link;
3662                 break;
3663         default:
3664         case e1000_media_type_unknown:
3665                 break;
3666         }
3667
3668         return link_active;
3669 }
3670
3671 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3672 {
3673         bool ret = false;
3674         u32 ctrl_ext, thstat;
3675
3676         /* check for thermal sensor event on i350, copper only */
3677         if (hw->mac.type == e1000_i350) {
3678                 thstat = rd32(E1000_THSTAT);
3679                 ctrl_ext = rd32(E1000_CTRL_EXT);
3680
3681                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3682                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3683                         ret = !!(thstat & event);
3684                 }
3685         }
3686
3687         return ret;
3688 }
3689
3690 /**
3691  * igb_watchdog - Timer Call-back
3692  * @data: pointer to adapter cast into an unsigned long
3693  **/
3694 static void igb_watchdog(unsigned long data)
3695 {
3696         struct igb_adapter *adapter = (struct igb_adapter *)data;
3697         /* Do the rest outside of interrupt context */
3698         schedule_work(&adapter->watchdog_task);
3699 }
3700
3701 static void igb_watchdog_task(struct work_struct *work)
3702 {
3703         struct igb_adapter *adapter = container_of(work,
3704                                                    struct igb_adapter,
3705                                                    watchdog_task);
3706         struct e1000_hw *hw = &adapter->hw;
3707         struct net_device *netdev = adapter->netdev;
3708         u32 link;
3709         int i;
3710
3711         link = igb_has_link(adapter);
3712         if (link) {
3713                 /* Cancel scheduled suspend requests. */
3714                 pm_runtime_resume(netdev->dev.parent);
3715
3716                 if (!netif_carrier_ok(netdev)) {
3717                         u32 ctrl;
3718                         hw->mac.ops.get_speed_and_duplex(hw,
3719                                                          &adapter->link_speed,
3720                                                          &adapter->link_duplex);
3721
3722                         ctrl = rd32(E1000_CTRL);
3723                         /* Link status message must follow this format */
3724                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3725                                "Duplex, Flow Control: %s\n",
3726                                netdev->name,
3727                                adapter->link_speed,
3728                                adapter->link_duplex == FULL_DUPLEX ?
3729                                "Full" : "Half",
3730                                (ctrl & E1000_CTRL_TFCE) &&
3731                                (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3732                                (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3733                                (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3734
3735                         /* check for thermal sensor event */
3736                         if (igb_thermal_sensor_event(hw,
3737                             E1000_THSTAT_LINK_THROTTLE)) {
3738                                 netdev_info(netdev, "The network adapter link "
3739                                             "speed was downshifted because it "
3740                                             "overheated\n");
3741                         }
3742
3743                         /* adjust timeout factor according to speed/duplex */
3744                         adapter->tx_timeout_factor = 1;
3745                         switch (adapter->link_speed) {
3746                         case SPEED_10:
3747                                 adapter->tx_timeout_factor = 14;
3748                                 break;
3749                         case SPEED_100:
3750                                 /* maybe add some timeout factor ? */
3751                                 break;
3752                         }
3753
3754                         netif_carrier_on(netdev);
3755
3756                         igb_ping_all_vfs(adapter);
3757                         igb_check_vf_rate_limit(adapter);
3758
3759                         /* link state has changed, schedule phy info update */
3760                         if (!test_bit(__IGB_DOWN, &adapter->state))
3761                                 mod_timer(&adapter->phy_info_timer,
3762                                           round_jiffies(jiffies + 2 * HZ));
3763                 }
3764         } else {
3765                 if (netif_carrier_ok(netdev)) {
3766                         adapter->link_speed = 0;
3767                         adapter->link_duplex = 0;
3768
3769                         /* check for thermal sensor event */
3770                         if (igb_thermal_sensor_event(hw,
3771                             E1000_THSTAT_PWR_DOWN)) {
3772                                 netdev_err(netdev, "The network adapter was "
3773                                            "stopped because it overheated\n");
3774                         }
3775
3776                         /* Link status message must follow this format */
3777                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3778                                netdev->name);
3779                         netif_carrier_off(netdev);
3780
3781                         igb_ping_all_vfs(adapter);
3782
3783                         /* link state has changed, schedule phy info update */
3784                         if (!test_bit(__IGB_DOWN, &adapter->state))
3785                                 mod_timer(&adapter->phy_info_timer,
3786                                           round_jiffies(jiffies + 2 * HZ));
3787
3788                         pm_schedule_suspend(netdev->dev.parent,
3789                                             MSEC_PER_SEC * 5);
3790                 }
3791         }
3792
3793         spin_lock(&adapter->stats64_lock);
3794         igb_update_stats(adapter, &adapter->stats64);
3795         spin_unlock(&adapter->stats64_lock);
3796
3797         for (i = 0; i < adapter->num_tx_queues; i++) {
3798                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3799                 if (!netif_carrier_ok(netdev)) {
3800                         /* We've lost link, so the controller stops DMA,
3801                          * but we've got queued Tx work that's never going
3802                          * to get done, so reset controller to flush Tx.
3803                          * (Do the reset outside of interrupt context). */
3804                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3805                                 adapter->tx_timeout_count++;
3806                                 schedule_work(&adapter->reset_task);
3807                                 /* return immediately since reset is imminent */
3808                                 return;
3809                         }
3810                 }
3811
3812                 /* Force detection of hung controller every watchdog period */
3813                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3814         }
3815
3816         /* Cause software interrupt to ensure rx ring is cleaned */
3817         if (adapter->msix_entries) {
3818                 u32 eics = 0;
3819                 for (i = 0; i < adapter->num_q_vectors; i++)
3820                         eics |= adapter->q_vector[i]->eims_value;
3821                 wr32(E1000_EICS, eics);
3822         } else {
3823                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3824         }
3825
3826         igb_spoof_check(adapter);
3827
3828         /* Reset the timer */
3829         if (!test_bit(__IGB_DOWN, &adapter->state))
3830                 mod_timer(&adapter->watchdog_timer,
3831                           round_jiffies(jiffies + 2 * HZ));
3832 }
3833
3834 enum latency_range {
3835         lowest_latency = 0,
3836         low_latency = 1,
3837         bulk_latency = 2,
3838         latency_invalid = 255
3839 };
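
/* These ranges map onto the ITR values programmed in igb_set_itr()
 * below: lowest_latency targets roughly 70,000 ints/sec, low_latency
 * 20,000, and bulk_latency 4,000, with latency_invalid available as an
 * out-of-band marker.
 */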
3840
3841 /**
3842  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3843  *
3844  *      Stores a new ITR value based strictly on packet size.  This
3845  *      algorithm is less sophisticated than that used in igb_update_itr,
3846  *      due to the difficulty of synchronizing statistics across multiple
3847  *      receive rings.  The divisors and thresholds used by this function
3848  *      were determined based on theoretical maximum wire speed and testing
3849  *      data, in order to minimize response time while increasing bulk
3850  *      throughput.
3851  *      This functionality is controlled by the InterruptThrottleRate module
3852  *      parameter (see igb_param.c)
3853  *      NOTE:  This function is called only when operating in a multiqueue
3854  *             receive environment.
3855  * @q_vector: pointer to q_vector
3856  **/
3857 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3858 {
3859         int new_val = q_vector->itr_val;
3860         int avg_wire_size = 0;
3861         struct igb_adapter *adapter = q_vector->adapter;
3862         unsigned int packets;
3863
3864         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3865          * ints/sec (IGB_4K_ITR).
3866          */
3867         if (adapter->link_speed != SPEED_1000) {
3868                 new_val = IGB_4K_ITR;
3869                 goto set_itr_val;
3870         }
3871
3872         packets = q_vector->rx.total_packets;
3873         if (packets)
3874                 avg_wire_size = q_vector->rx.total_bytes / packets;
3875
3876         packets = q_vector->tx.total_packets;
3877         if (packets)
3878                 avg_wire_size = max_t(u32, avg_wire_size,
3879                                       q_vector->tx.total_bytes / packets);
3880
3881         /* if avg_wire_size isn't set no work was done */
3882         if (!avg_wire_size)
3883                 goto clear_counts;
3884
3885         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3886         avg_wire_size += 24;
3887
3888         /* Don't starve jumbo frames */
3889         avg_wire_size = min(avg_wire_size, 3000);
3890
3891         /* Give a little boost to mid-size frames */
3892         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3893                 new_val = avg_wire_size / 3;
3894         else
3895                 new_val = avg_wire_size / 2;
3896
3897         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3898         if (new_val < IGB_20K_ITR &&
3899             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3900              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3901                 new_val = IGB_20K_ITR;
3902
3903 set_itr_val:
3904         if (new_val != q_vector->itr_val) {
3905                 q_vector->itr_val = new_val;
3906                 q_vector->set_itr = 1;
3907         }
3908 clear_counts:
3909         q_vector->rx.total_bytes = 0;
3910         q_vector->rx.total_packets = 0;
3911         q_vector->tx.total_bytes = 0;
3912         q_vector->tx.total_packets = 0;
3913 }
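
/* Worked example of the heuristic above (illustrative numbers): 100 Rx
 * packets totalling 60,000 bytes give avg_wire_size = 600, plus 24
 * bytes of CRC/preamble/gap overhead = 624.  That lands in the
 * mid-size boost branch (300 < 624 < 1200), so new_val = 624 / 3 =
 * 208 ITR units, where a smaller value means a shorter interval and a
 * higher interrupt rate.
 */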
3914
3915 /**
3916  * igb_update_itr - update the dynamic ITR value based on statistics
3917  *      Stores a new ITR value based on packets and byte
3918  *      counts during the last interrupt.  The advantage of per interrupt
3919  *      computation is faster updates and more accurate ITR for the current
3920  *      traffic pattern.  Constants in this function were computed
3921  *      based on theoretical maximum wire speed and thresholds were set based
3922  *      on testing data as well as attempting to minimize response time
3923  *      while increasing bulk throughput.
3924  *      This functionality is controlled by the InterruptThrottleRate module
3925  *      parameter (see igb_param.c)
3926  *      NOTE:  These calculations are only valid when operating in a single-
3927  *             queue environment.
3928  * @q_vector: pointer to q_vector
3929  * @ring_container: ring info to update the itr for
3930  **/
3931 static void igb_update_itr(struct igb_q_vector *q_vector,
3932                            struct igb_ring_container *ring_container)
3933 {
3934         unsigned int packets = ring_container->total_packets;
3935         unsigned int bytes = ring_container->total_bytes;
3936         u8 itrval = ring_container->itr;
3937
3938         /* no packets, exit with status unchanged */
3939         if (packets == 0)
3940                 return;
3941
3942         switch (itrval) {
3943         case lowest_latency:
3944                 /* handle TSO and jumbo frames */
3945                 if (bytes/packets > 8000)
3946                         itrval = bulk_latency;
3947                 else if ((packets < 5) && (bytes > 512))
3948                         itrval = low_latency;
3949                 break;
3950         case low_latency:  /* 50 usec aka 20000 ints/s */
3951                 if (bytes > 10000) {
3952                         /* this if handles the TSO accounting */
3953                         if (bytes/packets > 8000) {
3954                                 itrval = bulk_latency;
3955                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3956                                 itrval = bulk_latency;
3957                         } else if (packets > 35) {
3958                                 itrval = lowest_latency;
3959                         }
3960                 } else if (bytes/packets > 2000) {
3961                         itrval = bulk_latency;
3962                 } else if (packets <= 2 && bytes < 512) {
3963                         itrval = lowest_latency;
3964                 }
3965                 break;
3966         case bulk_latency: /* 250 usec aka 4000 ints/s */
3967                 if (bytes > 25000) {
3968                         if (packets > 35)
3969                                 itrval = low_latency;
3970                 } else if (bytes < 1500) {
3971                         itrval = low_latency;
3972                 }
3973                 break;
3974         }
3975
3976         /* clear work counters since we have the values we need */
3977         ring_container->total_bytes = 0;
3978         ring_container->total_packets = 0;
3979
3980         /* write updated itr to ring container */
3981         ring_container->itr = itrval;
3982 }
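
/* Two examples of the thresholds above (illustrative): a single TSO
 * transmit counted as one packet of 48,000 bytes gives bytes/packets
 * > 8000, so a container in the lowest_latency state moves straight to
 * bulk_latency; 40 small packets totalling 12,000 bytes while in
 * low_latency instead promote it to lowest_latency via the
 * packets > 35 check.
 */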
3983
3984 static void igb_set_itr(struct igb_q_vector *q_vector)
3985 {
3986         struct igb_adapter *adapter = q_vector->adapter;
3987         u32 new_itr = q_vector->itr_val;
3988         u8 current_itr = 0;
3989
3990         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3991         if (adapter->link_speed != SPEED_1000) {
3992                 current_itr = 0;
3993                 new_itr = IGB_4K_ITR;
3994                 goto set_itr_now;
3995         }
3996
3997         igb_update_itr(q_vector, &q_vector->tx);
3998         igb_update_itr(q_vector, &q_vector->rx);
3999
4000         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4001
4002         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4003         if (current_itr == lowest_latency &&
4004             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4005              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4006                 current_itr = low_latency;
4007
4008         switch (current_itr) {
4009         /* counts and packets in update_itr are dependent on these numbers */
4010         case lowest_latency:
4011                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4012                 break;
4013         case low_latency:
4014                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4015                 break;
4016         case bulk_latency:
4017                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4018                 break;
4019         default:
4020                 break;
4021         }
4022
4023 set_itr_now:
4024         if (new_itr != q_vector->itr_val) {
4025                 /* this attempts to bias the interrupt rate towards Bulk
4026                  * by adding intermediate steps when interrupt rate is
4027                  * increasing */
4028                 new_itr = new_itr > q_vector->itr_val ?
4029                              max((new_itr * q_vector->itr_val) /
4030                                  (new_itr + (q_vector->itr_val >> 2)),
4031                                  new_itr) :
4032                              new_itr;
4033                 /* Don't write the value here; it resets the adapter's
4034                  * internal timer, and causes us to delay far longer than
4035                  * we should between interrupts.  Instead, we write the ITR
4036                  * value at the beginning of the next interrupt so the timing
4037                  * ends up being correct.
4038                  */
4039                 q_vector->itr_val = new_itr;
4040                 q_vector->set_itr = 1;
4041         }
4042 }
4043
4044 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4045                             u32 type_tucmd, u32 mss_l4len_idx)
4046 {
4047         struct e1000_adv_tx_context_desc *context_desc;
4048         u16 i = tx_ring->next_to_use;
4049
4050         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4051
4052         i++;
4053         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4054
4055         /* set bits to identify this as an advanced context descriptor */
4056         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4057
4058         /* For 82575, context index must be unique per ring. */
4059         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4060                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4061
4062         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4063         context_desc->seqnum_seed       = 0;
4064         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4065         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4066 }
4067
4068 static int igb_tso(struct igb_ring *tx_ring,
4069                    struct igb_tx_buffer *first,
4070                    u8 *hdr_len)
4071 {
4072         struct sk_buff *skb = first->skb;
4073         u32 vlan_macip_lens, type_tucmd;
4074         u32 mss_l4len_idx, l4len;
4075
4076         if (!skb_is_gso(skb))
4077                 return 0;
4078
4079         if (skb_header_cloned(skb)) {
4080                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4081                 if (err)
4082                         return err;
4083         }
4084
4085         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4086         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4087
4088         if (first->protocol == __constant_htons(ETH_P_IP)) {
4089                 struct iphdr *iph = ip_hdr(skb);
4090                 iph->tot_len = 0;
4091                 iph->check = 0;
4092                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4093                                                          iph->daddr, 0,
4094                                                          IPPROTO_TCP,
4095                                                          0);
4096                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4097                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4098                                    IGB_TX_FLAGS_CSUM |
4099                                    IGB_TX_FLAGS_IPV4;
4100         } else if (skb_is_gso_v6(skb)) {
4101                 ipv6_hdr(skb)->payload_len = 0;
4102                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4103                                                        &ipv6_hdr(skb)->daddr,
4104                                                        0, IPPROTO_TCP, 0);
4105                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4106                                    IGB_TX_FLAGS_CSUM;
4107         }
4108
4109         /* compute header lengths */
4110         l4len = tcp_hdrlen(skb);
4111         *hdr_len = skb_transport_offset(skb) + l4len;
4112
4113         /* update gso size and bytecount with header size */
4114         first->gso_segs = skb_shinfo(skb)->gso_segs;
4115         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4116
4117         /* MSS L4LEN IDX */
4118         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4119         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4120
4121         /* VLAN MACLEN IPLEN */
4122         vlan_macip_lens = skb_network_header_len(skb);
4123         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4124         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4125
4126         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4127
4128         return 1;
4129 }
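
/* Accounting example for the TSO path above (illustrative numbers): a
 * 4,380 byte TCP payload with an MSS of 1460 and a 66 byte header
 * yields gso_segs = 3, and bytecount grows by (3 - 1) * 66 = 132 bytes
 * to cover the headers the hardware replicates for segments 2 and 3.
 */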
4130
4131 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4132 {
4133         struct sk_buff *skb = first->skb;
4134         u32 vlan_macip_lens = 0;
4135         u32 mss_l4len_idx = 0;
4136         u32 type_tucmd = 0;
4137
4138         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4139                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4140                         return;
4141         } else {
4142                 u8 l4_hdr = 0;
4143                 switch (first->protocol) {
4144                 case __constant_htons(ETH_P_IP):
4145                         vlan_macip_lens |= skb_network_header_len(skb);
4146                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4147                         l4_hdr = ip_hdr(skb)->protocol;
4148                         break;
4149                 case __constant_htons(ETH_P_IPV6):
4150                         vlan_macip_lens |= skb_network_header_len(skb);
4151                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4152                         break;
4153                 default:
4154                         if (unlikely(net_ratelimit())) {
4155                                 dev_warn(tx_ring->dev,
4156                                  "partial checksum but proto=%x!\n",
4157                                  first->protocol);
4158                         }
4159                         break;
4160                 }
4161
4162                 switch (l4_hdr) {
4163                 case IPPROTO_TCP:
4164                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4165                         mss_l4len_idx = tcp_hdrlen(skb) <<
4166                                         E1000_ADVTXD_L4LEN_SHIFT;
4167                         break;
4168                 case IPPROTO_SCTP:
4169                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4170                         mss_l4len_idx = sizeof(struct sctphdr) <<
4171                                         E1000_ADVTXD_L4LEN_SHIFT;
4172                         break;
4173                 case IPPROTO_UDP:
4174                         mss_l4len_idx = sizeof(struct udphdr) <<
4175                                         E1000_ADVTXD_L4LEN_SHIFT;
4176                         break;
4177                 default:
4178                         if (unlikely(net_ratelimit())) {
4179                                 dev_warn(tx_ring->dev,
4180                                  "partial checksum but l4 proto=%x!\n",
4181                                  l4_hdr);
4182                         }
4183                         break;
4184                 }
4185
4186                 /* update TX checksum flag */
4187                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4188         }
4189
4190         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4191         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4192
4193         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4194 }
4195
4196 static __le32 igb_tx_cmd_type(u32 tx_flags)
4197 {
4198         /* set type for advanced descriptor with frame checksum insertion */
4199         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4200                                       E1000_ADVTXD_DCMD_IFCS |
4201                                       E1000_ADVTXD_DCMD_DEXT);
4202
4203         /* set HW vlan bit if vlan is present */
4204         if (tx_flags & IGB_TX_FLAGS_VLAN)
4205                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4206
4207         /* set timestamp bit if present */
4208         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4209                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4210
4211         /* set segmentation bits for TSO */
4212         if (tx_flags & IGB_TX_FLAGS_TSO)
4213                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4214
4215         return cmd_type;
4216 }
4217
4218 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4219                                  union e1000_adv_tx_desc *tx_desc,
4220                                  u32 tx_flags, unsigned int paylen)
4221 {
4222         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4223
4224         /* 82575 requires a unique index per ring if any offload is enabled */
4225         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4226             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4227                 olinfo_status |= tx_ring->reg_idx << 4;
4228
4229         /* insert L4 checksum */
4230         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4231                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4232
4233                 /* insert IPv4 checksum */
4234                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4235                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4236         }
4237
4238         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4239 }
4240
4241 /*
4242  * The largest size we can write to the descriptor is 65535.  In order to
4243  * maintain a power of two alignment we have to limit ourselves to 32K.
4244  */
4245 #define IGB_MAX_TXD_PWR 15
4246 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4247
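/*
 * Example of the split this limit forces: a 60000-byte linear buffer is
 * emitted as one 32768-byte descriptor followed by a 27232-byte one; the
 * inner while loop of igb_tx_map() below performs exactly this slicing.
 */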
4248 static void igb_tx_map(struct igb_ring *tx_ring,
4249                        struct igb_tx_buffer *first,
4250                        const u8 hdr_len)
4251 {
4252         struct sk_buff *skb = first->skb;
4253         struct igb_tx_buffer *tx_buffer_info;
4254         union e1000_adv_tx_desc *tx_desc;
4255         dma_addr_t dma;
4256         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4257         unsigned int data_len = skb->data_len;
4258         unsigned int size = skb_headlen(skb);
4259         unsigned int paylen = skb->len - hdr_len;
4260         __le32 cmd_type;
4261         u32 tx_flags = first->tx_flags;
4262         u16 i = tx_ring->next_to_use;
4263
4264         tx_desc = IGB_TX_DESC(tx_ring, i);
4265
4266         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4267         cmd_type = igb_tx_cmd_type(tx_flags);
4268
4269         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4270         if (dma_mapping_error(tx_ring->dev, dma))
4271                 goto dma_error;
4272
4273         /* record length, and DMA address */
4274         first->length = size;
4275         first->dma = dma;
4276         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4277
4278         for (;;) {
4279                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4280                         tx_desc->read.cmd_type_len =
4281                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4282
4283                         i++;
4284                         tx_desc++;
4285                         if (i == tx_ring->count) {
4286                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4287                                 i = 0;
4288                         }
4289
4290                         dma += IGB_MAX_DATA_PER_TXD;
4291                         size -= IGB_MAX_DATA_PER_TXD;
4292
4293                         tx_desc->read.olinfo_status = 0;
4294                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4295                 }
4296
4297                 if (likely(!data_len))
4298                         break;
4299
4300                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4301
4302                 i++;
4303                 tx_desc++;
4304                 if (i == tx_ring->count) {
4305                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4306                         i = 0;
4307                 }
4308
4309                 size = skb_frag_size(frag);
4310                 data_len -= size;
4311
4312                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4313                                    size, DMA_TO_DEVICE);
4314                 if (dma_mapping_error(tx_ring->dev, dma))
4315                         goto dma_error;
4316
4317                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4318                 tx_buffer_info->length = size;
4319                 tx_buffer_info->dma = dma;
4320
4321                 tx_desc->read.olinfo_status = 0;
4322                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4323
4324                 frag++;
4325         }
4326
4327         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4328
4329         /* write last descriptor with RS and EOP bits */
4330         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4331         if (unlikely(skb->no_fcs))
4332                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4333         tx_desc->read.cmd_type_len = cmd_type;
4334
4335         /* set the timestamp */
4336         first->time_stamp = jiffies;
4337
4338         /*
4339          * Force memory writes to complete before letting h/w know there
4340          * are new descriptors to fetch.  (Only applicable for weak-ordered
4341          * memory model archs, such as IA-64).
4342          *
4343          * We also need this memory barrier to make certain all of the
4344          * status bits have been updated before next_to_watch is written.
4345          */
4346         wmb();
4347
4348         /* set next_to_watch value indicating a packet is present */
4349         first->next_to_watch = tx_desc;
4350
4351         i++;
4352         if (i == tx_ring->count)
4353                 i = 0;
4354
4355         tx_ring->next_to_use = i;
4356
4357         writel(i, tx_ring->tail);
4358
4359         /* we need this if more than one processor can write to our tail
4360          * at a time; it synchronizes IO on IA64/Altix systems */
4361         mmiowb();
4362
4363         return;
4364
4365 dma_error:
4366         dev_err(tx_ring->dev, "TX DMA map failed\n");
4367
4368         /* clear dma mappings for failed tx_buffer_info map */
4369         for (;;) {
4370                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4371                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4372                 if (tx_buffer_info == first)
4373                         break;
4374                 if (i == 0)
4375                         i = tx_ring->count;
4376                 i--;
4377         }
4378
4379         tx_ring->next_to_use = i;
4380 }
4381
4382 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4383 {
4384         struct net_device *netdev = tx_ring->netdev;
4385
4386         netif_stop_subqueue(netdev, tx_ring->queue_index);
4387
4388         /* Herbert's original patch had:
4389          *  smp_mb__after_netif_stop_queue();
4390          * but since that doesn't exist yet, just open code it. */
4391         smp_mb();
4392
4393         /* We need to check again in case another CPU has just
4394          * made room available. */
4395         if (igb_desc_unused(tx_ring) < size)
4396                 return -EBUSY;
4397
4398         /* A reprieve! */
4399         netif_wake_subqueue(netdev, tx_ring->queue_index);
4400
4401         u64_stats_update_begin(&tx_ring->tx_syncp2);
4402         tx_ring->tx_stats.restart_queue2++;
4403         u64_stats_update_end(&tx_ring->tx_syncp2);
4404
4405         return 0;
4406 }
4407
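/* fast-path wrapper: in the common case there is plenty of ring space and
 * we avoid the stop/smp_mb/recheck dance in __igb_maybe_stop_tx() above */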
4408 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4409 {
4410         if (igb_desc_unused(tx_ring) >= size)
4411                 return 0;
4412         return __igb_maybe_stop_tx(tx_ring, size);
4413 }
4414
4415 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4416                                 struct igb_ring *tx_ring)
4417 {
4418         struct igb_tx_buffer *first;
4419         int tso;
4420         u32 tx_flags = 0;
4421         __be16 protocol = vlan_get_protocol(skb);
4422         u8 hdr_len = 0;
4423
4424         /* need: 1 descriptor per page,
4425          *       + 2 desc gap to keep tail from touching head,
4426          *       + 1 desc for skb->data,
4427          *       + 1 desc for context descriptor,
4428          * otherwise try next time */
4429         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4430                 /* this is a hard error */
4431                 return NETDEV_TX_BUSY;
4432         }
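        /* worked example of the budget above: an skb with 3 page frags
         * reserves 3 + 4 = 7 descriptors (one per frag, one for skb->data,
         * one for a possible context descriptor, plus the two-descriptor
         * gap) */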
4433
4434         /* record the location of the first descriptor for this packet */
4435         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4436         first->skb = skb;
4437         first->bytecount = skb->len;
4438         first->gso_segs = 1;
4439
4440         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4441                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4442                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4443         }
4444
4445         if (vlan_tx_tag_present(skb)) {
4446                 tx_flags |= IGB_TX_FLAGS_VLAN;
4447                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4448         }
4449
4450         /* record initial flags and protocol */
4451         first->tx_flags = tx_flags;
4452         first->protocol = protocol;
4453
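        /* as the call site below implies, igb_tso() returns 1 when it has
         * set up a TSO context, 0 when the frame is not GSO (plain
         * checksum offload applies instead), and a negative value when
         * header setup fails */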
4454         tso = igb_tso(tx_ring, first, &hdr_len);
4455         if (tso < 0)
4456                 goto out_drop;
4457         else if (!tso)
4458                 igb_tx_csum(tx_ring, first);
4459
4460         igb_tx_map(tx_ring, first, hdr_len);
4461
4462         /* Make sure there is space in the ring for the next send. */
4463         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4464
4465         return NETDEV_TX_OK;
4466
4467 out_drop:
4468         igb_unmap_and_free_tx_resource(tx_ring, first);
4469
4470         return NETDEV_TX_OK;
4471 }
4472
4473 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4474                                                     struct sk_buff *skb)
4475 {
4476         unsigned int r_idx = skb->queue_mapping;
4477
4478         if (r_idx >= adapter->num_tx_queues)
4479                 r_idx = r_idx % adapter->num_tx_queues;
4480
4481         return adapter->tx_ring[r_idx];
4482 }
4483
4484 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4485                                   struct net_device *netdev)
4486 {
4487         struct igb_adapter *adapter = netdev_priv(netdev);
4488
4489         if (test_bit(__IGB_DOWN, &adapter->state)) {
4490                 dev_kfree_skb_any(skb);
4491                 return NETDEV_TX_OK;
4492         }
4493
4494         if (skb->len <= 0) {
4495                 dev_kfree_skb_any(skb);
4496                 return NETDEV_TX_OK;
4497         }
4498
4499         /*
4500          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4501          * in order to meet this minimum size requirement.
4502          */
4503         if (skb->len < 17) {
4504                 if (skb_padto(skb, 17))
4505                         return NETDEV_TX_OK;
4506                 skb->len = 17;
4507         }
4508
4509         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4510 }
4511
4512 /**
4513  * igb_tx_timeout - Respond to a Tx Hang
4514  * @netdev: network interface device structure
4515  **/
4516 static void igb_tx_timeout(struct net_device *netdev)
4517 {
4518         struct igb_adapter *adapter = netdev_priv(netdev);
4519         struct e1000_hw *hw = &adapter->hw;
4520
4521         /* Do the reset outside of interrupt context */
4522         adapter->tx_timeout_count++;
4523
4524         if (hw->mac.type >= e1000_82580)
4525                 hw->dev_spec._82575.global_device_reset = true;
4526
4527         schedule_work(&adapter->reset_task);
4528         wr32(E1000_EICS,
4529              (adapter->eims_enable_mask & ~adapter->eims_other));
4530 }
4531
4532 static void igb_reset_task(struct work_struct *work)
4533 {
4534         struct igb_adapter *adapter;
4535         adapter = container_of(work, struct igb_adapter, reset_task);
4536
4537         igb_dump(adapter);
4538         netdev_err(adapter->netdev, "Reset adapter\n");
4539         igb_reinit_locked(adapter);
4540 }
4541
4542 /**
4543  * igb_get_stats64 - Get System Network Statistics
4544  * @netdev: network interface device structure
4545  * @stats: rtnl_link_stats64 pointer
4546  *
4547  **/
4548 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4549                                                  struct rtnl_link_stats64 *stats)
4550 {
4551         struct igb_adapter *adapter = netdev_priv(netdev);
4552
4553         spin_lock(&adapter->stats64_lock);
4554         igb_update_stats(adapter, &adapter->stats64);
4555         memcpy(stats, &adapter->stats64, sizeof(*stats));
4556         spin_unlock(&adapter->stats64_lock);
4557
4558         return stats;
4559 }
4560
4561 /**
4562  * igb_change_mtu - Change the Maximum Transfer Unit
4563  * @netdev: network interface device structure
4564  * @new_mtu: new value for maximum frame size
4565  *
4566  * Returns 0 on success, negative on failure
4567  **/
4568 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4569 {
4570         struct igb_adapter *adapter = netdev_priv(netdev);
4571         struct pci_dev *pdev = adapter->pdev;
4572         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4573
4574         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4575                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4576                 return -EINVAL;
4577         }
4578
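        /* max_frame = MTU + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN),
         * so the 9238-byte frame cap below corresponds to the 9216-byte
         * MTU quoted in the error message */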
4579 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4580         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4581                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4582                 return -EINVAL;
4583         }
4584
4585         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4586                 msleep(1);
4587
4588         /* igb_down has a dependency on max_frame_size */
4589         adapter->max_frame_size = max_frame;
4590
4591         if (netif_running(netdev))
4592                 igb_down(adapter);
4593
4594         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4595                  netdev->mtu, new_mtu);
4596         netdev->mtu = new_mtu;
4597
4598         if (netif_running(netdev))
4599                 igb_up(adapter);
4600         else
4601                 igb_reset(adapter);
4602
4603         clear_bit(__IGB_RESETTING, &adapter->state);
4604
4605         return 0;
4606 }
4607
4608 /**
4609  * igb_update_stats - Update the board statistics counters
4610  * @adapter: board private structure
4611  **/
4612
4613 void igb_update_stats(struct igb_adapter *adapter,
4614                       struct rtnl_link_stats64 *net_stats)
4615 {
4616         struct e1000_hw *hw = &adapter->hw;
4617         struct pci_dev *pdev = adapter->pdev;
4618         u32 reg, mpc;
4619         u16 phy_tmp;
4620         int i;
4621         u64 bytes, packets;
4622         unsigned int start;
4623         u64 _bytes, _packets;
4624
4625 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4626
4627         /*
4628          * Prevent stats update while adapter is being reset, or if the pci
4629          * connection is down.
4630          */
4631         if (adapter->link_speed == 0)
4632                 return;
4633         if (pci_channel_offline(pdev))
4634                 return;
4635
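        /* per-ring counters are sampled under a u64_stats seqcount so the
         * 64-bit byte/packet pairs read consistently on 32-bit hosts; the
         * fetch loop retries if a writer raced with us */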
4636         bytes = 0;
4637         packets = 0;
4638         for (i = 0; i < adapter->num_rx_queues; i++) {
4639                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4640                 struct igb_ring *ring = adapter->rx_ring[i];
4641
4642                 ring->rx_stats.drops += rqdpc_tmp;
4643                 net_stats->rx_fifo_errors += rqdpc_tmp;
4644
4645                 do {
4646                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4647                         _bytes = ring->rx_stats.bytes;
4648                         _packets = ring->rx_stats.packets;
4649                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4650                 bytes += _bytes;
4651                 packets += _packets;
4652         }
4653
4654         net_stats->rx_bytes = bytes;
4655         net_stats->rx_packets = packets;
4656
4657         bytes = 0;
4658         packets = 0;
4659         for (i = 0; i < adapter->num_tx_queues; i++) {
4660                 struct igb_ring *ring = adapter->tx_ring[i];
4661                 do {
4662                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4663                         _bytes = ring->tx_stats.bytes;
4664                         _packets = ring->tx_stats.packets;
4665                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4666                 bytes += _bytes;
4667                 packets += _packets;
4668         }
4669         net_stats->tx_bytes = bytes;
4670         net_stats->tx_packets = packets;
4671
4672         /* read stats registers */
4673         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4674         adapter->stats.gprc += rd32(E1000_GPRC);
4675         adapter->stats.gorc += rd32(E1000_GORCL);
4676         rd32(E1000_GORCH); /* clear GORCL */
4677         adapter->stats.bprc += rd32(E1000_BPRC);
4678         adapter->stats.mprc += rd32(E1000_MPRC);
4679         adapter->stats.roc += rd32(E1000_ROC);
4680
4681         adapter->stats.prc64 += rd32(E1000_PRC64);
4682         adapter->stats.prc127 += rd32(E1000_PRC127);
4683         adapter->stats.prc255 += rd32(E1000_PRC255);
4684         adapter->stats.prc511 += rd32(E1000_PRC511);
4685         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4686         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4687         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4688         adapter->stats.sec += rd32(E1000_SEC);
4689
4690         mpc = rd32(E1000_MPC);
4691         adapter->stats.mpc += mpc;
4692         net_stats->rx_fifo_errors += mpc;
4693         adapter->stats.scc += rd32(E1000_SCC);
4694         adapter->stats.ecol += rd32(E1000_ECOL);
4695         adapter->stats.mcc += rd32(E1000_MCC);
4696         adapter->stats.latecol += rd32(E1000_LATECOL);
4697         adapter->stats.dc += rd32(E1000_DC);
4698         adapter->stats.rlec += rd32(E1000_RLEC);
4699         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4700         adapter->stats.xontxc += rd32(E1000_XONTXC);
4701         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4702         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4703         adapter->stats.fcruc += rd32(E1000_FCRUC);
4704         adapter->stats.gptc += rd32(E1000_GPTC);
4705         adapter->stats.gotc += rd32(E1000_GOTCL);
4706         rd32(E1000_GOTCH); /* clear GOTCL */
4707         adapter->stats.rnbc += rd32(E1000_RNBC);
4708         adapter->stats.ruc += rd32(E1000_RUC);
4709         adapter->stats.rfc += rd32(E1000_RFC);
4710         adapter->stats.rjc += rd32(E1000_RJC);
4711         adapter->stats.tor += rd32(E1000_TORH);
4712         adapter->stats.tot += rd32(E1000_TOTH);
4713         adapter->stats.tpr += rd32(E1000_TPR);
4714
4715         adapter->stats.ptc64 += rd32(E1000_PTC64);
4716         adapter->stats.ptc127 += rd32(E1000_PTC127);
4717         adapter->stats.ptc255 += rd32(E1000_PTC255);
4718         adapter->stats.ptc511 += rd32(E1000_PTC511);
4719         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4720         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4721
4722         adapter->stats.mptc += rd32(E1000_MPTC);
4723         adapter->stats.bptc += rd32(E1000_BPTC);
4724
4725         adapter->stats.tpt += rd32(E1000_TPT);
4726         adapter->stats.colc += rd32(E1000_COLC);
4727
4728         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4729         /* read internal phy specific stats */
4730         reg = rd32(E1000_CTRL_EXT);
4731         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4732                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4733                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4734         }
4735
4736         adapter->stats.tsctc += rd32(E1000_TSCTC);
4737         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4738
4739         adapter->stats.iac += rd32(E1000_IAC);
4740         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4741         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4742         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4743         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4744         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4745         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4746         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4747         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4748
4749         /* Fill out the OS statistics structure */
4750         net_stats->multicast = adapter->stats.mprc;
4751         net_stats->collisions = adapter->stats.colc;
4752
4753         /* Rx Errors */
4754
4755         /* RLEC on some newer hardware can be incorrect so build
4756          * our own version based on RUC and ROC */
4757         net_stats->rx_errors = adapter->stats.rxerrc +
4758                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4759                 adapter->stats.ruc + adapter->stats.roc +
4760                 adapter->stats.cexterr;
4761         net_stats->rx_length_errors = adapter->stats.ruc +
4762                                       adapter->stats.roc;
4763         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4764         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4765         net_stats->rx_missed_errors = adapter->stats.mpc;
4766
4767         /* Tx Errors */
4768         net_stats->tx_errors = adapter->stats.ecol +
4769                                adapter->stats.latecol;
4770         net_stats->tx_aborted_errors = adapter->stats.ecol;
4771         net_stats->tx_window_errors = adapter->stats.latecol;
4772         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4773
4774         /* Tx Dropped needs to be maintained elsewhere */
4775
4776         /* Phy Stats */
4777         if (hw->phy.media_type == e1000_media_type_copper) {
4778                 if ((adapter->link_speed == SPEED_1000) &&
4779                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4780                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4781                         adapter->phy_stats.idle_errors += phy_tmp;
4782                 }
4783         }
4784
4785         /* Management Stats */
4786         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4787         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4788         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4789
4790         /* OS2BMC Stats */
4791         reg = rd32(E1000_MANC);
4792         if (reg & E1000_MANC_EN_BMC2OS) {
4793                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4794                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4795                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4796                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4797         }
4798 }
4799
4800 static irqreturn_t igb_msix_other(int irq, void *data)
4801 {
4802         struct igb_adapter *adapter = data;
4803         struct e1000_hw *hw = &adapter->hw;
4804         u32 icr = rd32(E1000_ICR);
4805         /* reading ICR causes bit 31 of EICR to be cleared */
4806
4807         if (icr & E1000_ICR_DRSTA)
4808                 schedule_work(&adapter->reset_task);
4809
4810         if (icr & E1000_ICR_DOUTSYNC) {
4811                 /* HW is reporting DMA is out of sync */
4812                 adapter->stats.doosync++;
4813                 /* The DMA Out of Sync is also an indication of a spoof event
4814                  * in IOV mode. Check the Wrong VM Behavior register to
4815                  * see if it is really a spoof event. */
4816                 igb_check_wvbr(adapter);
4817         }
4818
4819         /* Check for a mailbox event */
4820         if (icr & E1000_ICR_VMMB)
4821                 igb_msg_task(adapter);
4822
4823         if (icr & E1000_ICR_LSC) {
4824                 hw->mac.get_link_status = 1;
4825                 /* guard against interrupt when we're going down */
4826                 if (!test_bit(__IGB_DOWN, &adapter->state))
4827                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4828         }
4829
4830         wr32(E1000_EIMS, adapter->eims_other);
4831
4832         return IRQ_HANDLED;
4833 }
4834
4835 static void igb_write_itr(struct igb_q_vector *q_vector)
4836 {
4837         struct igb_adapter *adapter = q_vector->adapter;
4838         u32 itr_val = q_vector->itr_val & 0x7FFC;
4839
4840         if (!q_vector->set_itr)
4841                 return;
4842
4843         if (!itr_val)
4844                 itr_val = 0x4;
4845
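        /* 82575 expects the interval mirrored into both halves of EITR;
         * later macs take a single copy plus E1000_EITR_CNT_IGNR so the
         * write does not disturb the hardware interrupt counter */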
4846         if (adapter->hw.mac.type == e1000_82575)
4847                 itr_val |= itr_val << 16;
4848         else
4849                 itr_val |= E1000_EITR_CNT_IGNR;
4850
4851         writel(itr_val, q_vector->itr_register);
4852         q_vector->set_itr = 0;
4853 }
4854
4855 static irqreturn_t igb_msix_ring(int irq, void *data)
4856 {
4857         struct igb_q_vector *q_vector = data;
4858
4859         /* Write the ITR value calculated from the previous interrupt. */
4860         igb_write_itr(q_vector);
4861
4862         napi_schedule(&q_vector->napi);
4863
4864         return IRQ_HANDLED;
4865 }
4866
4867 #ifdef CONFIG_IGB_DCA
4868 static void igb_update_dca(struct igb_q_vector *q_vector)
4869 {
4870         struct igb_adapter *adapter = q_vector->adapter;
4871         struct e1000_hw *hw = &adapter->hw;
4872         int cpu = get_cpu();
4873
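        /* DCA steers the NIC's descriptor writes toward the cache of the
         * CPU servicing this vector; the tag only needs rewriting when
         * the vector has migrated to a different CPU */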
4874         if (q_vector->cpu == cpu)
4875                 goto out_no_update;
4876
4877         if (q_vector->tx.ring) {
4878                 int q = q_vector->tx.ring->reg_idx;
4879                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4880                 if (hw->mac.type == e1000_82575) {
4881                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4882                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4883                 } else {
4884                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4885                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4886                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4887                 }
4888                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4889                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4890         }
4891         if (q_vector->rx.ring) {
4892                 int q = q_vector->rx.ring->reg_idx;
4893                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4894                 if (hw->mac.type == e1000_82575) {
4895                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4896                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4897                 } else {
4898                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4899                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4900                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4901                 }
4902                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4903                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4904                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4905                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4906         }
4907         q_vector->cpu = cpu;
4908 out_no_update:
4909         put_cpu();
4910 }
4911
4912 static void igb_setup_dca(struct igb_adapter *adapter)
4913 {
4914         struct e1000_hw *hw = &adapter->hw;
4915         int i;
4916
4917         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4918                 return;
4919
4920         /* Always use CB2 mode, difference is masked in the CB driver. */
4921         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4922
4923         for (i = 0; i < adapter->num_q_vectors; i++) {
4924                 adapter->q_vector[i]->cpu = -1;
4925                 igb_update_dca(adapter->q_vector[i]);
4926         }
4927 }
4928
4929 static int __igb_notify_dca(struct device *dev, void *data)
4930 {
4931         struct net_device *netdev = dev_get_drvdata(dev);
4932         struct igb_adapter *adapter = netdev_priv(netdev);
4933         struct pci_dev *pdev = adapter->pdev;
4934         struct e1000_hw *hw = &adapter->hw;
4935         unsigned long event = *(unsigned long *)data;
4936
4937         switch (event) {
4938         case DCA_PROVIDER_ADD:
4939                 /* if already enabled, don't do it again */
4940                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4941                         break;
4942                 if (dca_add_requester(dev) == 0) {
4943                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4944                         dev_info(&pdev->dev, "DCA enabled\n");
4945                         igb_setup_dca(adapter);
4946                         break;
4947                 }
4948                 /* Fall Through since DCA is disabled. */
4949         case DCA_PROVIDER_REMOVE:
4950                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4951                         /* without this a class_device is left
4952                          * hanging around in the sysfs model */
4953                         dca_remove_requester(dev);
4954                         dev_info(&pdev->dev, "DCA disabled\n");
4955                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4956                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4957                 }
4958                 break;
4959         }
4960
4961         return 0;
4962 }
4963
4964 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4965                           void *p)
4966 {
4967         int ret_val;
4968
4969         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4970                                          __igb_notify_dca);
4971
4972         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4973 }
4974 #endif /* CONFIG_IGB_DCA */
4975
4976 #ifdef CONFIG_PCI_IOV
4977 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4978 {
4979         unsigned char mac_addr[ETH_ALEN];
4980         struct pci_dev *pdev = adapter->pdev;
4981         struct e1000_hw *hw = &adapter->hw;
4982         struct pci_dev *pvfdev;
4983         unsigned int device_id;
4984         u16 thisvf_devfn;
4985
4986         random_ether_addr(mac_addr);
4987         igb_set_vf_mac(adapter, vf, mac_addr);
4988
4989         switch (adapter->hw.mac.type) {
4990         case e1000_82576:
4991                 device_id = IGB_82576_VF_DEV_ID;
4992                 /* VF Stride for 82576 is 2 */
4993                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4994                         (pdev->devfn & 1);
4995                 break;
4996         case e1000_i350:
4997                 device_id = IGB_I350_VF_DEV_ID;
4998                 /* VF Stride for I350 is 4 */
4999                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5000                                 (pdev->devfn & 3);
5001                 break;
5002         default:
5003                 device_id = 0;
5004                 thisvf_devfn = 0;
5005                 break;
5006         }
5007
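        /* illustrative routing (simplified, single bus): a PF at devfn
         * 0x00 on an i350 (stride 4) yields VF0 at devfn 0x80, VF1 at
         * 0x84, VF2 at 0x88, and so on; the scan below matches the
         * computed devfn against the VF devices found */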
5008         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5009         while (pvfdev) {
5010                 if (pvfdev->devfn == thisvf_devfn)
5011                         break;
5012                 pvfdev = pci_get_device(hw->vendor_id,
5013                                         device_id, pvfdev);
5014         }
5015
5016         if (pvfdev)
5017                 adapter->vf_data[vf].vfdev = pvfdev;
5018         else
5019                 dev_err(&pdev->dev,
5020                         "Couldn't find pci dev ptr for VF %4.4x\n",
5021                         thisvf_devfn);
5022         return pvfdev != NULL;
5023 }
5024
5025 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5026 {
5027         struct e1000_hw *hw = &adapter->hw;
5028         struct pci_dev *pdev = adapter->pdev;
5029         struct pci_dev *pvfdev;
5030         u16 vf_devfn = 0;
5031         u16 vf_stride;
5032         unsigned int device_id;
5033         int vfs_found = 0;
5034
5035         switch (adapter->hw.mac.type) {
5036         case e1000_82576:
5037                 device_id = IGB_82576_VF_DEV_ID;
5038                 /* VF Stride for 82576 is 2 */
5039                 vf_stride = 2;
5040                 break;
5041         case e1000_i350:
5042                 device_id = IGB_I350_VF_DEV_ID;
5043                 /* VF Stride for I350 is 4 */
5044                 vf_stride = 4;
5045                 break;
5046         default:
5047                 device_id = 0;
5048                 vf_stride = 0;
5049                 break;
5050         }
5051
5052         vf_devfn = pdev->devfn + 0x80;
5053         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5054         while (pvfdev) {
5055                 if (pvfdev->devfn == vf_devfn &&
5056                     (pvfdev->bus->number >= pdev->bus->number))
5057                         vfs_found++;
5058                 vf_devfn += vf_stride;
5059                 pvfdev = pci_get_device(hw->vendor_id,
5060                                         device_id, pvfdev);
5061         }
5062
5063         return vfs_found;
5064 }
5065
5066 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5067 {
5068         int i;
5069         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5070                 if (adapter->vf_data[i].vfdev) {
5071                         if (adapter->vf_data[i].vfdev->dev_flags &
5072                             PCI_DEV_FLAGS_ASSIGNED)
5073                                 return true;
5074                 }
5075         }
5076         return false;
5077 }
5078
5079 #endif
5080 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5081 {
5082         struct e1000_hw *hw = &adapter->hw;
5083         u32 ping;
5084         int i;
5085
5086         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5087                 ping = E1000_PF_CONTROL_MSG;
5088                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5089                         ping |= E1000_VT_MSGTYPE_CTS;
5090                 igb_write_mbx(hw, &ping, 1, i);
5091         }
5092 }
5093
5094 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5095 {
5096         struct e1000_hw *hw = &adapter->hw;
5097         u32 vmolr = rd32(E1000_VMOLR(vf));
5098         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099
5100         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5101                             IGB_VF_FLAG_MULTI_PROMISC);
5102         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5103
5104         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5105                 vmolr |= E1000_VMOLR_MPME;
5106                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5107                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5108         } else {
5109                 /*
5110                  * if we have hashes and we are clearing a multicast promisc
5111                  * flag we need to write the hashes to the MTA as this step
5112                  * was previously skipped
5113                  */
5114                 if (vf_data->num_vf_mc_hashes > 30) {
5115                         vmolr |= E1000_VMOLR_MPME;
5116                 } else if (vf_data->num_vf_mc_hashes) {
5117                         int j;
5118                         vmolr |= E1000_VMOLR_ROMPE;
5119                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5120                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5121                 }
5122         }
5123
5124         wr32(E1000_VMOLR(vf), vmolr);
5125
5126         /* any flags left unprocessed are likely not supported */
5127         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5128                 return -EINVAL;
5129
5130         return 0;
5131
5132 }
5133
5134 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5135                                   u32 *msgbuf, u32 vf)
5136 {
5137         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5138         u16 *hash_list = (u16 *)&msgbuf[1];
5139         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5140         int i;
5141
5142         /* salt away the number of multicast addresses assigned
5143          * to this VF for later use to restore when the PF multicast
5144          * list changes
5145          */
5146         vf_data->num_vf_mc_hashes = n;
5147
5148         /* only up to 30 hash values supported */
5149         if (n > 30)
5150                 n = 30;
5151
5152         /* store the hashes for later use */
5153         for (i = 0; i < n; i++)
5154                 vf_data->vf_mc_hashes[i] = hash_list[i];
5155
5156         /* Flush and reset the mta with the new values */
5157         igb_set_rx_mode(adapter->netdev);
5158
5159         return 0;
5160 }
5161
5162 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5163 {
5164         struct e1000_hw *hw = &adapter->hw;
5165         struct vf_data_storage *vf_data;
5166         int i, j;
5167
5168         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5169                 u32 vmolr = rd32(E1000_VMOLR(i));
5170                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5171
5172                 vf_data = &adapter->vf_data[i];
5173
5174                 if ((vf_data->num_vf_mc_hashes > 30) ||
5175                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5176                         vmolr |= E1000_VMOLR_MPME;
5177                 } else if (vf_data->num_vf_mc_hashes) {
5178                         vmolr |= E1000_VMOLR_ROMPE;
5179                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5180                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5181                 }
5182                 wr32(E1000_VMOLR(i), vmolr);
5183         }
5184 }
5185
5186 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5187 {
5188         struct e1000_hw *hw = &adapter->hw;
5189         u32 pool_mask, reg, vid;
5190         int i;
5191
5192         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5193
5194         /* Find the vlan filter for this id */
5195         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5196                 reg = rd32(E1000_VLVF(i));
5197
5198                 /* remove the vf from the pool */
5199                 reg &= ~pool_mask;
5200
5201                 /* if pool is empty then remove entry from vfta */
5202                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5203                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5204                         vid = reg & E1000_VLVF_VLANID_MASK;
5205                         reg = 0;
5206                         igb_vfta_set(hw, vid, false);
5207                 }
5208
5209                 wr32(E1000_VLVF(i), reg);
5210         }
5211
5212         adapter->vf_data[vf].vlans_enabled = 0;
5213 }
5214
5215 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5216 {
5217         struct e1000_hw *hw = &adapter->hw;
5218         u32 reg, i;
5219
5220         /* The vlvf table only exists on 82576 hardware and newer */
5221         if (hw->mac.type < e1000_82576)
5222                 return -1;
5223
5224         /* we only need to do this if VMDq is enabled */
5225         if (!adapter->vfs_allocated_count)
5226                 return -1;
5227
5228         /* Find the vlan filter for this id */
5229         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5230                 reg = rd32(E1000_VLVF(i));
5231                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5232                     vid == (reg & E1000_VLVF_VLANID_MASK))
5233                         break;
5234         }
5235
5236         if (add) {
5237                 if (i == E1000_VLVF_ARRAY_SIZE) {
5238                         /* Did not find a matching VLAN ID entry that was
5239                          * enabled.  Search for a free filter entry, i.e.
5240                          * one without the enable bit set
5241                          */
5242                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5243                                 reg = rd32(E1000_VLVF(i));
5244                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5245                                         break;
5246                         }
5247                 }
5248                 if (i < E1000_VLVF_ARRAY_SIZE) {
5249                         /* Found an enabled/available entry */
5250                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5251
5252                         /* if !enabled we need to set this up in vfta */
5253                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5254                                 /* add VID to filter table */
5255                                 igb_vfta_set(hw, vid, true);
5256                                 reg |= E1000_VLVF_VLANID_ENABLE;
5257                         }
5258                         reg &= ~E1000_VLVF_VLANID_MASK;
5259                         reg |= vid;
5260                         wr32(E1000_VLVF(i), reg);
5261
5262                         /* do not modify RLPML for PF devices */
5263                         if (vf >= adapter->vfs_allocated_count)
5264                                 return 0;
5265
5266                         if (!adapter->vf_data[vf].vlans_enabled) {
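                        /* grow the VF's max frame size (RLPML) by the
                         * 4-byte VLAN tag when its first VLAN filter is
                         * added; the remove path below shrinks it again
                         * once the last filter goes away */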
5267                                 u32 size;
5268                                 reg = rd32(E1000_VMOLR(vf));
5269                                 size = reg & E1000_VMOLR_RLPML_MASK;
5270                                 size += 4;
5271                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5272                                 reg |= size;
5273                                 wr32(E1000_VMOLR(vf), reg);
5274                         }
5275
5276                         adapter->vf_data[vf].vlans_enabled++;
5277                 }
5278         } else {
5279                 if (i < E1000_VLVF_ARRAY_SIZE) {
5280                         /* remove vf from the pool */
5281                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5282                         /* if pool is empty then remove entry from vfta */
5283                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5284                                 reg = 0;
5285                                 igb_vfta_set(hw, vid, false);
5286                         }
5287                         wr32(E1000_VLVF(i), reg);
5288
5289                         /* do not modify RLPML for PF devices */
5290                         if (vf >= adapter->vfs_allocated_count)
5291                                 return 0;
5292
5293                         adapter->vf_data[vf].vlans_enabled--;
5294                         if (!adapter->vf_data[vf].vlans_enabled) {
5295                                 u32 size;
5296                                 reg = rd32(E1000_VMOLR(vf));
5297                                 size = reg & E1000_VMOLR_RLPML_MASK;
5298                                 size -= 4;
5299                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5300                                 reg |= size;
5301                                 wr32(E1000_VMOLR(vf), reg);
5302                         }
5303                 }
5304         }
5305         return 0;
5306 }
5307
5308 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5309 {
5310         struct e1000_hw *hw = &adapter->hw;
5311
5312         if (vid)
5313                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5314         else
5315                 wr32(E1000_VMVIR(vf), 0);
5316 }
5317
5318 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5319                                int vf, u16 vlan, u8 qos)
5320 {
5321         int err = 0;
5322         struct igb_adapter *adapter = netdev_priv(netdev);
5323
5324         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5325                 return -EINVAL;
5326         if (vlan || qos) {
5327                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5328                 if (err)
5329                         goto out;
5330                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5331                 igb_set_vmolr(adapter, vf, !vlan);
5332                 adapter->vf_data[vf].pf_vlan = vlan;
5333                 adapter->vf_data[vf].pf_qos = qos;
5334                 dev_info(&adapter->pdev->dev,
5335                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5336                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5337                         dev_warn(&adapter->pdev->dev,
5338                                  "The VF VLAN has been set,"
5339                                  " but the PF device is not up.\n");
5340                         dev_warn(&adapter->pdev->dev,
5341                                  "Bring the PF device up before"
5342                                  " attempting to use the VF device.\n");
5343                 }
5344         } else {
5345                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5346                                    false, vf);
5347                 igb_set_vmvir(adapter, vlan, vf);
5348                 igb_set_vmolr(adapter, vf, true);
5349                 adapter->vf_data[vf].pf_vlan = 0;
5350                 adapter->vf_data[vf].pf_qos = 0;
5351         }
5352 out:
5353         return err;
5354 }
5355
5356 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5357 {
5358         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5359         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5360
5361         return igb_vlvf_set(adapter, vid, add, vf);
5362 }
5363
5364 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5365 {
5366         /* clear flags - except flag that indicates PF has set the MAC */
5367         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5368         adapter->vf_data[vf].last_nack = jiffies;
5369
5370         /* reset offloads to defaults */
5371         igb_set_vmolr(adapter, vf, true);
5372
5373         /* reset vlans for device */
5374         igb_clear_vf_vfta(adapter, vf);
5375         if (adapter->vf_data[vf].pf_vlan)
5376                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5377                                     adapter->vf_data[vf].pf_vlan,
5378                                     adapter->vf_data[vf].pf_qos);
5379         else
5380                 igb_clear_vf_vfta(adapter, vf);
5381
5382         /* reset multicast table array for vf */
5383         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5384
5385         /* Flush and reset the mta with the new values */
5386         igb_set_rx_mode(adapter->netdev);
5387 }
5388
5389 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5390 {
5391         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5392
5393         /* generate a new mac address as we were hotplug removed/added */
5394         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5395                 random_ether_addr(vf_mac);
5396
5397         /* process remaining reset events */
5398         igb_vf_reset(adapter, vf);
5399 }
5400
5401 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5402 {
5403         struct e1000_hw *hw = &adapter->hw;
5404         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5405         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5406         u32 reg, msgbuf[3];
5407         u8 *addr = (u8 *)(&msgbuf[1]);
5408
5409         /* process all the same items cleared in a function level reset */
5410         igb_vf_reset(adapter, vf);
5411
5412         /* set vf mac address */
5413         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5414
5415         /* enable transmit and receive for vf */
5416         reg = rd32(E1000_VFTE);
5417         wr32(E1000_VFTE, reg | (1 << vf));
5418         reg = rd32(E1000_VFRE);
5419         wr32(E1000_VFRE, reg | (1 << vf));
5420
5421         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5422
5423         /* reply to reset with ack and vf mac address */
5424         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5425         memcpy(addr, vf_mac, 6);
5426         igb_write_mbx(hw, msgbuf, 3, vf);
5427 }
5428
5429 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5430 {
5431         /*
5432          * The VF MAC Address is stored in a packed array of bytes
5433          * starting at the second 32 bit word of the msg array
5434          */
5435         unsigned char *addr = (unsigned char *)&msg[1];
5436         int err = -1;
5437
5438         if (is_valid_ether_addr(addr))
5439                 err = igb_set_vf_mac(adapter, vf, addr);
5440
5441         return err;
5442 }
5443
5444 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5445 {
5446         struct e1000_hw *hw = &adapter->hw;
5447         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5448         u32 msg = E1000_VT_MSGTYPE_NACK;
5449
5450         /* if device isn't clear to send it shouldn't be reading either */
5451         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5452             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5453                 igb_write_mbx(hw, &msg, 1, vf);
5454                 vf_data->last_nack = jiffies;
5455         }
5456 }
5457
5458 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5459 {
5460         struct pci_dev *pdev = adapter->pdev;
5461         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5462         struct e1000_hw *hw = &adapter->hw;
5463         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5464         s32 retval;
5465
5466         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5467
5468         if (retval) {
5469                 /* if receive failed revoke VF CTS stats and restart init */
5470                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5471                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5472                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5473                         return;
5474                 goto out;
5475         }
5476
5477         /* this is a message we already processed, do nothing */
5478         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5479                 return;
5480
5481         /*
5482          * until the vf completes a reset it should not be
5483          * allowed to start any configuration.
5484          */
5485
5486         if (msgbuf[0] == E1000_VF_RESET) {
5487                 igb_vf_reset_msg(adapter, vf);
5488                 return;
5489         }
5490
5491         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5492                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5493                         return;
5494                 retval = -1;
5495                 goto out;
5496         }
5497
5498         switch ((msgbuf[0] & 0xFFFF)) {
5499         case E1000_VF_SET_MAC_ADDR:
5500                 retval = -EINVAL;
5501                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5502                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5503                 else
5504                         dev_warn(&pdev->dev,
5505                                  "VF %d attempted to override administratively "
5506                                  "set MAC address\nReload the VF driver to "
5507                                  "resume operations\n", vf);
5508                 break;
5509         case E1000_VF_SET_PROMISC:
5510                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5511                 break;
5512         case E1000_VF_SET_MULTICAST:
5513                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5514                 break;
5515         case E1000_VF_SET_LPE:
5516                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5517                 break;
5518         case E1000_VF_SET_VLAN:
5519                 retval = -1;
5520                 if (vf_data->pf_vlan)
5521                         dev_warn(&pdev->dev,
5522                                  "VF %d attempted to override administratively "
5523                                  "set VLAN tag\nReload the VF driver to "
5524                                  "resume operations\n", vf);
5525                 else
5526                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5527                 break;
5528         default:
5529                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5530                 retval = -1;
5531                 break;
5532         }
5533
5534         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5535 out:
5536         /* notify the VF of the results of what it sent us */
5537         if (retval)
5538                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5539         else
5540                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5541
5542         igb_write_mbx(hw, msgbuf, 1, vf);
5543 }
5544
5545 static void igb_msg_task(struct igb_adapter *adapter)
5546 {
5547         struct e1000_hw *hw = &adapter->hw;
5548         u32 vf;
5549
5550         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5551                 /* process any reset requests */
5552                 if (!igb_check_for_rst(hw, vf))
5553                         igb_vf_reset_event(adapter, vf);
5554
5555                 /* process any messages pending */
5556                 if (!igb_check_for_msg(hw, vf))
5557                         igb_rcv_msg_from_vf(adapter, vf);
5558
5559                 /* process any acks */
5560                 if (!igb_check_for_ack(hw, vf))
5561                         igb_rcv_ack_from_vf(adapter, vf);
5562         }
5563 }
5564
5565 /**
5566  *  igb_set_uta - Set unicast filter table address
5567  *  @adapter: board private structure
5568  *
5569  *  The unicast table address is a register array of 32-bit registers.
5570  *  The table is meant to be used in a way similar to how the MTA is used;
5571  *  however, due to certain limitations in the hardware, it is necessary to
5572  *  set all the hash bits to 1 and to use the VMOLR ROPE bit as a promiscuous
5573  *  enable bit, allowing VLAN tag stripping when promiscuous mode is enabled.
5574  **/
5575 static void igb_set_uta(struct igb_adapter *adapter)
5576 {
5577         struct e1000_hw *hw = &adapter->hw;
5578         int i;
5579
5580         /* The UTA table only exists on 82576 hardware and newer */
5581         if (hw->mac.type < e1000_82576)
5582                 return;
5583
5584         /* we only need to do this if VMDq is enabled */
5585         if (!adapter->vfs_allocated_count)
5586                 return;
5587
5588         for (i = 0; i < hw->mac.uta_reg_count; i++)
5589                 array_wr32(E1000_UTA, i, ~0);
5590 }
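
/*
 * For comparison, had the hardware allowed it, the UTA would be filled
 * one address at a time the way the MTA is: hash the address, then set
 * the single bit the hash selects.  A sketch of that per-address update
 * (not used here), assuming a hypothetical hash_uc_addr() helper that
 * returns a 12-bit hash:
 *
 *	u32 hash = hash_uc_addr(hw, addr);
 *	u32 reg  = (hash >> 5) & (hw->mac.uta_reg_count - 1);
 *	u32 bit  = hash & 0x1f;
 *
 *	array_wr32(E1000_UTA, reg,
 *		   array_rd32(E1000_UTA, reg) | (1 << bit));
 *
 * Because of the hardware limitation described above, the driver
 * instead sets every bit and relies on the per-pool VMOLR ROPE bit to
 * gate unicast promiscuous reception.
 */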
5591
5592 /**
5593  * igb_intr_msi - Interrupt Handler
5594  * @irq: interrupt number
5595  * @data: pointer to a network interface device structure
5596  **/
5597 static irqreturn_t igb_intr_msi(int irq, void *data)
5598 {
5599         struct igb_adapter *adapter = data;
5600         struct igb_q_vector *q_vector = adapter->q_vector[0];
5601         struct e1000_hw *hw = &adapter->hw;
5602         /* read ICR disables interrupts using IAM */
5603         u32 icr = rd32(E1000_ICR);
5604
5605         igb_write_itr(q_vector);
5606
5607         if (icr & E1000_ICR_DRSTA)
5608                 schedule_work(&adapter->reset_task);
5609
5610         if (icr & E1000_ICR_DOUTSYNC) {
5611                 /* HW is reporting DMA is out of sync */
5612                 adapter->stats.doosync++;
5613         }
5614
5615         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5616                 hw->mac.get_link_status = 1;
5617                 if (!test_bit(__IGB_DOWN, &adapter->state))
5618                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619         }
5620
5621         napi_schedule(&q_vector->napi);
5622
5623         return IRQ_HANDLED;
5624 }
5625
5626 /**
5627  * igb_intr - Legacy Interrupt Handler
5628  * @irq: interrupt number
5629  * @data: pointer to a network interface device structure
5630  **/
5631 static irqreturn_t igb_intr(int irq, void *data)
5632 {
5633         struct igb_adapter *adapter = data;
5634         struct igb_q_vector *q_vector = adapter->q_vector[0];
5635         struct e1000_hw *hw = &adapter->hw;
5636         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5637          * need for the IMC write */
5638         u32 icr = rd32(E1000_ICR);
5639
5640         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5641          * not set, then the adapter didn't send an interrupt */
5642         if (!(icr & E1000_ICR_INT_ASSERTED))
5643                 return IRQ_NONE;
5644
5645         igb_write_itr(q_vector);
5646
5647         if (icr & E1000_ICR_DRSTA)
5648                 schedule_work(&adapter->reset_task);
5649
5650         if (icr & E1000_ICR_DOUTSYNC) {
5651                 /* HW is reporting DMA is out of sync */
5652                 adapter->stats.doosync++;
5653         }
5654
5655         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5656                 hw->mac.get_link_status = 1;
5657                 /* guard against interrupt when we're going down */
5658                 if (!test_bit(__IGB_DOWN, &adapter->state))
5659                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5660         }
5661
5662         napi_schedule(&q_vector->napi);
5663
5664         return IRQ_HANDLED;
5665 }
5666
5667 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5668 {
5669         struct igb_adapter *adapter = q_vector->adapter;
5670         struct e1000_hw *hw = &adapter->hw;
5671
5672         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5673             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5674                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5675                         igb_set_itr(q_vector);
5676                 else
5677                         igb_update_ring_itr(q_vector);
5678         }
5679
5680         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5681                 if (adapter->msix_entries)
5682                         wr32(E1000_EIMS, q_vector->eims_value);
5683                 else
5684                         igb_irq_enable(adapter);
5685         }
5686 }
5687
5688 /**
5689  * igb_poll - NAPI Rx polling callback
5690  * @napi: napi polling structure
5691  * @budget: count of how many packets we should handle
5692  **/
5693 static int igb_poll(struct napi_struct *napi, int budget)
5694 {
5695         struct igb_q_vector *q_vector = container_of(napi,
5696                                                      struct igb_q_vector,
5697                                                      napi);
5698         bool clean_complete = true;
5699
5700 #ifdef CONFIG_IGB_DCA
5701         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5702                 igb_update_dca(q_vector);
5703 #endif
5704         if (q_vector->tx.ring)
5705                 clean_complete = igb_clean_tx_irq(q_vector);
5706
5707         if (q_vector->rx.ring)
5708                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5709
5710         /* If all work not completed, return budget and keep polling */
5711         if (!clean_complete)
5712                 return budget;
5713
5714         /* If not enough Rx work done, exit the polling mode */
5715         napi_complete(napi);
5716         igb_ring_irq_enable(q_vector);
5717
5718         return 0;
5719 }
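
/*
 * igb_poll() follows the standard NAPI contract: a poll routine that
 * uses its whole budget must return the budget so the core keeps it
 * scheduled, while one that finishes early calls napi_complete() and
 * re-enables its interrupt before returning.  The generic shape of the
 * pattern, with clean_rings() and reenable_irq() as hypothetical
 * stand-ins:
 *
 *	static int poll(struct napi_struct *napi, int budget)
 *	{
 *		int work_done = clean_rings(napi, budget);
 *
 *		if (work_done == budget)
 *			return budget;         // more work pending
 *
 *		napi_complete(napi);           // leave polled mode
 *		reenable_irq(napi);            // take interrupts again
 *		return work_done;
 *	}
 */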
5720
5721 /**
5722  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5723  * @adapter: board private structure
5724  * @shhwtstamps: timestamp structure to update
5725  * @regval: unsigned 64bit system time value.
5726  *
5727  * We need to convert the system time value stored in the RX/TXSTMP registers
5728  * into a hwtstamp which can be used by the upper level timestamping functions
5729  */
5730 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5731                                    struct skb_shared_hwtstamps *shhwtstamps,
5732                                    u64 regval)
5733 {
5734         u64 ns;
5735
5736         /*
5737          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5738          * 24 to match clock shift we setup earlier.
5739          */
5740         if (adapter->hw.mac.type >= e1000_82580)
5741                 regval <<= IGB_82580_TSYNC_SHIFT;
5742
5743         ns = timecounter_cyc2time(&adapter->clock, regval);
5744         timecompare_update(&adapter->compare, ns);
5745         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5746         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5747         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5748 }
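
/*
 * Worked example of the shift above: 82580-class parts latch whole
 * nanoseconds starting at bit 0 of RX/TXSTMPL, but the timecounter was
 * initialized with a cyclecounter shift of IGB_82580_TSYNC_SHIFT (24).
 * A raw latch of, say, 0x1234 ns must therefore be presented to
 * timecounter_cyc2time() as 0x1234 << 24; passing it unshifted would
 * under-report elapsed time by a factor of 2^24.
 */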
5749
5750 /**
5751  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5752  * @q_vector: pointer to q_vector containing needed info
5753  * @buffer: pointer to igb_tx_buffer structure
5754  *
5755  * If we were asked to do hardware stamping and such a time stamp is
5756  * available, then it must have been for this skb here because we
5757  * allow only one such packet into the queue.
5758  */
5759 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5760                             struct igb_tx_buffer *buffer_info)
5761 {
5762         struct igb_adapter *adapter = q_vector->adapter;
5763         struct e1000_hw *hw = &adapter->hw;
5764         struct skb_shared_hwtstamps shhwtstamps;
5765         u64 regval;
5766
5767         /* if skb does not support hw timestamp or TX stamp not valid exit */
5768         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5769             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5770                 return;
5771
5772         regval = rd32(E1000_TXSTMPL);
5773         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5774
5775         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5776         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5777 }
5778
5779 /**
5780  * igb_clean_tx_irq - Reclaim resources after transmit completes
5781  * @q_vector: pointer to q_vector containing needed info
5782  * returns true if ring is completely cleaned
5783  **/
5784 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5785 {
5786         struct igb_adapter *adapter = q_vector->adapter;
5787         struct igb_ring *tx_ring = q_vector->tx.ring;
5788         struct igb_tx_buffer *tx_buffer;
5789         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5790         unsigned int total_bytes = 0, total_packets = 0;
5791         unsigned int budget = q_vector->tx.work_limit;
5792         unsigned int i = tx_ring->next_to_clean;
5793
5794         if (test_bit(__IGB_DOWN, &adapter->state))
5795                 return true;
5796
5797         tx_buffer = &tx_ring->tx_buffer_info[i];
5798         tx_desc = IGB_TX_DESC(tx_ring, i);
5799         i -= tx_ring->count;
5800
5801         for (; budget; budget--) {
5802                 eop_desc = tx_buffer->next_to_watch;
5803
5804                 /* prevent any other reads prior to eop_desc */
5805                 rmb();
5806
5807                 /* if next_to_watch is not set then there is no work pending */
5808                 if (!eop_desc)
5809                         break;
5810
5811                 /* if DD is not set pending work has not been completed */
5812                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5813                         break;
5814
5815                 /* clear next_to_watch to prevent false hangs */
5816                 tx_buffer->next_to_watch = NULL;
5817
5818                 /* update the statistics for this packet */
5819                 total_bytes += tx_buffer->bytecount;
5820                 total_packets += tx_buffer->gso_segs;
5821
5822                 /* retrieve hardware timestamp */
5823                 igb_tx_hwtstamp(q_vector, tx_buffer);
5824
5825                 /* free the skb */
5826                 dev_kfree_skb_any(tx_buffer->skb);
5827                 tx_buffer->skb = NULL;
5828
5829                 /* unmap skb header data */
5830                 dma_unmap_single(tx_ring->dev,
5831                                  tx_buffer->dma,
5832                                  tx_buffer->length,
5833                                  DMA_TO_DEVICE);
5834
5835                 /* clear last DMA location and unmap remaining buffers */
5836                 while (tx_desc != eop_desc) {
5837                         tx_buffer->dma = 0;
5838
5839                         tx_buffer++;
5840                         tx_desc++;
5841                         i++;
5842                         if (unlikely(!i)) {
5843                                 i -= tx_ring->count;
5844                                 tx_buffer = tx_ring->tx_buffer_info;
5845                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5846                         }
5847
5848                         /* unmap any remaining paged data */
5849                         if (tx_buffer->dma) {
5850                                 dma_unmap_page(tx_ring->dev,
5851                                                tx_buffer->dma,
5852                                                tx_buffer->length,
5853                                                DMA_TO_DEVICE);
5854                         }
5855                 }
5856
5857                 /* clear last DMA location */
5858                 tx_buffer->dma = 0;
5859
5860                 /* move us one more past the eop_desc for start of next pkt */
5861                 tx_buffer++;
5862                 tx_desc++;
5863                 i++;
5864                 if (unlikely(!i)) {
5865                         i -= tx_ring->count;
5866                         tx_buffer = tx_ring->tx_buffer_info;
5867                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5868                 }
5869         }
5870
5871         netdev_tx_completed_queue(txring_txq(tx_ring),
5872                                   total_packets, total_bytes);
5873         i += tx_ring->count;
5874         tx_ring->next_to_clean = i;
5875         u64_stats_update_begin(&tx_ring->tx_syncp);
5876         tx_ring->tx_stats.bytes += total_bytes;
5877         tx_ring->tx_stats.packets += total_packets;
5878         u64_stats_update_end(&tx_ring->tx_syncp);
5879         q_vector->tx.total_bytes += total_bytes;
5880         q_vector->tx.total_packets += total_packets;
5881
5882         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5883                 struct e1000_hw *hw = &adapter->hw;
5884
5885                 eop_desc = tx_buffer->next_to_watch;
5886
5887                 /* Detect a transmit hang in hardware; this serializes the
5888                  * check with the clearing of time_stamp and movement of i */
5889                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5890                 if (eop_desc &&
5891                     time_after(jiffies, tx_buffer->time_stamp +
5892                                (adapter->tx_timeout_factor * HZ)) &&
5893                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5894
5895                         /* detected Tx unit hang */
5896                         dev_err(tx_ring->dev,
5897                                 "Detected Tx Unit Hang\n"
5898                                 "  Tx Queue             <%d>\n"
5899                                 "  TDH                  <%x>\n"
5900                                 "  TDT                  <%x>\n"
5901                                 "  next_to_use          <%x>\n"
5902                                 "  next_to_clean        <%x>\n"
5903                                 "buffer_info[next_to_clean]\n"
5904                                 "  time_stamp           <%lx>\n"
5905                                 "  next_to_watch        <%p>\n"
5906                                 "  jiffies              <%lx>\n"
5907                                 "  desc.status          <%x>\n",
5908                                 tx_ring->queue_index,
5909                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5910                                 readl(tx_ring->tail),
5911                                 tx_ring->next_to_use,
5912                                 tx_ring->next_to_clean,
5913                                 tx_buffer->time_stamp,
5914                                 eop_desc,
5915                                 jiffies,
5916                                 eop_desc->wb.status);
5917                         netif_stop_subqueue(tx_ring->netdev,
5918                                             tx_ring->queue_index);
5919
5920                         /* we are about to reset, no point in enabling stuff */
5921                         return true;
5922                 }
5923         }
5924
5925         if (unlikely(total_packets &&
5926                      netif_carrier_ok(tx_ring->netdev) &&
5927                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5928                 /* Make sure that anybody stopping the queue after this
5929                  * sees the new next_to_clean.
5930                  */
5931                 smp_mb();
5932                 if (__netif_subqueue_stopped(tx_ring->netdev,
5933                                              tx_ring->queue_index) &&
5934                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5935                         netif_wake_subqueue(tx_ring->netdev,
5936                                             tx_ring->queue_index);
5937
5938                         u64_stats_update_begin(&tx_ring->tx_syncp);
5939                         tx_ring->tx_stats.restart_queue++;
5940                         u64_stats_update_end(&tx_ring->tx_syncp);
5941                 }
5942         }
5943
5944         return !!budget;
5945 }
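
/*
 * Note the index trick used throughout the cleanup loop above: i is
 * biased by -tx_ring->count on entry, so "i == 0" doubles as the wrap
 * condition and the hot path needs neither a modulo nor a compare
 * against count.  Schematically:
 *
 *	i = next_to_clean - count;     // bias the index negative
 *	...
 *	i++;
 *	if (unlikely(!i)) {            // walked off the end of the ring
 *		i -= count;            // re-bias for the next lap
 *		tx_buffer = tx_ring->tx_buffer_info;
 *		tx_desc = IGB_TX_DESC(tx_ring, 0);
 *	}
 *	...
 *	next_to_clean = i + count;     // undo the bias when storing back
 */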
5946
5947 static inline void igb_rx_checksum(struct igb_ring *ring,
5948                                    union e1000_adv_rx_desc *rx_desc,
5949                                    struct sk_buff *skb)
5950 {
5951         skb_checksum_none_assert(skb);
5952
5953         /* Ignore Checksum bit is set */
5954         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5955                 return;
5956
5957         /* Rx checksum disabled via ethtool */
5958         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5959                 return;
5960
5961         /* TCP/UDP checksum error bit is set */
5962         if (igb_test_staterr(rx_desc,
5963                              E1000_RXDEXT_STATERR_TCPE |
5964                              E1000_RXDEXT_STATERR_IPE)) {
5965                 /*
5966                  * work around errata with sctp packets where the TCPE aka
5967                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5968                  * packets (i.e., let the stack check the crc32c)
5969                  */
5970                 if (!((skb->len == 60) &&
5971                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5972                         u64_stats_update_begin(&ring->rx_syncp);
5973                         ring->rx_stats.csum_err++;
5974                         u64_stats_update_end(&ring->rx_syncp);
5975                 }
5976                 /* let the stack verify checksum errors */
5977                 return;
5978         }
5979         /* It must be a TCP or UDP packet with a valid checksum */
5980         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5981                                       E1000_RXD_STAT_UDPCS))
5982                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5983
5984         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5985                 le32_to_cpu(rx_desc->wb.upper.status_error));
5986 }
5987
5988 static inline void igb_rx_hash(struct igb_ring *ring,
5989                                union e1000_adv_rx_desc *rx_desc,
5990                                struct sk_buff *skb)
5991 {
5992         if (ring->netdev->features & NETIF_F_RXHASH)
5993                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5994 }
5995
5996 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5997                             union e1000_adv_rx_desc *rx_desc,
5998                             struct sk_buff *skb)
5999 {
6000         struct igb_adapter *adapter = q_vector->adapter;
6001         struct e1000_hw *hw = &adapter->hw;
6002         u64 regval;
6003
6004         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6005                                        E1000_RXDADV_STAT_TS))
6006                 return;
6007
6008         /*
6009          * If this bit is set, then the RX registers contain the time stamp. No
6010          * other packet will be time stamped until we read these registers, so
6011          * read the registers to make them available again. Because only one
6012          * packet can be time stamped at a time, we know that the register
6013          * values must belong to this one here and therefore we don't need to
6014          * compare any of the additional attributes stored for it.
6015          *
6016          * If nothing went wrong, then it should have a shared tx_flags that we
6017          * can turn into a skb_shared_hwtstamps.
6018          */
6019         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6020                 u32 *stamp = (u32 *)skb->data;
6021                 regval = le32_to_cpu(*(stamp + 2));
6022                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6023                 skb_pull(skb, IGB_TS_HDR_LEN);
6024         } else {
6025                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6026                         return;
6027
6028                 regval = rd32(E1000_RXSTMPL);
6029                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6030         }
6031
6032         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6033 }
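
/*
 * Layout note for the TSIP branch above: when the timestamp arrives in
 * the packet buffer itself, the frame is preceded by an
 * IGB_TS_HDR_LEN-byte header whose third and fourth little-endian
 * 32-bit words hold the low and high halves of the latched SYSTIM
 * value; skb_pull() then strips the header so the stack sees only the
 * frame proper.
 */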
6034
6035 static void igb_rx_vlan(struct igb_ring *ring,
6036                         union e1000_adv_rx_desc *rx_desc,
6037                         struct sk_buff *skb)
6038 {
6039         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6040                 u16 vid;
6041                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6042                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6043                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6044                 else
6045                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6046
6047                 __vlan_hwaccel_put_tag(skb, vid);
6048         }
6049 }
6050
6051 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6052 {
6053         /* HW will not DMA in data larger than the given buffer, even if it
6054          * parses the (NFS, of course) header to be larger.  In that case, it
6055          * fills the header buffer and spills the rest into the page.
6056          */
6057         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6058                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6059         if (hlen > IGB_RX_HDR_LEN)
6060                 hlen = IGB_RX_HDR_LEN;
6061         return hlen;
6062 }
6063
6064 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6065 {
6066         struct igb_ring *rx_ring = q_vector->rx.ring;
6067         union e1000_adv_rx_desc *rx_desc;
6068         const int current_node = numa_node_id();
6069         unsigned int total_bytes = 0, total_packets = 0;
6070         u16 cleaned_count = igb_desc_unused(rx_ring);
6071         u16 i = rx_ring->next_to_clean;
6072
6073         rx_desc = IGB_RX_DESC(rx_ring, i);
6074
6075         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6076                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6077                 struct sk_buff *skb = buffer_info->skb;
6078                 union e1000_adv_rx_desc *next_rxd;
6079
6080                 buffer_info->skb = NULL;
6081                 prefetch(skb->data);
6082
6083                 i++;
6084                 if (i == rx_ring->count)
6085                         i = 0;
6086
6087                 next_rxd = IGB_RX_DESC(rx_ring, i);
6088                 prefetch(next_rxd);
6089
6090                 /*
6091                  * This memory barrier is needed to keep us from reading
6092                  * any other fields out of the rx_desc until we know the
6093                  * RXD_STAT_DD bit is set
6094                  */
6095                 rmb();
6096
6097                 if (!skb_is_nonlinear(skb)) {
6098                         __skb_put(skb, igb_get_hlen(rx_desc));
6099                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
6100                                          IGB_RX_HDR_LEN,
6101                                          DMA_FROM_DEVICE);
6102                         buffer_info->dma = 0;
6103                 }
6104
6105                 if (rx_desc->wb.upper.length) {
6106                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6107
6108                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6109                                                 buffer_info->page,
6110                                                 buffer_info->page_offset,
6111                                                 length);
6112
6113                         skb->len += length;
6114                         skb->data_len += length;
6115                         skb->truesize += PAGE_SIZE / 2;
6116
6117                         if ((page_count(buffer_info->page) != 1) ||
6118                             (page_to_nid(buffer_info->page) != current_node))
6119                                 buffer_info->page = NULL;
6120                         else
6121                                 get_page(buffer_info->page);
6122
6123                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6124                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
6125                         buffer_info->page_dma = 0;
6126                 }
6127
6128                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6129                         struct igb_rx_buffer *next_buffer;
6130                         next_buffer = &rx_ring->rx_buffer_info[i];
6131                         buffer_info->skb = next_buffer->skb;
6132                         buffer_info->dma = next_buffer->dma;
6133                         next_buffer->skb = skb;
6134                         next_buffer->dma = 0;
6135                         goto next_desc;
6136                 }
6137
6138                 if (unlikely((igb_test_staterr(rx_desc,
6139                                                E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6140                              && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6141                         dev_kfree_skb_any(skb);
6142                         goto next_desc;
6143                 }
6144
6145                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6146                 igb_rx_hash(rx_ring, rx_desc, skb);
6147                 igb_rx_checksum(rx_ring, rx_desc, skb);
6148                 igb_rx_vlan(rx_ring, rx_desc, skb);
6149
6150                 total_bytes += skb->len;
6151                 total_packets++;
6152
6153                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6154
6155                 napi_gro_receive(&q_vector->napi, skb);
6156
6157                 budget--;
6158 next_desc:
6159                 if (!budget)
6160                         break;
6161
6162                 cleaned_count++;
6163                 /* return some buffers to hardware, one at a time is too slow */
6164                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6165                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6166                         cleaned_count = 0;
6167                 }
6168
6169                 /* use prefetched values */
6170                 rx_desc = next_rxd;
6171         }
6172
6173         rx_ring->next_to_clean = i;
6174         u64_stats_update_begin(&rx_ring->rx_syncp);
6175         rx_ring->rx_stats.packets += total_packets;
6176         rx_ring->rx_stats.bytes += total_bytes;
6177         u64_stats_update_end(&rx_ring->rx_syncp);
6178         q_vector->rx.total_packets += total_packets;
6179         q_vector->rx.total_bytes += total_bytes;
6180
6181         if (cleaned_count)
6182                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6183
6184         return !!budget;
6185 }
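
/*
 * Buffers on this ring are header-split, which is what the loop above
 * is reassembling: each descriptor pairs a small header buffer
 * (IGB_RX_HDR_LEN bytes, mapped at skb->data) with a half-page data
 * buffer.  Short frames fit entirely in the header buffer; anything
 * longer spills into the page, which is attached to the skb as a page
 * fragment via skb_fill_page_desc().  Frames spanning several
 * descriptors (EOP not set) chain by handing the skb forward to the
 * next buffer_info slot and continuing on the following descriptor.
 */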
6186
6187 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6188                                  struct igb_rx_buffer *bi)
6189 {
6190         struct sk_buff *skb = bi->skb;
6191         dma_addr_t dma = bi->dma;
6192
6193         if (dma)
6194                 return true;
6195
6196         if (likely(!skb)) {
6197                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6198                                                 IGB_RX_HDR_LEN);
6199                 bi->skb = skb;
6200                 if (!skb) {
6201                         rx_ring->rx_stats.alloc_failed++;
6202                         return false;
6203                 }
6204
6205                 /* initialize skb for ring */
6206                 skb_record_rx_queue(skb, rx_ring->queue_index);
6207         }
6208
6209         dma = dma_map_single(rx_ring->dev, skb->data,
6210                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6211
6212         if (dma_mapping_error(rx_ring->dev, dma)) {
6213                 rx_ring->rx_stats.alloc_failed++;
6214                 return false;
6215         }
6216
6217         bi->dma = dma;
6218         return true;
6219 }
6220
6221 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6222                                   struct igb_rx_buffer *bi)
6223 {
6224         struct page *page = bi->page;
6225         dma_addr_t page_dma = bi->page_dma;
6226         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6227
6228         if (page_dma)
6229                 return true;
6230
6231         if (!page) {
6232                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6233                 bi->page = page;
6234                 if (unlikely(!page)) {
6235                         rx_ring->rx_stats.alloc_failed++;
6236                         return false;
6237                 }
6238         }
6239
6240         page_dma = dma_map_page(rx_ring->dev, page,
6241                                 page_offset, PAGE_SIZE / 2,
6242                                 DMA_FROM_DEVICE);
6243
6244         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6245                 rx_ring->rx_stats.alloc_failed++;
6246                 return false;
6247         }
6248
6249         bi->page_dma = page_dma;
6250         bi->page_offset = page_offset;
6251         return true;
6252 }
6253
6254 /**
6255  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6256  * @rx_ring: RX ring on which to place the new buffers
      * @cleaned_count: number of descriptors to refill
6257  **/
6258 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6259 {
6260         union e1000_adv_rx_desc *rx_desc;
6261         struct igb_rx_buffer *bi;
6262         u16 i = rx_ring->next_to_use;
6263
6264         rx_desc = IGB_RX_DESC(rx_ring, i);
6265         bi = &rx_ring->rx_buffer_info[i];
6266         i -= rx_ring->count;
6267
6268         while (cleaned_count--) {
6269                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6270                         break;
6271
6272                 /* Refresh the desc even if buffer_addrs didn't change
6273                  * because each write-back erases this info. */
6274                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6275
6276                 if (!igb_alloc_mapped_page(rx_ring, bi))
6277                         break;
6278
6279                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6280
6281                 rx_desc++;
6282                 bi++;
6283                 i++;
6284                 if (unlikely(!i)) {
6285                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6286                         bi = rx_ring->rx_buffer_info;
6287                         i -= rx_ring->count;
6288                 }
6289
6290                 /* clear the hdr_addr for the next_to_use descriptor */
6291                 rx_desc->read.hdr_addr = 0;
6292         }
6293
6294         i += rx_ring->count;
6295
6296         if (rx_ring->next_to_use != i) {
6297                 rx_ring->next_to_use = i;
6298
6299                 /* Force memory writes to complete before letting h/w
6300                  * know there are new descriptors to fetch.  (Only
6301                  * applicable for weak-ordered memory model archs,
6302                  * such as IA-64). */
6303                 wmb();
6304                 writel(i, rx_ring->tail);
6305         }
6306 }
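
/*
 * The wmb()/writel() pair above is the canonical descriptor-producer
 * pattern: fill in the descriptors first, force them to be globally
 * visible, and only then move the tail pointer that tells the NIC to
 * fetch.  Schematically:
 *
 *	rx_desc->read.pkt_addr = cpu_to_le64(dma);  // publish descriptor
 *	wmb();                                      // descriptors before tail
 *	writel(i, rx_ring->tail);                   // hand ownership to HW
 *
 * Reversing the order could let the device fetch a stale descriptor on
 * weakly ordered architectures.
 */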
6307
6308 /**
6309  * igb_mii_ioctl - handle MII ioctls
6310  * @netdev: network interface device structure
6311  * @ifr: interface request structure holding the MII data
6312  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6313  **/
6314 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6315 {
6316         struct igb_adapter *adapter = netdev_priv(netdev);
6317         struct mii_ioctl_data *data = if_mii(ifr);
6318
6319         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6320                 return -EOPNOTSUPP;
6321
6322         switch (cmd) {
6323         case SIOCGMIIPHY:
6324                 data->phy_id = adapter->hw.phy.addr;
6325                 break;
6326         case SIOCGMIIREG:
6327                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6328                                      &data->val_out))
6329                         return -EIO;
6330                 break;
6331         case SIOCSMIIREG:
6332         default:
6333                 return -EOPNOTSUPP;
6334         }
6335         return 0;
6336 }
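
/*
 * From userspace these MII ioctls are issued over an ordinary socket;
 * a minimal sketch (hypothetical "eth0" interface, error handling
 * omitted), mirroring how if_mii() overlays struct ifreq:
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_ifru;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);    // fills mii->phy_id
 *	mii->reg_num = MII_BMSR;         // basic mode status register
 *	ioctl(fd, SIOCGMIIREG, &ifr);    // PHY register in mii->val_out
 */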
6337
6338 /**
6339  * igb_hwtstamp_ioctl - control hardware time stamping
6340  * @netdev: network interface device structure
6341  * @ifr: interface request structure carrying a struct hwtstamp_config
6342  * @cmd: ioctl command (SIOCSHWTSTAMP)
6343  *
6344  * Outgoing time stamping can be enabled and disabled. Play nice and
6345  * disable it when requested, although it shouldn't cause any overhead
6346  * when no packet needs it. At most one packet in the queue may be
6347  * marked for time stamping, otherwise it would be impossible to tell
6348  * for sure to which packet the hardware time stamp belongs.
6349  *
6350  * Incoming time stamping has to be configured via the hardware
6351  * filters. Not all combinations are supported, in particular event
6352  * type has to be specified. Matching the kind of event packet is
6353  * not supported, with the exception of "all V2 events regardless of
6354  * layer 2 or 4".
6355  *
6356  **/
6357 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6358                               struct ifreq *ifr, int cmd)
6359 {
6360         struct igb_adapter *adapter = netdev_priv(netdev);
6361         struct e1000_hw *hw = &adapter->hw;
6362         struct hwtstamp_config config;
6363         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6364         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6365         u32 tsync_rx_cfg = 0;
6366         bool is_l4 = false;
6367         bool is_l2 = false;
6368         u32 regval;
6369
6370         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6371                 return -EFAULT;
6372
6373         /* reserved for future extensions */
6374         if (config.flags)
6375                 return -EINVAL;
6376
6377         switch (config.tx_type) {
6378         case HWTSTAMP_TX_OFF:
6379                 tsync_tx_ctl = 0; /* fall through */
6380         case HWTSTAMP_TX_ON:
6381                 break;
6382         default:
6383                 return -ERANGE;
6384         }
6385
6386         switch (config.rx_filter) {
6387         case HWTSTAMP_FILTER_NONE:
6388                 tsync_rx_ctl = 0;
6389                 break;
6390         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6391         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6392         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6393         case HWTSTAMP_FILTER_ALL:
6394                 /*
6395                  * register TSYNCRXCFG must be set, therefore it is not
6396                  * possible to time stamp both Sync and Delay_Req messages
6397                  * => fall back to time stamping all packets
6398                  */
6399                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6400                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6401                 break;
6402         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6403                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6404                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6405                 is_l4 = true;
6406                 break;
6407         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6408                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6409                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6410                 is_l4 = true;
6411                 break;
6412         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6413         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6414                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6415                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6416                 is_l2 = true;
6417                 is_l4 = true;
6418                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6419                 break;
6420         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6421         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6422                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6423                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6424                 is_l2 = true;
6425                 is_l4 = true;
6426                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6427                 break;
6428         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6429         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6430         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6431                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6432                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6433                 is_l2 = true;
6434                 is_l4 = true;
6435                 break;
6436         default:
6437                 return -ERANGE;
6438         }
6439
6440         if (hw->mac.type == e1000_82575) {
6441                 if (tsync_rx_ctl || tsync_tx_ctl)
6442                         return -EINVAL;
6443                 return 0;
6444         }
6445
6446         /*
6447          * Per-packet timestamping only works if all packets are
6448          * timestamped, so enable timestamping in all packets as
6449          * long as one rx filter was configured.
6450          */
6451         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6452                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6453                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6454         }
6455
6456         /* enable/disable TX */
6457         regval = rd32(E1000_TSYNCTXCTL);
6458         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6459         regval |= tsync_tx_ctl;
6460         wr32(E1000_TSYNCTXCTL, regval);
6461
6462         /* enable/disable RX */
6463         regval = rd32(E1000_TSYNCRXCTL);
6464         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6465         regval |= tsync_rx_ctl;
6466         wr32(E1000_TSYNCRXCTL, regval);
6467
6468         /* define which PTP packets are time stamped */
6469         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6470
6471         /* define ethertype filter for timestamped packets */
6472         if (is_l2)
6473                 wr32(E1000_ETQF(3),
6474                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6475                                  E1000_ETQF_1588 | /* enable timestamping */
6476                                  ETH_P_1588));     /* 1588 eth protocol type */
6477         else
6478                 wr32(E1000_ETQF(3), 0);
6479
6480 #define PTP_PORT 319
6481         /* L4 Queue Filter[3]: filter by destination port and protocol */
6482         if (is_l4) {
6483                 u32 ftqf = (IPPROTO_UDP /* UDP */
6484                         | E1000_FTQF_VF_BP /* VF not compared */
6485                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6486                         | E1000_FTQF_MASK); /* mask all inputs */
6487                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6488
6489                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6490                 wr32(E1000_IMIREXT(3),
6491                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6492                 if (hw->mac.type == e1000_82576) {
6493                         /* enable source port check */
6494                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6495                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6496                 }
6497                 wr32(E1000_FTQF(3), ftqf);
6498         } else {
6499                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6500         }
6501         wrfl();
6502
6503         adapter->hwtstamp_config = config;
6504
6505         /* clear TX/RX time stamp registers, just to be sure */
6506         regval = rd32(E1000_TXSTMPH);
6507         regval = rd32(E1000_RXSTMPH);
6508
6509         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6510                 -EFAULT : 0;
6511 }
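
/*
 * The matching userspace request (see
 * Documentation/networking/timestamping.txt) looks roughly like this
 * sketch, assuming a hypothetical "eth0" interface and omitting error
 * handling:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg holds what was actually programmed; for example a
 * request for HWTSTAMP_FILTER_PTP_V1_L4_EVENT comes back as
 * HWTSTAMP_FILTER_ALL, as the switch statement above shows.
 */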
6512
6513 /**
6514  * igb_ioctl - dispatch device-specific ioctls
6515  * @netdev: network interface device structure
6516  * @ifr: interface request structure
6517  * @cmd: ioctl command
6518  **/
6519 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6520 {
6521         switch (cmd) {
6522         case SIOCGMIIPHY:
6523         case SIOCGMIIREG:
6524         case SIOCSMIIREG:
6525                 return igb_mii_ioctl(netdev, ifr, cmd);
6526         case SIOCSHWTSTAMP:
6527                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6528         default:
6529                 return -EOPNOTSUPP;
6530         }
6531 }
6532
6533 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6534 {
6535         struct igb_adapter *adapter = hw->back;
6536         u16 cap_offset;
6537
6538         cap_offset = adapter->pdev->pcie_cap;
6539         if (!cap_offset)
6540                 return -E1000_ERR_CONFIG;
6541
6542         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6543
6544         return 0;
6545 }
6546
6547 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6548 {
6549         struct igb_adapter *adapter = hw->back;
6550         u16 cap_offset;
6551
6552         cap_offset = adapter->pdev->pcie_cap;
6553         if (!cap_offset)
6554                 return -E1000_ERR_CONFIG;
6555
6556         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6557
6558         return 0;
6559 }
6560
6561 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6562 {
6563         struct igb_adapter *adapter = netdev_priv(netdev);
6564         struct e1000_hw *hw = &adapter->hw;
6565         u32 ctrl, rctl;
6566         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6567
6568         if (enable) {
6569                 /* enable VLAN tag insert/strip */
6570                 ctrl = rd32(E1000_CTRL);
6571                 ctrl |= E1000_CTRL_VME;
6572                 wr32(E1000_CTRL, ctrl);
6573
6574                 /* Disable CFI check */
6575                 rctl = rd32(E1000_RCTL);
6576                 rctl &= ~E1000_RCTL_CFIEN;
6577                 wr32(E1000_RCTL, rctl);
6578         } else {
6579                 /* disable VLAN tag insert/strip */
6580                 ctrl = rd32(E1000_CTRL);
6581                 ctrl &= ~E1000_CTRL_VME;
6582                 wr32(E1000_CTRL, ctrl);
6583         }
6584
6585         igb_rlpml_set(adapter);
6586 }
6587
6588 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6589 {
6590         struct igb_adapter *adapter = netdev_priv(netdev);
6591         struct e1000_hw *hw = &adapter->hw;
6592         int pf_id = adapter->vfs_allocated_count;
6593
6594         /* attempt to add filter to vlvf array */
6595         igb_vlvf_set(adapter, vid, true, pf_id);
6596
6597         /* add the filter since PF can receive vlans w/o entry in vlvf */
6598         igb_vfta_set(hw, vid, true);
6599
6600         set_bit(vid, adapter->active_vlans);
6601
6602         return 0;
6603 }
6604
6605 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6606 {
6607         struct igb_adapter *adapter = netdev_priv(netdev);
6608         struct e1000_hw *hw = &adapter->hw;
6609         int pf_id = adapter->vfs_allocated_count;
6610         s32 err;
6611
6612         /* remove vlan from VLVF table array */
6613         err = igb_vlvf_set(adapter, vid, false, pf_id);
6614
6615         /* if vid was not present in VLVF just remove it from table */
6616         if (err)
6617                 igb_vfta_set(hw, vid, false);
6618
6619         clear_bit(vid, adapter->active_vlans);
6620
6621         return 0;
6622 }
6623
6624 static void igb_restore_vlan(struct igb_adapter *adapter)
6625 {
6626         u16 vid;
6627
6628         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6629
6630         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6631                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6632 }
6633
6634 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6635 {
6636         struct pci_dev *pdev = adapter->pdev;
6637         struct e1000_mac_info *mac = &adapter->hw.mac;
6638
6639         mac->autoneg = 0;
6640
6641         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6642          * for the switch() below to work */
6643         if ((spd & 1) || (dplx & ~1))
6644                 goto err_inval;
6645
6646         /* Fiber NICs only allow 1000 Mbps full duplex */
6647         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6648             (spd != SPEED_1000 ||
6649              dplx != DUPLEX_FULL))
6650                 goto err_inval;
6651
6652         switch (spd + dplx) {
6653         case SPEED_10 + DUPLEX_HALF:
6654                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6655                 break;
6656         case SPEED_10 + DUPLEX_FULL:
6657                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6658                 break;
6659         case SPEED_100 + DUPLEX_HALF:
6660                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6661                 break;
6662         case SPEED_100 + DUPLEX_FULL:
6663                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6664                 break;
6665         case SPEED_1000 + DUPLEX_FULL:
6666                 mac->autoneg = 1;
6667                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6668                 break;
6669         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6670         default:
6671                 goto err_inval;
6672         }
6673         return 0;
6674
6675 err_inval:
6676         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6677         return -EINVAL;
6678 }
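
/*
 * This is the backend for forced link settings coming down from
 * ethtool, e.g.:
 *
 *	ethtool -s eth0 speed 100 duplex full autoneg off
 *
 * Note that the 1000/FULL case above deliberately turns autoneg back
 * on and only narrows what is advertised: 1000BASE-T requires
 * autonegotiation (for master/slave resolution), so gigabit cannot be
 * truly forced.
 */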
6679
6680 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6681                           bool runtime)
6682 {
6683         struct net_device *netdev = pci_get_drvdata(pdev);
6684         struct igb_adapter *adapter = netdev_priv(netdev);
6685         struct e1000_hw *hw = &adapter->hw;
6686         u32 ctrl, rctl, status;
6687         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6688 #ifdef CONFIG_PM
6689         int retval = 0;
6690 #endif
6691
6692         netif_device_detach(netdev);
6693
6694         if (netif_running(netdev))
6695                 __igb_close(netdev, true);
6696
6697         igb_clear_interrupt_scheme(adapter);
6698
6699 #ifdef CONFIG_PM
6700         retval = pci_save_state(pdev);
6701         if (retval)
6702                 return retval;
6703 #endif
6704
6705         status = rd32(E1000_STATUS);
6706         if (status & E1000_STATUS_LU)
6707                 wufc &= ~E1000_WUFC_LNKC;
6708
6709         if (wufc) {
6710                 igb_setup_rctl(adapter);
6711                 igb_set_rx_mode(netdev);
6712
6713                 /* turn on all-multi mode if wake on multicast is enabled */
6714                 if (wufc & E1000_WUFC_MC) {
6715                         rctl = rd32(E1000_RCTL);
6716                         rctl |= E1000_RCTL_MPE;
6717                         wr32(E1000_RCTL, rctl);
6718                 }
6719
6720                 ctrl = rd32(E1000_CTRL);
6721                 /* advertise wake from D3Cold */
6722                 #define E1000_CTRL_ADVD3WUC 0x00100000
6723                 /* phy power management enable */
6724                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6725                 ctrl |= E1000_CTRL_ADVD3WUC;
6726                 wr32(E1000_CTRL, ctrl);
6727
6728                 /* Allow time for pending master requests to run */
6729                 igb_disable_pcie_master(hw);
6730
6731                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6732                 wr32(E1000_WUFC, wufc);
6733         } else {
6734                 wr32(E1000_WUC, 0);
6735                 wr32(E1000_WUFC, 0);
6736         }
6737
6738         *enable_wake = wufc || adapter->en_mng_pt;
6739         if (!*enable_wake)
6740                 igb_power_down_link(adapter);
6741         else
6742                 igb_power_up_link(adapter);
6743
6744         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6745          * would have already happened in close and is redundant. */
6746         igb_release_hw_control(adapter);
6747
6748         pci_disable_device(pdev);
6749
6750         return 0;
6751 }
6752
6753 #ifdef CONFIG_PM
6754 #ifdef CONFIG_PM_SLEEP
6755 static int igb_suspend(struct device *dev)
6756 {
6757         int retval;
6758         bool wake;
6759         struct pci_dev *pdev = to_pci_dev(dev);
6760
6761         retval = __igb_shutdown(pdev, &wake, 0);
6762         if (retval)
6763                 return retval;
6764
6765         if (wake) {
6766                 pci_prepare_to_sleep(pdev);
6767         } else {
6768                 pci_wake_from_d3(pdev, false);
6769                 pci_set_power_state(pdev, PCI_D3hot);
6770         }
6771
6772         return 0;
6773 }
6774 #endif /* CONFIG_PM_SLEEP */
6775
6776 static int igb_resume(struct device *dev)
6777 {
6778         struct pci_dev *pdev = to_pci_dev(dev);
6779         struct net_device *netdev = pci_get_drvdata(pdev);
6780         struct igb_adapter *adapter = netdev_priv(netdev);
6781         struct e1000_hw *hw = &adapter->hw;
6782         int err;
6783
6784         pci_set_power_state(pdev, PCI_D0);
6785         pci_restore_state(pdev);
6786         pci_save_state(pdev);
6787
6788         err = pci_enable_device_mem(pdev);
6789         if (err) {
6790                 dev_err(&pdev->dev,
6791                         "igb: Cannot enable PCI device from suspend\n");
6792                 return err;
6793         }
6794         pci_set_master(pdev);
6795
6796         pci_enable_wake(pdev, PCI_D3hot, 0);
6797         pci_enable_wake(pdev, PCI_D3cold, 0);
6798
6799         if (!rtnl_is_locked()) {
6800                 /*
6801                  * shut up ASSERT_RTNL() warning in
6802                  * netif_set_real_num_tx/rx_queues.
6803                  */
6804                 rtnl_lock();
6805                 err = igb_init_interrupt_scheme(adapter);
6806                 rtnl_unlock();
6807         } else {
6808                 err = igb_init_interrupt_scheme(adapter);
6809         }
6810         if (err) {
6811                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6812                 return -ENOMEM;
6813         }
6814
6815         igb_reset(adapter);
6816
6817         /* let the f/w know that the h/w is now under the control of the
6818          * driver. */
6819         igb_get_hw_control(adapter);
6820
6821         wr32(E1000_WUS, ~0);
6822
6823         if (netdev->flags & IFF_UP) {
6824                 err = __igb_open(netdev, true);
6825                 if (err)
6826                         return err;
6827         }
6828
6829         netif_device_attach(netdev);
6830         return 0;
6831 }
6832
6833 #ifdef CONFIG_PM_RUNTIME
6834 static int igb_runtime_idle(struct device *dev)
6835 {
6836         struct pci_dev *pdev = to_pci_dev(dev);
6837         struct net_device *netdev = pci_get_drvdata(pdev);
6838         struct igb_adapter *adapter = netdev_priv(netdev);
6839
6840         if (!igb_has_link(adapter))
6841                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6842
6843         return -EBUSY;
6844 }
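
/*
 * Returning -EBUSY vetoes the immediate suspend the runtime PM core
 * would otherwise perform from the idle callback; when the link is
 * down, a delayed suspend is queued five seconds out instead (the
 * delay argument to pm_schedule_suspend() is in milliseconds).
 */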
6845
6846 static int igb_runtime_suspend(struct device *dev)
6847 {
6848         struct pci_dev *pdev = to_pci_dev(dev);
6849         int retval;
6850         bool wake;
6851
6852         retval = __igb_shutdown(pdev, &wake, 1);
6853         if (retval)
6854                 return retval;
6855
6856         if (wake) {
6857                 pci_prepare_to_sleep(pdev);
6858         } else {
6859                 pci_wake_from_d3(pdev, false);
6860                 pci_set_power_state(pdev, PCI_D3hot);
6861         }
6862
6863         return 0;
6864 }
6865
6866 static int igb_runtime_resume(struct device *dev)
6867 {
6868         return igb_resume(dev);
6869 }
6870 #endif /* CONFIG_PM_RUNTIME */
6871 #endif
6872
6873 static void igb_shutdown(struct pci_dev *pdev)
6874 {
6875         bool wake;
6876
6877         __igb_shutdown(pdev, &wake, 0);
6878
6879         if (system_state == SYSTEM_POWER_OFF) {
6880                 pci_wake_from_d3(pdev, wake);
6881                 pci_set_power_state(pdev, PCI_D3hot);
6882         }
6883 }
6884
6885 #ifdef CONFIG_NET_POLL_CONTROLLER
6886 /*
6887  * Polling 'interrupt' - used by things like netconsole to send skbs
6888  * without having to re-enable interrupts. It's not called while
6889  * the interrupt routine is executing.
6890  */
6891 static void igb_netpoll(struct net_device *netdev)
6892 {
6893         struct igb_adapter *adapter = netdev_priv(netdev);
6894         struct e1000_hw *hw = &adapter->hw;
6895         struct igb_q_vector *q_vector;
6896         int i;
6897
6898         for (i = 0; i < adapter->num_q_vectors; i++) {
6899                 q_vector = adapter->q_vector[i];
6900                 if (adapter->msix_entries)
6901                         wr32(E1000_EIMC, q_vector->eims_value);
6902                 else
6903                         igb_irq_disable(adapter);
6904                 napi_schedule(&q_vector->napi);
6905         }
6906 }
6907 #endif /* CONFIG_NET_POLL_CONTROLLER */
6908
6909 /**
6910  * igb_io_error_detected - called when PCI error is detected
6911  * @pdev: Pointer to PCI device
6912  * @state: The current pci connection state
6913  *
6914  * This function is called after a PCI bus error affecting
6915  * this device has been detected.
6916  */
6917 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6918                                               pci_channel_state_t state)
6919 {
6920         struct net_device *netdev = pci_get_drvdata(pdev);
6921         struct igb_adapter *adapter = netdev_priv(netdev);
6922
6923         netif_device_detach(netdev);
6924
6925         if (state == pci_channel_io_perm_failure)
6926                 return PCI_ERS_RESULT_DISCONNECT;
6927
6928         if (netif_running(netdev))
6929                 igb_down(adapter);
6930         pci_disable_device(pdev);
6931
6932         /* Request a slot reset. */
6933         return PCI_ERS_RESULT_NEED_RESET;
6934 }
6935
6936 /**
6937  * igb_io_slot_reset - called after the pci bus has been reset.
6938  * @pdev: Pointer to PCI device
6939  *
6940  * Restart the card from scratch, as if from a cold-boot. Implementation
6941  * resembles the first-half of the igb_resume routine.
6942  */
6943 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6944 {
6945         struct net_device *netdev = pci_get_drvdata(pdev);
6946         struct igb_adapter *adapter = netdev_priv(netdev);
6947         struct e1000_hw *hw = &adapter->hw;
6948         pci_ers_result_t result;
6949         int err;
6950
6951         if (pci_enable_device_mem(pdev)) {
6952                 dev_err(&pdev->dev,
6953                         "Cannot re-enable PCI device after reset.\n");
6954                 result = PCI_ERS_RESULT_DISCONNECT;
6955         } else {
6956                 pci_set_master(pdev);
6957                 pci_restore_state(pdev);
6958                 pci_save_state(pdev);
6959
6960                 pci_enable_wake(pdev, PCI_D3hot, 0);
6961                 pci_enable_wake(pdev, PCI_D3cold, 0);
6962
6963                 igb_reset(adapter);
6964                 wr32(E1000_WUS, ~0);
6965                 result = PCI_ERS_RESULT_RECOVERED;
6966         }
6967
6968         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6969         if (err) {
6970                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6971                         "failed 0x%x\n", err);
6972                 /* non-fatal, continue */
6973         }
6974
6975         return result;
6976 }
6977
6978 /**
6979  * igb_io_resume - called when traffic can start flowing again.
6980  * @pdev: Pointer to PCI device
6981  *
6982  * This callback is called when the error recovery driver tells us that
6983  * its OK to resume normal operation. Implementation resembles the
6984  * second-half of the igb_resume routine.
6985  */
6986 static void igb_io_resume(struct pci_dev *pdev)
6987 {
6988         struct net_device *netdev = pci_get_drvdata(pdev);
6989         struct igb_adapter *adapter = netdev_priv(netdev);
6990
6991         if (netif_running(netdev)) {
6992                 if (igb_up(adapter)) {
6993                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6994                         return;
6995                 }
6996         }
6997
6998         netif_device_attach(netdev);
6999
7000         /* let the f/w know that the h/w is now under the control of the
7001          * driver. */
7002         igb_get_hw_control(adapter);
7003 }
7004
7005 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7006                              u8 qsel)
7007 {
7008         u32 rar_low, rar_high;
7009         struct e1000_hw *hw = &adapter->hw;
7010
7011         /* HW expects these in little endian so we reverse the byte order
7012          * from network order (big endian) to little endian
7013          */
7014         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7015                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7016         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7017
7018         /* Indicate to hardware the Address is Valid. */
7019         rar_high |= E1000_RAH_AV;
7020
        if (hw->mac.type == e1000_82575)
                rar_high |= E1000_RAH_POOL_1 * qsel;
        else
                rar_high |= E1000_RAH_POOL_1 << qsel;

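        /* Write RAL first and flush, so the full address is in place before
         * RAH's Address Valid bit can enable the entry.
         */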
        wr32(E1000_RAL(index), rar_low);
        wrfl();
        wr32(E1000_RAH(index), rar_high);
        wrfl();
}

static int igb_set_vf_mac(struct igb_adapter *adapter,
                          int vf, unsigned char *mac_addr)
{
        struct e1000_hw *hw = &adapter->hw;
        /* VF MAC addresses start at the end of the receive address registers
         * and move towards the first, so a collision should not be possible */
        int rar_entry = hw->mac.rar_entry_count - (vf + 1);
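        /* For example, with a hypothetical rar_entry_count of 24, VF 0 would
         * use entry 23, VF 1 entry 22, and so on.
         */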

        memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

        igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

        return 0;
}

static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
                return -EINVAL;
        adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
        dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
        dev_info(&adapter->pdev->dev,
                 "Reload the VF driver to make this change effective.\n");
        if (test_bit(__IGB_DOWN, &adapter->state)) {
                dev_warn(&adapter->pdev->dev,
                         "The VF MAC address has been set, but the PF device is not up.\n");
                dev_warn(&adapter->pdev->dev,
                         "Bring the PF device up before attempting to use the VF device.\n");
        }
        return igb_set_vf_mac(adapter, vf, mac);
}

static int igb_link_mbps(int internal_link_speed)
{
        switch (internal_link_speed) {
        case SPEED_100:
                return 100;
        case SPEED_1000:
                return 1000;
        default:
                return 0;
        }
}

static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
                                  int link_speed)
{
        int rf_dec, rf_int;
        u32 bcnrc_val;

        if (tx_rate != 0) {
                /* Calculate the rate factor values to set */
                rf_int = link_speed / tx_rate;
                rf_dec = (link_speed - (rf_int * tx_rate));
                rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
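                /* The rate factor is link_speed / tx_rate as a fixed-point
                 * value whose fractional bits live below RF_INT_SHIFT.
                 * For example (assuming an RF_INT_SHIFT of 14), link_speed
                 * 1000 and tx_rate 300 give rf_int = 3 and
                 * rf_dec = (100 << 14) / 300 = 5461, i.e. a divisor of
                 * roughly 3.333 that holds the VF near 300 Mbps.
                 */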

                bcnrc_val = E1000_RTTBCNRC_RS_ENA;
                bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
                               E1000_RTTBCNRC_RF_INT_MASK);
                bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
        } else {
                bcnrc_val = 0;
        }

        wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
        wr32(E1000_RTTBCNRC, bcnrc_val);
}

static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
        int actual_link_speed, i;
        bool reset_rate = false;

        /* VF TX rate limit was not set or not supported */
        if ((adapter->vf_rate_link_speed == 0) ||
            (adapter->hw.mac.type != e1000_82576))
                return;

        actual_link_speed = igb_link_mbps(adapter->link_speed);
        if (actual_link_speed != adapter->vf_rate_link_speed) {
                reset_rate = true;
                adapter->vf_rate_link_speed = 0;
                dev_info(&adapter->pdev->dev,
                         "Link speed has changed; the VF transmit rate has been disabled\n");
        }

        for (i = 0; i < adapter->vfs_allocated_count; i++) {
                if (reset_rate)
                        adapter->vf_data[i].tx_rate = 0;

                igb_set_vf_rate_limit(&adapter->hw, i,
                                      adapter->vf_data[i].tx_rate,
                                      actual_link_speed);
        }
}

static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int actual_link_speed;

        if (hw->mac.type != e1000_82576)
                return -EOPNOTSUPP;

        actual_link_speed = igb_link_mbps(adapter->link_speed);
        if ((vf >= adapter->vfs_allocated_count) ||
            (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
            (tx_rate < 0) || (tx_rate > actual_link_speed))
                return -EINVAL;

        adapter->vf_rate_link_speed = actual_link_speed;
        adapter->vf_data[vf].tx_rate = (u16)tx_rate;
        igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

        return 0;
}

static int igb_ndo_get_vf_config(struct net_device *netdev,
                                 int vf, struct ifla_vf_info *ivi)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        if (vf >= adapter->vfs_allocated_count)
                return -EINVAL;
        ivi->vf = vf;
        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
        ivi->tx_rate = adapter->vf_data[vf].tx_rate;
        ivi->vlan = adapter->vf_data[vf].pf_vlan;
        ivi->qos = adapter->vf_data[vf].pf_qos;
        return 0;
}

static void igb_vmm_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        switch (hw->mac.type) {
        case e1000_82575:
        default:
                /* replication is not supported for 82575 */
                return;
        case e1000_82576:
                /* notify HW that the MAC is adding vlan tags */
                reg = rd32(E1000_DTXCTL);
                reg |= E1000_DTXCTL_VLAN_ADDED;
                wr32(E1000_DTXCTL, reg);
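                /* fall through - 82576 also needs the vlan tag stripping
                 * setting below */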
        case e1000_82580:
                /* enable replication vlan tag stripping */
                reg = rd32(E1000_RPLOLR);
                reg |= E1000_RPLOLR_STRVLAN;
                wr32(E1000_RPLOLR, reg);
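                /* fall through */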
        case e1000_i350:
                /* none of the above registers are supported by i350 */
                break;
        }

        if (adapter->vfs_allocated_count) {
                igb_vmdq_set_loopback_pf(hw, true);
                igb_vmdq_set_replication_pf(hw, true);
                igb_vmdq_set_anti_spoofing_pf(hw, true,
                                                adapter->vfs_allocated_count);
        } else {
                igb_vmdq_set_loopback_pf(hw, false);
                igb_vmdq_set_replication_pf(hw, false);
        }
}

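/**
 * igb_init_dmac - configure DMA coalescing
 * @adapter: board private structure
 * @pba: packet buffer allocation, in KB (inferred from the unit
 *       conversions below)
 */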
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 dmac_thr;
        u16 hwm;

        if (hw->mac.type > e1000_82580) {
                if (adapter->flags & IGB_FLAG_DMAC) {
                        u32 reg;

                        /* force threshold to 0. */
                        wr32(E1000_DMCTXTH, 0);

                        /*
                         * DMA Coalescing high water mark needs to be greater
                         * than the Rx threshold. Set hwm to PBA - max frame
                         * size in 16B units, capping it at PBA - 6KB.
                         */
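                        /* pba is in KB and hwm in 16-byte units;
                         * 1KB = 64 * 16B, hence the 64 * pba conversion.
                         */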
                        hwm = 64 * pba - adapter->max_frame_size / 16;
                        if (hwm < 64 * (pba - 6))
                                hwm = 64 * (pba - 6);
                        reg = rd32(E1000_FCRTC);
                        reg &= ~E1000_FCRTC_RTH_COAL_MASK;
                        reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
                                & E1000_FCRTC_RTH_COAL_MASK);
                        wr32(E1000_FCRTC, reg);

                        /*
                         * Set the DMA Coalescing Rx threshold to PBA - 2 * max
                         * frame size, capping it at PBA - 10KB.
                         */
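                        /* Here the threshold stays in KB: max_frame_size is
                         * in bytes, and 2 * max_frame_size bytes equals
                         * max_frame_size / 512 KB.
                         */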
                        dmac_thr = pba - adapter->max_frame_size / 512;
                        if (dmac_thr < pba - 10)
                                dmac_thr = pba - 10;
                        reg = rd32(E1000_DMACR);
                        reg &= ~E1000_DMACR_DMACTHR_MASK;
                        reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
                                & E1000_DMACR_DMACTHR_MASK);

                        /* transition to L0s or L1 if available */
                        reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

                        /* watchdog timer ~= 1000 usec, programmed in 32 usec
                         * intervals (1000 >> 5 == 31 intervals) */
                        reg |= (1000 >> 5);
                        wr32(E1000_DMACR, reg);

                        /*
                         * no lower threshold to disable coalescing
                         * (smart FIFO) - UTRESH=0
                         */
                        wr32(E1000_DMCRTRH, 0);

                        reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

                        wr32(E1000_DMCTLX, reg);

                        /*
                         * free space in tx packet buffer to wake from
                         * DMA coalescing
                         */
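                        /* The >> 6 presumably converts bytes into the
                         * register's 64-byte units: wake once this much of
                         * the tx packet buffer is free again.
                         */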
                        wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
                             (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

                        /*
                         * make low power state decisions controlled
                         * by DMA coalescing
                         */
                        reg = rd32(E1000_PCIEMISC);
                        reg &= ~E1000_PCIEMISC_LX_DECISION;
                        wr32(E1000_PCIEMISC, reg);
                } /* endif adapter->dmac is not disabled */
        } else if (hw->mac.type == e1000_82580) {
                u32 reg = rd32(E1000_PCIEMISC);
                wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
                wr32(E1000_DMACR, 0);
        }
}

/* igb_main.c */