/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/pm_runtime.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
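/* DRV_VERSION expands to "3.2.10-k" */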
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                        igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        .driver.pm = &igb_pm_ops,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
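/* debug defaults to -1 (out of range) so that, per the usual
 * netif_msg_init() convention, the driver falls back to DEFAULT_MSG_ENABLE.
 */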
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
                regs[2], regs[3]);
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        u16 i, n;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                pr_info("Device Name     state            trans_start      "
                        "last_rx\n");
                pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
                        netdev->state, netdev->trans_start, netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        pr_info(" Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                struct igb_tx_buffer *buffer_info;
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
                pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
                        n, tx_ring->next_to_use, tx_ring->next_to_clean,
                        (u64)buffer_info->dma,
                        buffer_info->length,
                        buffer_info->next_to_watch,
                        (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
                        "[bi->dma       ] leng  ntw timestamp        "
                        "bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        const char *next_desc;
                        struct igb_tx_buffer *buffer_info;
                        tx_desc = IGB_TX_DESC(tx_ring, i);
                        buffer_info = &tx_ring->tx_buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        if (i == tx_ring->next_to_use &&
                            i == tx_ring->next_to_clean)
                                next_desc = " NTC/U";
                        else if (i == tx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == tx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        pr_info("T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %p %016llX %p%s\n", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb, next_desc);

                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, phys_to_virt(buffer_info->dma),
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        pr_info("Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info(" %5d %5X %5X\n",
                        n, rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
                        "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
                pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
                        "----------- [bi->skb] <-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        const char *next_desc;
                        struct igb_rx_buffer *buffer_info;
                        buffer_info = &rx_ring->rx_buffer_info[i];
                        rx_desc = IGB_RX_DESC(rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

                        if (i == rx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == rx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                pr_info("%s[0x%03X]     %016llX %016llX -------"
                                        "--------- %p%s\n", "RWB", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb, next_desc);
                        } else {
                                pr_info("%s[0x%03X]     %016llX %016llX %016llX"
                                        " %p%s\n", "R  ", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb, next_desc);

                                if (netif_msg_pktdata(adapter)) {
                                        print_hex_dump(KERN_INFO, "",
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
                                                IGB_RX_HDR_LEN, true);
                                        print_hex_dump(KERN_INFO, "",
                                          DUMP_PREFIX_ADDRESS,
                                          16, 1,
                                          phys_to_virt(
                                            buffer_info->page_dma +
                                            buffer_info->page_offset),
                                          PAGE_SIZE/2, true);
                                }
                        }
                }
        }

exit:
        return;
}

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML.  However we never
         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
         */
        if (hw->mac.type >= e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        pr_info("%s - version %s\n",
               igb_driver_string, igb_driver_version);

        pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
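/* Q_IDX_82576 interleaves indices: i = 0, 1, 2, 3, ... maps to 0, 8, 1, 9, ...
 * matching the VF queue pairing described in igb_cache_ring_register() below.
 */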
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
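                /* fall through - any remaining queues are mapped sequentially
                 * by the shared loops below
                 */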
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
        int orig_node = adapter->node;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /* On i350, loopback VLAN packets have the tag byte-swapped. */
                if (adapter->hw.mac.type == e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        igb_cache_ring_register(adapter);

        return 0;

err:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_queues(adapter);

        return -ENOMEM;
}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset within IVAR, should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = array_rd32(E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        array_wr32(E1000_IVAR0, index, ivar);
}

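/* sentinel queue index meaning "this vector has no Rx/Tx ring of that type" */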
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows.  The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
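                /* e.g. Rx queue 9: row = 9 & 0x7 = 1,
                 * column offset = (9 & 0x8) << 1 = 16
                 */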
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major.  So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
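                /* e.g. Rx queue 5: row = 5 >> 1 = 2,
                 * column offset = (5 & 0x1) << 4 = 16
                 */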
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
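                /* the other-causes allocation sits in bits 15:8 of IVAR_MISC,
                 * hence the shift by 8 below
                 */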
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
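        /* e.g. 4 Rx and 4 Tx queues without queue pairing:
         * 4 + 4 + 1 = 9 vectors requested
         */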
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        rtnl_lock();
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        err = netif_set_real_num_rx_queues(adapter->netdev,
                adapter->num_rx_queues);
        rtnl_unlock();
        return err;
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
        int orig_node = adapter->node;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        return 0;

err_out:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
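                /* fewer vectors than rings: pair Tx ring i with Rx ring i
                 * on a shared vector
                 */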
1220                 for (i = 0; i < adapter->num_rx_queues; i++) {
1221                         if (i < adapter->num_tx_queues)
1222                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1223                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1224                 }
1225                 for (; i < adapter->num_tx_queues; i++)
1226                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1227         }
1228         return 0;
1229 }
1230
1231 /**
1232  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1233  *
1234  * This function initializes the interrupts and allocates all of the queues.
1235  **/
1236 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1237 {
1238         struct pci_dev *pdev = adapter->pdev;
1239         int err;
1240
1241         err = igb_set_interrupt_capability(adapter);
1242         if (err)
1243                 return err;
1244
1245         err = igb_alloc_q_vectors(adapter);
1246         if (err) {
1247                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1248                 goto err_alloc_q_vectors;
1249         }
1250
1251         err = igb_alloc_queues(adapter);
1252         if (err) {
1253                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1254                 goto err_alloc_queues;
1255         }
1256
1257         err = igb_map_ring_to_vector(adapter);
1258         if (err) {
1259                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1260                 goto err_map_queues;
1261         }
1262
1263
1264         return 0;
1265 err_map_queues:
1266         igb_free_queues(adapter);
1267 err_alloc_queues:
1268         igb_free_q_vectors(adapter);
1269 err_alloc_q_vectors:
1270         igb_reset_interrupt_capability(adapter);
1271         return err;
1272 }
1273
1274 /**
1275  * igb_request_irq - initialize interrupts
1276  *
1277  * Attempts to configure interrupts using the best available
1278  * capabilities of the hardware and kernel.
1279  **/
1280 static int igb_request_irq(struct igb_adapter *adapter)
1281 {
1282         struct net_device *netdev = adapter->netdev;
1283         struct pci_dev *pdev = adapter->pdev;
1284         int err = 0;
1285
1286         if (adapter->msix_entries) {
1287                 err = igb_request_msix(adapter);
1288                 if (!err)
1289                         goto request_done;
1290                 /* fall back to MSI */
1291                 igb_clear_interrupt_scheme(adapter);
1292                 if (!pci_enable_msi(pdev))
1293                         adapter->flags |= IGB_FLAG_HAS_MSI;
1294                 igb_free_all_tx_resources(adapter);
1295                 igb_free_all_rx_resources(adapter);
1296                 adapter->num_tx_queues = 1;
1297                 adapter->num_rx_queues = 1;
1298                 adapter->num_q_vectors = 1;
1299                 err = igb_alloc_q_vectors(adapter);
1300                 if (err) {
1301                         dev_err(&pdev->dev,
1302                                 "Unable to allocate memory for vectors\n");
1303                         goto request_done;
1304                 }
1305                 err = igb_alloc_queues(adapter);
1306                 if (err) {
1307                         dev_err(&pdev->dev,
1308                                 "Unable to allocate memory for queues\n");
1309                         igb_free_q_vectors(adapter);
1310                         goto request_done;
1311                 }
1312                 igb_setup_all_tx_resources(adapter);
1313                 igb_setup_all_rx_resources(adapter);
1314         }
1315
1316         igb_assign_vector(adapter->q_vector[0], 0);
1317
1318         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1319                 err = request_irq(pdev->irq, igb_intr_msi, 0,
1320                                   netdev->name, adapter);
1321                 if (!err)
1322                         goto request_done;
1323
1324                 /* fall back to legacy interrupts */
1325                 igb_reset_interrupt_capability(adapter);
1326                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1327         }
1328
1329         err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1330                           netdev->name, adapter);
1331
1332         if (err)
1333                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1334                         err);
1335
1336 request_done:
1337         return err;
1338 }
1339
1340 static void igb_free_irq(struct igb_adapter *adapter)
1341 {
1342         if (adapter->msix_entries) {
1343                 int vector = 0, i;
1344
1345                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1346
1347                 for (i = 0; i < adapter->num_q_vectors; i++)
1348                         free_irq(adapter->msix_entries[vector++].vector,
1349                                  adapter->q_vector[i]);
1350         } else {
1351                 free_irq(adapter->pdev->irq, adapter);
1352         }
1353 }
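
/*
 * Vector layout assumed by igb_request_msix()/igb_free_irq() above
 * (a sketch; entry 0 carries the "other" causes such as link and
 * mailbox events, the rest map 1:1 onto q_vectors):
 *
 *	msix_entries[0].vector     -> dev_id == adapter
 *	msix_entries[1 + i].vector -> dev_id == adapter->q_vector[i]
 *
 * free_irq() must be passed the same dev_id cookie that request_irq()
 * was given, which is why the two walks step through the table in the
 * same order.
 */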
1354
1355 /**
1356  * igb_irq_disable - Mask off interrupt generation on the NIC
1357  * @adapter: board private structure
1358  **/
1359 static void igb_irq_disable(struct igb_adapter *adapter)
1360 {
1361         struct e1000_hw *hw = &adapter->hw;
1362
1363         /*
1364          * We need to be careful when disabling interrupts.  The VFs are also
1365          * mapped into these registers, so clearing bits they rely on can
1366          * break the VF drivers; only clear the bits we set ourselves.
1367          */
1368         if (adapter->msix_entries) {
1369                 u32 regval = rd32(E1000_EIAM);
1370                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1371                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1372                 regval = rd32(E1000_EIAC);
1373                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1374         }
1375
1376         wr32(E1000_IAM, 0);
1377         wr32(E1000_IMC, ~0);
1378         wrfl();
1379         if (adapter->msix_entries) {
1380                 int i;
1381                 for (i = 0; i < adapter->num_q_vectors; i++)
1382                         synchronize_irq(adapter->msix_entries[i].vector);
1383         } else {
1384                 synchronize_irq(adapter->pdev->irq);
1385         }
1386 }
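
/*
 * Worked example of the masking above (values are illustrative only):
 * if the VFs own EIAM bits 0x0000001F and this PF's eims_enable_mask
 * is 0x000000E0, then
 *
 *	EIAM = rd32(E1000_EIAM) & ~0x000000E0
 *
 * clears only the PF's auto-mask bits and leaves 0x0000001F intact,
 * whereas a blanket wr32(E1000_EIAM, 0) would also disturb the VFs.
 */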
1387
1388 /**
1389  * igb_irq_enable - Enable default interrupt generation settings
1390  * @adapter: board private structure
1391  **/
1392 static void igb_irq_enable(struct igb_adapter *adapter)
1393 {
1394         struct e1000_hw *hw = &adapter->hw;
1395
1396         if (adapter->msix_entries) {
1397                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1398                 u32 regval = rd32(E1000_EIAC);
1399                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1400                 regval = rd32(E1000_EIAM);
1401                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1402                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1403                 if (adapter->vfs_allocated_count) {
1404                         wr32(E1000_MBVFIMR, 0xFF);
1405                         ims |= E1000_IMS_VMMB;
1406                 }
1407                 wr32(E1000_IMS, ims);
1408         } else {
1409                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1410                                 E1000_IMS_DRSTA);
1411                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1412                                 E1000_IMS_DRSTA);
1413         }
1414 }
1415
1416 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1417 {
1418         struct e1000_hw *hw = &adapter->hw;
1419         u16 vid = adapter->hw.mng_cookie.vlan_id;
1420         u16 old_vid = adapter->mng_vlan_id;
1421
1422         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1423                 /* add VID to filter table */
1424                 igb_vfta_set(hw, vid, true);
1425                 adapter->mng_vlan_id = vid;
1426         } else {
1427                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1428         }
1429
1430         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1431             (vid != old_vid) &&
1432             !test_bit(old_vid, adapter->active_vlans)) {
1433                 /* remove VID from filter table */
1434                 igb_vfta_set(hw, old_vid, false);
1435         }
1436 }
1437
1438 /**
1439  * igb_release_hw_control - release control of the h/w to f/w
1440  * @adapter: address of board private structure
1441  *
1442  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1443  * For ASF and Pass Through versions of f/w this means that the
1444  * driver is no longer loaded.
1445  *
1446  **/
1447 static void igb_release_hw_control(struct igb_adapter *adapter)
1448 {
1449         struct e1000_hw *hw = &adapter->hw;
1450         u32 ctrl_ext;
1451
1452         /* Let firmware take over control of h/w */
1453         ctrl_ext = rd32(E1000_CTRL_EXT);
1454         wr32(E1000_CTRL_EXT,
1455                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1456 }
1457
1458 /**
1459  * igb_get_hw_control - get control of the h/w from f/w
1460  * @adapter: address of board private structure
1461  *
1462  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1463  * For ASF and Pass Through versions of f/w this means that
1464  * the driver is loaded.
1465  *
1466  **/
1467 static void igb_get_hw_control(struct igb_adapter *adapter)
1468 {
1469         struct e1000_hw *hw = &adapter->hw;
1470         u32 ctrl_ext;
1471
1472         /* Let firmware know the driver has taken over */
1473         ctrl_ext = rd32(E1000_CTRL_EXT);
1474         wr32(E1000_CTRL_EXT,
1475                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1476 }
1477
1478 /**
1479  * igb_configure - configure the hardware for RX and TX
1480  * @adapter: private board structure
1481  **/
1482 static void igb_configure(struct igb_adapter *adapter)
1483 {
1484         struct net_device *netdev = adapter->netdev;
1485         int i;
1486
1487         igb_get_hw_control(adapter);
1488         igb_set_rx_mode(netdev);
1489
1490         igb_restore_vlan(adapter);
1491
1492         igb_setup_tctl(adapter);
1493         igb_setup_mrqc(adapter);
1494         igb_setup_rctl(adapter);
1495
1496         igb_configure_tx(adapter);
1497         igb_configure_rx(adapter);
1498
1499         igb_rx_fifo_flush_82575(&adapter->hw);
1500
1501         /* call igb_desc_unused which always leaves
1502          * at least 1 descriptor unused to make sure
1503          * next_to_use != next_to_clean */
1504         for (i = 0; i < adapter->num_rx_queues; i++) {
1505                 struct igb_ring *ring = adapter->rx_ring[i];
1506                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1507         }
1508 }
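
/*
 * igb_desc_unused() (in igb.h) implements the invariant noted above;
 * the ring arithmetic is roughly:
 *
 *	if (ring->next_to_clean > ring->next_to_use)
 *		return ring->next_to_clean - ring->next_to_use - 1;
 *	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
 *
 * The trailing "- 1" keeps one descriptor permanently unused so that a
 * completely full ring never looks identical to an empty one.
 */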
1509
1510 /**
1511  * igb_power_up_link - Power up the phy/serdes link
1512  * @adapter: address of board private structure
1513  **/
1514 void igb_power_up_link(struct igb_adapter *adapter)
1515 {
1516         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517                 igb_power_up_phy_copper(&adapter->hw);
1518         else
1519                 igb_power_up_serdes_link_82575(&adapter->hw);
1520         igb_reset_phy(&adapter->hw);
1521 }
1522
1523 /**
1524  * igb_power_down_link - Power down the phy/serdes link
1525  * @adapter: address of board private structure
1526  */
1527 static void igb_power_down_link(struct igb_adapter *adapter)
1528 {
1529         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1530                 igb_power_down_phy_copper_82575(&adapter->hw);
1531         else
1532                 igb_shutdown_serdes_link_82575(&adapter->hw);
1533 }
1534
1535 /**
1536  * igb_up - Open the interface and prepare it to handle traffic
1537  * @adapter: board private structure
1538  **/
1539 int igb_up(struct igb_adapter *adapter)
1540 {
1541         struct e1000_hw *hw = &adapter->hw;
1542         int i;
1543
1544         /* hardware has been reset, we need to reload some things */
1545         igb_configure(adapter);
1546
1547         clear_bit(__IGB_DOWN, &adapter->state);
1548
1549         for (i = 0; i < adapter->num_q_vectors; i++)
1550                 napi_enable(&(adapter->q_vector[i]->napi));
1551
1552         if (adapter->msix_entries)
1553                 igb_configure_msix(adapter);
1554         else
1555                 igb_assign_vector(adapter->q_vector[0], 0);
1556
1557         /* Clear any pending interrupts. */
1558         rd32(E1000_ICR);
1559         igb_irq_enable(adapter);
1560
1561         /* notify VFs that reset has been completed */
1562         if (adapter->vfs_allocated_count) {
1563                 u32 reg_data = rd32(E1000_CTRL_EXT);
1564                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1565                 wr32(E1000_CTRL_EXT, reg_data);
1566         }
1567
1568         netif_tx_start_all_queues(adapter->netdev);
1569
1570         /* start the watchdog. */
1571         hw->mac.get_link_status = 1;
1572         schedule_work(&adapter->watchdog_task);
1573
1574         return 0;
1575 }
1576
1577 void igb_down(struct igb_adapter *adapter)
1578 {
1579         struct net_device *netdev = adapter->netdev;
1580         struct e1000_hw *hw = &adapter->hw;
1581         u32 tctl, rctl;
1582         int i;
1583
1584         /* signal that we're down so the interrupt handler does not
1585          * reschedule our watchdog timer */
1586         set_bit(__IGB_DOWN, &adapter->state);
1587
1588         /* disable receives in the hardware */
1589         rctl = rd32(E1000_RCTL);
1590         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1591         /* flush and sleep below */
1592
1593         netif_tx_stop_all_queues(netdev);
1594
1595         /* disable transmits in the hardware */
1596         tctl = rd32(E1000_TCTL);
1597         tctl &= ~E1000_TCTL_EN;
1598         wr32(E1000_TCTL, tctl);
1599         /* flush both disables and wait for them to finish */
1600         wrfl();
1601         msleep(10);
1602
1603         for (i = 0; i < adapter->num_q_vectors; i++)
1604                 napi_disable(&(adapter->q_vector[i]->napi));
1605
1606         igb_irq_disable(adapter);
1607
1608         del_timer_sync(&adapter->watchdog_timer);
1609         del_timer_sync(&adapter->phy_info_timer);
1610
1611         netif_carrier_off(netdev);
1612
1613         /* record the stats before reset */
1614         spin_lock(&adapter->stats64_lock);
1615         igb_update_stats(adapter, &adapter->stats64);
1616         spin_unlock(&adapter->stats64_lock);
1617
1618         adapter->link_speed = 0;
1619         adapter->link_duplex = 0;
1620
1621         if (!pci_channel_offline(adapter->pdev))
1622                 igb_reset(adapter);
1623         igb_clean_all_tx_rings(adapter);
1624         igb_clean_all_rx_rings(adapter);
1625 #ifdef CONFIG_IGB_DCA
1626
1627         /* since we reset the hardware, DCA settings were cleared */
1628         igb_setup_dca(adapter);
1629 #endif
1630 }
1631
1632 void igb_reinit_locked(struct igb_adapter *adapter)
1633 {
1634         WARN_ON(in_interrupt());
1635         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1636                 msleep(1);
1637         igb_down(adapter);
1638         igb_up(adapter);
1639         clear_bit(__IGB_RESETTING, &adapter->state);
1640 }
1641
1642 void igb_reset(struct igb_adapter *adapter)
1643 {
1644         struct pci_dev *pdev = adapter->pdev;
1645         struct e1000_hw *hw = &adapter->hw;
1646         struct e1000_mac_info *mac = &hw->mac;
1647         struct e1000_fc_info *fc = &hw->fc;
1648         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1649         u16 hwm;
1650
1651         /* Repartition PBA for MTUs greater than 9k.
1652          * CTRL.RST is required for the change to take effect.
1653          */
1654         switch (mac->type) {
1655         case e1000_i350:
1656         case e1000_82580:
1657                 pba = rd32(E1000_RXPBS);
1658                 pba = igb_rxpbs_adjust_82580(pba);
1659                 break;
1660         case e1000_82576:
1661                 pba = rd32(E1000_RXPBS);
1662                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1663                 break;
1664         case e1000_82575:
1665         default:
1666                 pba = E1000_PBA_34K;
1667                 break;
1668         }
1669
1670         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1671             (mac->type < e1000_82576)) {
1672                 /* adjust PBA for jumbo frames */
1673                 wr32(E1000_PBA, pba);
1674
1675                 /* To maintain wire speed transmits, the Tx FIFO should be
1676                  * large enough to accommodate two full transmit packets,
1677                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1678                  * the Rx FIFO should be large enough to accommodate at least
1679                  * one full receive packet and is similarly rounded up and
1680                  * expressed in KB. */
1681                 pba = rd32(E1000_PBA);
1682                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1683                 tx_space = pba >> 16;
1684                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1685                 pba &= 0xffff;
1686                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1687                  * packet, but not the Ethernet FCS, because hardware appends it */
1688                 min_tx_space = (adapter->max_frame_size +
1689                                 sizeof(union e1000_adv_tx_desc) -
1690                                 ETH_FCS_LEN) * 2;
1691                 min_tx_space = ALIGN(min_tx_space, 1024);
1692                 min_tx_space >>= 10;
1693                 /* software strips receive CRC, so leave room for it */
1694                 min_rx_space = adapter->max_frame_size;
1695                 min_rx_space = ALIGN(min_rx_space, 1024);
1696                 min_rx_space >>= 10;
1697
1698                 /* If current Tx allocation is less than the min Tx FIFO size,
1699                  * and the min Tx FIFO size is less than the current Rx FIFO
1700                  * allocation, take space away from current Rx allocation */
1701                 if (tx_space < min_tx_space &&
1702                     ((min_tx_space - tx_space) < pba)) {
1703                         pba = pba - (min_tx_space - tx_space);
1704
1705                         /* if short on rx space, rx wins and must trump tx
1706                          * adjustment */
1707                         if (pba < min_rx_space)
1708                                 pba = min_rx_space;
1709                 }
1710                 wr32(E1000_PBA, pba);
1711         }
1712
1713         /* flow control settings */
1714         /* The high water mark must be low enough to fit one full frame
1715          * (or the size used for early receive) above it in the Rx FIFO.
1716          * Set it to the lower of:
1717          * - 90% of the Rx FIFO size, or
1718          * - the full Rx FIFO size minus one full frame */
1719         hwm = min(((pba << 10) * 9 / 10),
1720                         ((pba << 10) - 2 * adapter->max_frame_size));
1721
1722         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1723         fc->low_water = fc->high_water - 16;
1724         fc->pause_time = 0xFFFF;
1725         fc->send_xon = 1;
1726         fc->current_mode = fc->requested_mode;
1727
1728         /* disable transmits and receives for all VFs */
1729         if (adapter->vfs_allocated_count) {
1730                 int i;
1731                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1732                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1733
1734                 /* ping all the active vfs to let them know we are going down */
1735                 igb_ping_all_vfs(adapter);
1736
1737                 /* disable transmits and receives */
1738                 wr32(E1000_VFRE, 0);
1739                 wr32(E1000_VFTE, 0);
1740         }
1741
1742         /* Allow time for pending master requests to run */
1743         hw->mac.ops.reset_hw(hw);
1744         wr32(E1000_WUC, 0);
1745
1746         if (hw->mac.ops.init_hw(hw))
1747                 dev_err(&pdev->dev, "Hardware Error\n");
1748
1749         igb_init_dmac(adapter, pba);
1750         if (!netif_running(adapter->netdev))
1751                 igb_power_down_link(adapter);
1752
1753         igb_update_mng_vlan(adapter);
1754
1755         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1756         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1757
1758         igb_get_phy_info(hw);
1759 }
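
/*
 * Worked example of the repartitioning above, assuming a 9018-byte
 * max_frame_size and the 16-byte advanced Tx descriptor:
 *
 *	min_tx_space = (9018 + 16 - 4) * 2 = 18060
 *	               -> ALIGN(, 1024) = 18432 -> >> 10 = 18 KB
 *	min_rx_space = ALIGN(9018, 1024) = 9216 -> >> 10 = 9 KB
 *
 * and of the high-water mark for an illustrative pba of 20 (KB):
 *
 *	hwm = min(20480 * 9 / 10, 20480 - 2 * 9018)
 *	    = min(18432, 2444) = 2444 -> & 0xFFF0 = 2432 bytes
 */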
1760
1761 static netdev_features_t igb_fix_features(struct net_device *netdev,
1762         netdev_features_t features)
1763 {
1764         /*
1765          * Since there is no support for separate Rx/Tx VLAN accel
1766          * enable/disable, make sure the Tx flag always matches the Rx flag.
1767          */
1768         if (features & NETIF_F_HW_VLAN_RX)
1769                 features |= NETIF_F_HW_VLAN_TX;
1770         else
1771                 features &= ~NETIF_F_HW_VLAN_TX;
1772
1773         return features;
1774 }
1775
1776 static int igb_set_features(struct net_device *netdev,
1777         netdev_features_t features)
1778 {
1779         netdev_features_t changed = netdev->features ^ features;
1780         struct igb_adapter *adapter = netdev_priv(netdev);
1781
1782         if (changed & NETIF_F_HW_VLAN_RX)
1783                 igb_vlan_mode(netdev, features);
1784
1785         if (!(changed & NETIF_F_RXALL))
1786                 return 0;
1787
1788         netdev->features = features;
1789
1790         if (netif_running(netdev))
1791                 igb_reinit_locked(adapter);
1792         else
1793                 igb_reset(adapter);
1794
1795         return 0;
1796 }
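
/*
 * The XOR in igb_set_features() isolates exactly the toggled bits, e.g.
 * (illustrative values):
 *
 *	old     = NETIF_F_SG | NETIF_F_HW_VLAN_RX
 *	new     = NETIF_F_SG | NETIF_F_RXALL
 *	changed = old ^ new = NETIF_F_HW_VLAN_RX | NETIF_F_RXALL
 *
 * so only the handlers for features that actually changed are run.
 */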
1797
1798 static const struct net_device_ops igb_netdev_ops = {
1799         .ndo_open               = igb_open,
1800         .ndo_stop               = igb_close,
1801         .ndo_start_xmit         = igb_xmit_frame,
1802         .ndo_get_stats64        = igb_get_stats64,
1803         .ndo_set_rx_mode        = igb_set_rx_mode,
1804         .ndo_set_mac_address    = igb_set_mac,
1805         .ndo_change_mtu         = igb_change_mtu,
1806         .ndo_do_ioctl           = igb_ioctl,
1807         .ndo_tx_timeout         = igb_tx_timeout,
1808         .ndo_validate_addr      = eth_validate_addr,
1809         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1810         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1811         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1812         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1813         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1814         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1815 #ifdef CONFIG_NET_POLL_CONTROLLER
1816         .ndo_poll_controller    = igb_netpoll,
1817 #endif
1818         .ndo_fix_features       = igb_fix_features,
1819         .ndo_set_features       = igb_set_features,
1820 };
1821
1822 /**
1823  * igb_probe - Device Initialization Routine
1824  * @pdev: PCI device information struct
1825  * @ent: entry in igb_pci_tbl
1826  *
1827  * Returns 0 on success, negative on failure
1828  *
1829  * igb_probe initializes an adapter identified by a pci_dev structure.
1830  * The OS initialization, configuring of the adapter private structure,
1831  * and a hardware reset occur.
1832  **/
1833 static int __devinit igb_probe(struct pci_dev *pdev,
1834                                const struct pci_device_id *ent)
1835 {
1836         struct net_device *netdev;
1837         struct igb_adapter *adapter;
1838         struct e1000_hw *hw;
1839         u16 eeprom_data = 0;
1840         s32 ret_val;
1841         static int global_quad_port_a; /* global quad port a indication */
1842         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1843         unsigned long mmio_start, mmio_len;
1844         int err, pci_using_dac;
1845         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1846         u8 part_str[E1000_PBANUM_LENGTH];
1847
1848         /* Catch broken hardware that put the wrong VF device ID in
1849          * the PCIe SR-IOV capability.
1850          */
1851         if (pdev->is_virtfn) {
1852                 WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1853                      pci_name(pdev), pdev->vendor, pdev->device);
1854                 return -EINVAL;
1855         }
1856
1857         err = pci_enable_device_mem(pdev);
1858         if (err)
1859                 return err;
1860
1861         pci_using_dac = 0;
1862         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1863         if (!err) {
1864                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1865                 if (!err)
1866                         pci_using_dac = 1;
1867         } else {
1868                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1869                 if (err) {
1870                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1871                         if (err) {
1872                                 dev_err(&pdev->dev,
1873                                         "No usable DMA configuration, aborting\n");
1874                                 goto err_dma;
1875                         }
1876                 }
1877         }
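
        /*
         * In words: prefer 64-bit DMA (with a matching coherent mask) so
         * rings and buffers may sit anywhere in memory; if the platform
         * rejects either mask, retry with 32-bit addressing, and only
         * abort the probe when even that fails.
         */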
1878
1879         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1880                                            IORESOURCE_MEM),
1881                                            igb_driver_name);
1882         if (err)
1883                 goto err_pci_reg;
1884
1885         pci_enable_pcie_error_reporting(pdev);
1886
1887         pci_set_master(pdev);
1888         pci_save_state(pdev);
1889
1890         err = -ENOMEM;
1891         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1892                                    IGB_MAX_TX_QUEUES);
1893         if (!netdev)
1894                 goto err_alloc_etherdev;
1895
1896         SET_NETDEV_DEV(netdev, &pdev->dev);
1897
1898         pci_set_drvdata(pdev, netdev);
1899         adapter = netdev_priv(netdev);
1900         adapter->netdev = netdev;
1901         adapter->pdev = pdev;
1902         hw = &adapter->hw;
1903         hw->back = adapter;
1904         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1905
1906         mmio_start = pci_resource_start(pdev, 0);
1907         mmio_len = pci_resource_len(pdev, 0);
1908
1909         err = -EIO;
1910         hw->hw_addr = ioremap(mmio_start, mmio_len);
1911         if (!hw->hw_addr)
1912                 goto err_ioremap;
1913
1914         netdev->netdev_ops = &igb_netdev_ops;
1915         igb_set_ethtool_ops(netdev);
1916         netdev->watchdog_timeo = 5 * HZ;
1917
1918         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1919
1920         netdev->mem_start = mmio_start;
1921         netdev->mem_end = mmio_start + mmio_len;
1922
1923         /* PCI config space info */
1924         hw->vendor_id = pdev->vendor;
1925         hw->device_id = pdev->device;
1926         hw->revision_id = pdev->revision;
1927         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1928         hw->subsystem_device_id = pdev->subsystem_device;
1929
1930         /* Copy the default MAC, PHY and NVM function pointers */
1931         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1932         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1933         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1934         /* Initialize skew-specific constants */
1935         err = ei->get_invariants(hw);
1936         if (err)
1937                 goto err_sw_init;
1938
1939         /* setup the private structure */
1940         err = igb_sw_init(adapter);
1941         if (err)
1942                 goto err_sw_init;
1943
1944         igb_get_bus_info_pcie(hw);
1945
1946         hw->phy.autoneg_wait_to_complete = false;
1947
1948         /* Copper options */
1949         if (hw->phy.media_type == e1000_media_type_copper) {
1950                 hw->phy.mdix = AUTO_ALL_MODES;
1951                 hw->phy.disable_polarity_correction = false;
1952                 hw->phy.ms_type = e1000_ms_hw_default;
1953         }
1954
1955         if (igb_check_reset_block(hw))
1956                 dev_info(&pdev->dev,
1957                         "PHY reset is blocked due to SOL/IDER session.\n");
1958
1959         /*
1960          * features is initialized to 0 at allocation, but it might already
1961          * have bits set by igb_sw_init, so we use an OR instead of an
1962          * assignment.
1963          */
1964         netdev->features |= NETIF_F_SG |
1965                             NETIF_F_IP_CSUM |
1966                             NETIF_F_IPV6_CSUM |
1967                             NETIF_F_TSO |
1968                             NETIF_F_TSO6 |
1969                             NETIF_F_RXHASH |
1970                             NETIF_F_RXCSUM |
1971                             NETIF_F_HW_VLAN_RX |
1972                             NETIF_F_HW_VLAN_TX;
1973
1974         /* copy netdev features into list of user selectable features */
1975         netdev->hw_features |= netdev->features;
1976         netdev->hw_features |= NETIF_F_RXALL;
1977
1978         /* set this bit last since it cannot be part of hw_features */
1979         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1980
1981         netdev->vlan_features |= NETIF_F_TSO |
1982                                  NETIF_F_TSO6 |
1983                                  NETIF_F_IP_CSUM |
1984                                  NETIF_F_IPV6_CSUM |
1985                                  NETIF_F_SG;
1986
1987         netdev->priv_flags |= IFF_SUPP_NOFCS;
1988
1989         if (pci_using_dac) {
1990                 netdev->features |= NETIF_F_HIGHDMA;
1991                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1992         }
1993
1994         if (hw->mac.type >= e1000_82576) {
1995                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1996                 netdev->features |= NETIF_F_SCTP_CSUM;
1997         }
1998
1999         netdev->priv_flags |= IFF_UNICAST_FLT;
2000
2001         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2002
2003         /* before reading the NVM, reset the controller to put the device in a
2004          * known good starting state */
2005         hw->mac.ops.reset_hw(hw);
2006
2007         /* make sure the NVM is good */
2008         if (hw->nvm.ops.validate(hw) < 0) {
2009                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2010                 err = -EIO;
2011                 goto err_eeprom;
2012         }
2013
2014         /* copy the MAC address out of the NVM */
2015         if (hw->mac.ops.read_mac_addr(hw))
2016                 dev_err(&pdev->dev, "NVM Read Error\n");
2017
2018         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2019         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2020
2021         if (!is_valid_ether_addr(netdev->perm_addr)) {
2022                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2023                 err = -EIO;
2024                 goto err_eeprom;
2025         }
2026
2027         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2028                     (unsigned long) adapter);
2029         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2030                     (unsigned long) adapter);
2031
2032         INIT_WORK(&adapter->reset_task, igb_reset_task);
2033         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2034
2035         /* Initialize link properties that are user-changeable */
2036         adapter->fc_autoneg = true;
2037         hw->mac.autoneg = true;
2038         hw->phy.autoneg_advertised = 0x2f;
2039
2040         hw->fc.requested_mode = e1000_fc_default;
2041         hw->fc.current_mode = e1000_fc_default;
2042
2043         igb_validate_mdi_setting(hw);
2044
2045         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2046          * enable the ACPI Magic Packet filter.
2047          */
2048
2049         if (hw->bus.func == 0)
2050                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2051         else if (hw->mac.type >= e1000_82580)
2052                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2053                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2054                                  &eeprom_data);
2055         else if (hw->bus.func == 1)
2056                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2057
2058         if (eeprom_data & eeprom_apme_mask)
2059                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2060
2061         /* now that we have the eeprom settings, apply the special cases where
2062          * the eeprom may be wrong or the board simply won't support wake on
2063          * lan on a particular port */
2064         switch (pdev->device) {
2065         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2066                 adapter->eeprom_wol = 0;
2067                 break;
2068         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2069         case E1000_DEV_ID_82576_FIBER:
2070         case E1000_DEV_ID_82576_SERDES:
2071                 /* Wake events only supported on port A for dual fiber
2072                  * regardless of eeprom setting */
2073                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2074                         adapter->eeprom_wol = 0;
2075                 break;
2076         case E1000_DEV_ID_82576_QUAD_COPPER:
2077         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2078                 /* if quad port adapter, disable WoL on all but port A */
2079                 if (global_quad_port_a != 0)
2080                         adapter->eeprom_wol = 0;
2081                 else
2082                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2083                 /* Reset for multiple quad port adapters */
2084                 if (++global_quad_port_a == 4)
2085                         global_quad_port_a = 0;
2086                 break;
2087         }
2088
2089         /* initialize the wol settings based on the eeprom settings */
2090         adapter->wol = adapter->eeprom_wol;
2091         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2092
2093         /* reset the hardware with the new settings */
2094         igb_reset(adapter);
2095
2096         /* let the f/w know that the h/w is now under the control of the
2097          * driver. */
2098         igb_get_hw_control(adapter);
2099
2100         strcpy(netdev->name, "eth%d");
2101         err = register_netdev(netdev);
2102         if (err)
2103                 goto err_register;
2104
2105         /* carrier off reporting is important to ethtool even BEFORE open */
2106         netif_carrier_off(netdev);
2107
2108 #ifdef CONFIG_IGB_DCA
2109         if (dca_add_requester(&pdev->dev) == 0) {
2110                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2111                 dev_info(&pdev->dev, "DCA enabled\n");
2112                 igb_setup_dca(adapter);
2113         }
2114
2115 #endif
2116         /* do hw tstamp init after resetting */
2117         igb_init_hw_timer(adapter);
2118
2119         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2120         /* print bus type/speed/width info */
2121         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2122                  netdev->name,
2123                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2124                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2125                                                             "unknown"),
2126                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2127                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2128                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2129                    "unknown"),
2130                  netdev->dev_addr);
2131
2132         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2133         if (ret_val)
2134                 strcpy(part_str, "Unknown");
2135         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2136         dev_info(&pdev->dev,
2137                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2138                 adapter->msix_entries ? "MSI-X" :
2139                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2140                 adapter->num_rx_queues, adapter->num_tx_queues);
2141         switch (hw->mac.type) {
2142         case e1000_i350:
2143                 igb_set_eee_i350(hw);
2144                 break;
2145         default:
2146                 break;
2147         }
2148
2149         pm_runtime_put_noidle(&pdev->dev);
2150         return 0;
2151
2152 err_register:
2153         igb_release_hw_control(adapter);
2154 err_eeprom:
2155         if (!igb_check_reset_block(hw))
2156                 igb_reset_phy(hw);
2157
2158         if (hw->flash_address)
2159                 iounmap(hw->flash_address);
2160 err_sw_init:
2161         igb_clear_interrupt_scheme(adapter);
2162         iounmap(hw->hw_addr);
2163 err_ioremap:
2164         free_netdev(netdev);
2165 err_alloc_etherdev:
2166         pci_release_selected_regions(pdev,
2167                                      pci_select_bars(pdev, IORESOURCE_MEM));
2168 err_pci_reg:
2169 err_dma:
2170         pci_disable_device(pdev);
2171         return err;
2172 }
2173
2174 /**
2175  * igb_remove - Device Removal Routine
2176  * @pdev: PCI device information struct
2177  *
2178  * igb_remove is called by the PCI subsystem to alert the driver
2179  * that it should release a PCI device.  This could be caused by a
2180  * Hot-Plug event, or because the driver is going to be removed from
2181  * memory.
2182  **/
2183 static void __devexit igb_remove(struct pci_dev *pdev)
2184 {
2185         struct net_device *netdev = pci_get_drvdata(pdev);
2186         struct igb_adapter *adapter = netdev_priv(netdev);
2187         struct e1000_hw *hw = &adapter->hw;
2188
2189         pm_runtime_get_noresume(&pdev->dev);
2190
2191         /*
2192          * The watchdog timer may be rescheduled, so explicitly
2193          * disable watchdog from being rescheduled.
2194          */
2195         set_bit(__IGB_DOWN, &adapter->state);
2196         del_timer_sync(&adapter->watchdog_timer);
2197         del_timer_sync(&adapter->phy_info_timer);
2198
2199         cancel_work_sync(&adapter->reset_task);
2200         cancel_work_sync(&adapter->watchdog_task);
2201
2202 #ifdef CONFIG_IGB_DCA
2203         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2204                 dev_info(&pdev->dev, "DCA disabled\n");
2205                 dca_remove_requester(&pdev->dev);
2206                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2207                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2208         }
2209 #endif
2210
2211         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2212          * would have already happened in close and is redundant. */
2213         igb_release_hw_control(adapter);
2214
2215         unregister_netdev(netdev);
2216
2217         igb_clear_interrupt_scheme(adapter);
2218
2219 #ifdef CONFIG_PCI_IOV
2220         /* reclaim resources allocated to VFs */
2221         if (adapter->vf_data) {
2222                 /* disable iov and allow time for transactions to clear */
2223                 if (!igb_check_vf_assignment(adapter)) {
2224                         pci_disable_sriov(pdev);
2225                         msleep(500);
2226                 } else {
2227                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2228                 }
2229
2230                 kfree(adapter->vf_data);
2231                 adapter->vf_data = NULL;
2232                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2233                 wrfl();
2234                 msleep(100);
2235                 dev_info(&pdev->dev, "IOV Disabled\n");
2236         }
2237 #endif
2238
2239         iounmap(hw->hw_addr);
2240         if (hw->flash_address)
2241                 iounmap(hw->flash_address);
2242         pci_release_selected_regions(pdev,
2243                                      pci_select_bars(pdev, IORESOURCE_MEM));
2244
2245         kfree(adapter->shadow_vfta);
2246         free_netdev(netdev);
2247
2248         pci_disable_pcie_error_reporting(pdev);
2249
2250         pci_disable_device(pdev);
2251 }
2252
2253 /**
2254  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2255  * @adapter: board private structure to initialize
2256  *
2257  * This function initializes the vf specific data storage and then attempts to
2258  * allocate the VFs.  The reason for ordering it this way is because it is much
2259  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2260  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2261  **/
2262 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2263 {
2264 #ifdef CONFIG_PCI_IOV
2265         struct pci_dev *pdev = adapter->pdev;
2266         int old_vfs = igb_find_enabled_vfs(adapter);
2267         int i;
2268
2269         if (old_vfs) {
2270                 dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2271                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2272                 adapter->vfs_allocated_count = old_vfs;
2273         }
2274
2275         if (!adapter->vfs_allocated_count)
2276                 return;
2277
2278         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2279                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2280         /* if allocation failed then we do not support SR-IOV */
2281         if (!adapter->vf_data) {
2282                 adapter->vfs_allocated_count = 0;
2283                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2284                         "Data Storage\n");
2285                 goto out;
2286         }
2287
2288         if (!old_vfs) {
2289                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2290                         goto err_out;
2291         }
2292         dev_info(&pdev->dev, "%d VFs allocated\n",
2293                  adapter->vfs_allocated_count);
2294         for (i = 0; i < adapter->vfs_allocated_count; i++)
2295                 igb_vf_configure(adapter, i);
2296
2297         /* DMA Coalescing is not supported in IOV mode. */
2298         adapter->flags &= ~IGB_FLAG_DMAC;
2299         goto out;
2300 err_out:
2301         kfree(adapter->vf_data);
2302         adapter->vf_data = NULL;
2303         adapter->vfs_allocated_count = 0;
2304 out:
2305         return;
2306 #endif /* CONFIG_PCI_IOV */
2307 }
2308
2309 /**
2310  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2311  * @adapter: board private structure to initialize
2312  *
2313  * igb_init_hw_timer initializes the function pointer and values for the hw
2314  * timer found in hardware.
2315  **/
2316 static void igb_init_hw_timer(struct igb_adapter *adapter)
2317 {
2318         struct e1000_hw *hw = &adapter->hw;
2319
2320         switch (hw->mac.type) {
2321         case e1000_i350:
2322         case e1000_82580:
2323                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2324                 adapter->cycles.read = igb_read_clock;
2325                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2326                 adapter->cycles.mult = 1;
2327                 /*
2328                  * The 82580 timesync advances the system timer in 8ns steps,
2329                  * and its value cannot be scaled by shifting.  Instead we
2330                  * shift the register reads to generate a 64-bit timer value:
2331                  * SYSTIMR/L/H, TXSTMPL/H and RXSTMPL/H all have to be shifted
2332                  * by 24 to produce a larger value for synchronization.
2333                  */
2334                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2335                 /* disable system timer temporarily by setting bit 31 */
2336                 wr32(E1000_TSAUXC, 0x80000000);
2337                 wrfl();
2338
2339                 /* Set registers so that rollover occurs soon to test this. */
2340                 wr32(E1000_SYSTIMR, 0x00000000);
2341                 wr32(E1000_SYSTIML, 0x80000000);
2342                 wr32(E1000_SYSTIMH, 0x000000FF);
2343                 wrfl();
2344
2345                 /* enable system timer by clearing bit 31 */
2346                 wr32(E1000_TSAUXC, 0x0);
2347                 wrfl();
2348
2349                 timecounter_init(&adapter->clock,
2350                                  &adapter->cycles,
2351                                  ktime_to_ns(ktime_get_real()));
2352                 /*
2353                  * Synchronize our NIC clock against the system wall clock.
2354                  * NIC time stamp reading requires ~3us per sample; each
2355                  * sample was pretty stable even under load, so 10 samples
2356                  * suffice for each offset comparison.
2357                  */
2358                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2359                 adapter->compare.source = &adapter->clock;
2360                 adapter->compare.target = ktime_get_real;
2361                 adapter->compare.num_samples = 10;
2362                 timecompare_update(&adapter->compare, 0);
2363                 break;
2364         case e1000_82576:
2365                 /*
2366                  * Initialize the hardware timer: we keep it running just in
2367                  * case some program needs it later on.
2368                  */
2369                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2370                 adapter->cycles.read = igb_read_clock;
2371                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2372                 adapter->cycles.mult = 1;
2373                 /*
2374                  * Scale the NIC clock cycle by a large factor so that
2375                  * relatively small clock corrections can be added or
2376                  * subtracted at each clock tick. The drawbacks of a large
2377                  * factor are a) that the clock register overflows more quickly
2378                  * (not such a big deal) and b) that the increment per tick has
2379                  * to fit into 24 bits.  As a result we need to use a shift of
2380                  * 19 so we can fit a value of 16 into the TIMINCA register.
2381                  */
2382                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2383                 wr32(E1000_TIMINCA,
2384                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2385                                 (16 << IGB_82576_TSYNC_SHIFT));
2386
2387                 /* Set registers so that rollover occurs soon to test this. */
2388                 wr32(E1000_SYSTIML, 0x00000000);
2389                 wr32(E1000_SYSTIMH, 0xFF800000);
2390                 wrfl();
2391
2392                 timecounter_init(&adapter->clock,
2393                                  &adapter->cycles,
2394                                  ktime_to_ns(ktime_get_real()));
2395                 /*
2396                  * Synchronize our NIC clock against the system wall clock.
2397                  * NIC time stamp reading requires ~3us per sample; each
2398                  * sample was pretty stable even under load, so 10 samples
2399                  * suffice for each offset comparison.
2400                  */
2401                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2402                 adapter->compare.source = &adapter->clock;
2403                 adapter->compare.target = ktime_get_real;
2404                 adapter->compare.num_samples = 10;
2405                 timecompare_update(&adapter->compare, 0);
2406                 break;
2407         case e1000_82575:
2408                 /* 82575 does not support timesync */
2409         default:
2410                 break;
2411         }
2412
2413 }
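
/*
 * With the timecounter/timecompare pair initialized above, a raw SYSTIM
 * reading is converted to wall-clock time roughly as follows (a sketch
 * of what the Rx/Tx timestamp paths do; "raw_systim" stands for the
 * assembled SYSTIM register value):
 *
 *	u64 ns = timecounter_cyc2time(&adapter->clock, raw_systim);
 *	ktime_t sys = timecompare_transform(&adapter->compare, ns);
 */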
2414
2415 /**
2416  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2417  * @adapter: board private structure to initialize
2418  *
2419  * igb_sw_init initializes the Adapter private data structure.
2420  * Fields are initialized based on PCI device information and
2421  * OS network device settings (MTU size).
2422  **/
2423 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2424 {
2425         struct e1000_hw *hw = &adapter->hw;
2426         struct net_device *netdev = adapter->netdev;
2427         struct pci_dev *pdev = adapter->pdev;
2428
2429         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2430
2431         /* set default ring sizes */
2432         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2433         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2434
2435         /* set default ITR values */
2436         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2437         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2438
2439         /* set default work limits */
2440         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2441
2442         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2443                                   VLAN_HLEN;
2444         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2445
2446         adapter->node = -1;
2447
2448         spin_lock_init(&adapter->stats64_lock);
2449 #ifdef CONFIG_PCI_IOV
2450         switch (hw->mac.type) {
2451         case e1000_82576:
2452         case e1000_i350:
2453                 if (max_vfs > 7) {
2454                         dev_warn(&pdev->dev,
2455                                  "Maximum of 7 VFs per PF, using max\n");
2456                         adapter->vfs_allocated_count = 7;
2457                 } else
2458                         adapter->vfs_allocated_count = max_vfs;
2459                 break;
2460         default:
2461                 break;
2462         }
2463 #endif /* CONFIG_PCI_IOV */
2464         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2465         /* i350 cannot do RSS and SR-IOV at the same time */
2466         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2467                 adapter->rss_queues = 1;
2468
2469         /*
2470          * If rss_queues > 4, or if more than six VFs are allocated while
2471          * rss_queues > 1, combine the queues into queue pairs in order to
2472          * conserve interrupts due to the limited supply
2473          */
2474         if ((adapter->rss_queues > 4) ||
2475             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2476                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2477
2478         /* Setup and initialize a copy of the hw vlan table array */
2479         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2480                                 E1000_VLAN_FILTER_TBL_SIZE,
2481                                 GFP_ATOMIC);
2482
2483         /* This call may decrease the number of queues */
2484         if (igb_init_interrupt_scheme(adapter)) {
2485                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2486                 return -ENOMEM;
2487         }
2488
2489         igb_probe_vfs(adapter);
2490
2491         /* Explicitly disable IRQ since the NIC can be in any state. */
2492         igb_irq_disable(adapter);
2493
2494         if (hw->mac.type == e1000_i350)
2495                 adapter->flags &= ~IGB_FLAG_DMAC;
2496
2497         set_bit(__IGB_DOWN, &adapter->state);
2498         return 0;
2499 }
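
/*
 * Example of the pairing heuristic above: on an 8-CPU box whose MAC
 * supports 8 RSS queues, rss_queues = 8 > 4, so IGB_FLAG_QUEUE_PAIRS is
 * set and each Tx ring shares one q_vector with its Rx sibling, halving
 * the number of MSI-X vectors consumed.
 */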
2500
2501 /**
2502  * igb_open - Called when a network interface is made active
2503  * @netdev: network interface device structure
2504  *
2505  * Returns 0 on success, negative value on failure
2506  *
2507  * The open entry point is called when a network interface is made
2508  * active by the system (IFF_UP).  At this point all resources needed
2509  * for transmit and receive operations are allocated, the interrupt
2510  * handler is registered with the OS, the watchdog timer is started,
2511  * and the stack is notified that the interface is ready.
2512  **/
2513 static int __igb_open(struct net_device *netdev, bool resuming)
2514 {
2515         struct igb_adapter *adapter = netdev_priv(netdev);
2516         struct e1000_hw *hw = &adapter->hw;
2517         struct pci_dev *pdev = adapter->pdev;
2518         int err;
2519         int i;
2520
2521         /* disallow open during test */
2522         if (test_bit(__IGB_TESTING, &adapter->state)) {
2523                 WARN_ON(resuming);
2524                 return -EBUSY;
2525         }
2526
2527         if (!resuming)
2528                 pm_runtime_get_sync(&pdev->dev);
2529
2530         netif_carrier_off(netdev);
2531
2532         /* allocate transmit descriptors */
2533         err = igb_setup_all_tx_resources(adapter);
2534         if (err)
2535                 goto err_setup_tx;
2536
2537         /* allocate receive descriptors */
2538         err = igb_setup_all_rx_resources(adapter);
2539         if (err)
2540                 goto err_setup_rx;
2541
2542         igb_power_up_link(adapter);
2543
2544         /* before we allocate an interrupt, we must be ready to handle it.
2545          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2546          * as soon as we call request_irq, so we have to set up our
2547          * clean_rx handler before we do so.  */
2548         igb_configure(adapter);
2549
2550         err = igb_request_irq(adapter);
2551         if (err)
2552                 goto err_req_irq;
2553
2554         /* From here on the code is the same as igb_up() */
2555         clear_bit(__IGB_DOWN, &adapter->state);
2556
2557         for (i = 0; i < adapter->num_q_vectors; i++)
2558                 napi_enable(&(adapter->q_vector[i]->napi));
2559
2560         /* Clear any pending interrupts. */
2561         rd32(E1000_ICR);
2562
2563         igb_irq_enable(adapter);
2564
2565         /* notify VFs that reset has been completed */
2566         if (adapter->vfs_allocated_count) {
2567                 u32 reg_data = rd32(E1000_CTRL_EXT);
2568                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2569                 wr32(E1000_CTRL_EXT, reg_data);
2570         }
2571
2572         netif_tx_start_all_queues(netdev);
2573
2574         if (!resuming)
2575                 pm_runtime_put(&pdev->dev);
2576
2577         /* start the watchdog. */
2578         hw->mac.get_link_status = 1;
2579         schedule_work(&adapter->watchdog_task);
2580
2581         return 0;
2582
2583 err_req_irq:
2584         igb_release_hw_control(adapter);
2585         igb_power_down_link(adapter);
2586         igb_free_all_rx_resources(adapter);
2587 err_setup_rx:
2588         igb_free_all_tx_resources(adapter);
2589 err_setup_tx:
2590         igb_reset(adapter);
2591         if (!resuming)
2592                 pm_runtime_put(&pdev->dev);
2593
2594         return err;
2595 }
2596
2597 static int igb_open(struct net_device *netdev)
2598 {
2599         return __igb_open(netdev, false);
2600 }
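
/*
 * The "resuming" flag lets the power-management callbacks reuse this
 * path: a normal ndo_open wraps the hardware setup in a runtime-PM
 * get/put, while the resume handler, which already runs with the device
 * powered, calls __igb_open(netdev, true) to skip that bookkeeping.
 * __igb_close() mirrors the same split via its "suspending" flag.
 */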
2601
2602 /**
2603  * igb_close - Disables a network interface
2604  * @netdev: network interface device structure
2605  *
2606  * Returns 0, this is not allowed to fail
2607  *
2608  * The close entry point is called when an interface is de-activated
2609  * by the OS.  The hardware is still under the driver's control, but
2610  * needs to be disabled.  A global MAC reset is issued to stop the
2611  * hardware, and all transmit and receive resources are freed.
2612  **/
2613 static int __igb_close(struct net_device *netdev, bool suspending)
2614 {
2615         struct igb_adapter *adapter = netdev_priv(netdev);
2616         struct pci_dev *pdev = adapter->pdev;
2617
2618         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2619
2620         if (!suspending)
2621                 pm_runtime_get_sync(&pdev->dev);
2622
2623         igb_down(adapter);
2624         igb_free_irq(adapter);
2625
2626         igb_free_all_tx_resources(adapter);
2627         igb_free_all_rx_resources(adapter);
2628
2629         if (!suspending)
2630                 pm_runtime_put_sync(&pdev->dev);
2631         return 0;
2632 }
2633
2634 static int igb_close(struct net_device *netdev)
2635 {
2636         return __igb_close(netdev, false);
2637 }
2638
2639 /**
2640  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2641  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2642  *
2643  * Return 0 on success, negative on failure
2644  **/
2645 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2646 {
2647         struct device *dev = tx_ring->dev;
2648         int orig_node = dev_to_node(dev);
2649         int size;
2650
2651         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2652         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2653         if (!tx_ring->tx_buffer_info)
2654                 tx_ring->tx_buffer_info = vzalloc(size);
2655         if (!tx_ring->tx_buffer_info)
2656                 goto err;
2657
2658         /* round up to nearest 4K */
2659         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2660         tx_ring->size = ALIGN(tx_ring->size, 4096);
2661
2662         set_dev_node(dev, tx_ring->numa_node);
2663         tx_ring->desc = dma_alloc_coherent(dev,
2664                                            tx_ring->size,
2665                                            &tx_ring->dma,
2666                                            GFP_KERNEL);
2667         set_dev_node(dev, orig_node);
2668         if (!tx_ring->desc)
2669                 tx_ring->desc = dma_alloc_coherent(dev,
2670                                                    tx_ring->size,
2671                                                    &tx_ring->dma,
2672                                                    GFP_KERNEL);
2673
2674         if (!tx_ring->desc)
2675                 goto err;
2676
2677         tx_ring->next_to_use = 0;
2678         tx_ring->next_to_clean = 0;
2679
2680         return 0;
2681
2682 err:
2683         vfree(tx_ring->tx_buffer_info);
2684         dev_err(dev,
2685                 "Unable to allocate memory for the transmit descriptor ring\n");
2686         return -ENOMEM;
2687 }
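
/*
 * The two-stage allocation above follows a general NUMA pattern (sketch):
 *
 *	buf = vzalloc_node(size, preferred_node);
 *	if (!buf)
 *		buf = vzalloc(size);		(any-node fallback)
 *
 * For the coherent DMA buffer the same effect is had by temporarily
 * pointing the device at the ring's node with set_dev_node() and
 * restoring the original node afterwards.
 */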
2688
2689 /**
2690  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2691  *                                (Descriptors) for all queues
2692  * @adapter: board private structure
2693  *
2694  * Return 0 on success, negative on failure
2695  **/
2696 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2697 {
2698         struct pci_dev *pdev = adapter->pdev;
2699         int i, err = 0;
2700
2701         for (i = 0; i < adapter->num_tx_queues; i++) {
2702                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2703                 if (err) {
2704                         dev_err(&pdev->dev,
2705                                 "Allocation for Tx Queue %u failed\n", i);
2706                         for (i--; i >= 0; i--)
2707                                 igb_free_tx_resources(adapter->tx_ring[i]);
2708                         break;
2709                 }
2710         }
2711
2712         return err;
2713 }
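
/*
 * Note the unwind: on a mid-loop failure the "for (i--; i >= 0; i--)"
 * walk frees only the rings that were successfully set up, so a caller
 * that sees a negative return owes no queue-resource cleanup of its own.
 */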
2714
2715 /**
2716  * igb_setup_tctl - configure the transmit control registers
2717  * @adapter: Board private structure
2718  **/
2719 void igb_setup_tctl(struct igb_adapter *adapter)
2720 {
2721         struct e1000_hw *hw = &adapter->hw;
2722         u32 tctl;
2723
2724         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2725         wr32(E1000_TXDCTL(0), 0);
2726
2727         /* Program the Transmit Control Register */
2728         tctl = rd32(E1000_TCTL);
2729         tctl &= ~E1000_TCTL_CT;
2730         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2731                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2732
2733         igb_config_collision_dist(hw);
2734
2735         /* Enable transmits */
2736         tctl |= E1000_TCTL_EN;
2737
2738         wr32(E1000_TCTL, tctl);
2739 }

/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 txdctl = 0;
        u64 tdba = ring->dma;
        int reg_idx = ring->reg_idx;

        /* disable the queue */
        wr32(E1000_TXDCTL(reg_idx), 0);
        wrfl();
        mdelay(10);

        wr32(E1000_TDLEN(reg_idx),
             ring->count * sizeof(union e1000_adv_tx_desc));
        wr32(E1000_TDBAL(reg_idx),
             tdba & 0x00000000ffffffffULL);
        wr32(E1000_TDBAH(reg_idx), tdba >> 32);

        ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
        wr32(E1000_TDH(reg_idx), 0);
        writel(0, ring->tail);

        txdctl |= IGB_TX_PTHRESH;
        txdctl |= IGB_TX_HTHRESH << 8;
        txdctl |= IGB_TX_WTHRESH << 16;

        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
        wr32(E1000_TXDCTL(reg_idx), txdctl);
}

/**
 * igb_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
}

/**
 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
        struct device *dev = rx_ring->dev;
        int orig_node = dev_to_node(dev);
        int size, desc_len;

        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
        rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
        if (!rx_ring->rx_buffer_info)
                rx_ring->rx_buffer_info = vzalloc(size);
        if (!rx_ring->rx_buffer_info)
                goto err;

        desc_len = sizeof(union e1000_adv_rx_desc);

        /* Round up to nearest 4K */
        rx_ring->size = rx_ring->count * desc_len;
        rx_ring->size = ALIGN(rx_ring->size, 4096);

        set_dev_node(dev, rx_ring->numa_node);
        rx_ring->desc = dma_alloc_coherent(dev,
                                           rx_ring->size,
                                           &rx_ring->dma,
                                           GFP_KERNEL);
        set_dev_node(dev, orig_node);
        if (!rx_ring->desc)
                rx_ring->desc = dma_alloc_coherent(dev,
                                                   rx_ring->size,
                                                   &rx_ring->dma,
                                                   GFP_KERNEL);

        if (!rx_ring->desc)
                goto err;

        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;

        return 0;

err:
        vfree(rx_ring->rx_buffer_info);
        rx_ring->rx_buffer_info = NULL;
        dev_err(dev,
                "Unable to allocate memory for the receive descriptor ring\n");
        return -ENOMEM;
}

/**
 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *                              (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int i, err = 0;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                err = igb_setup_rx_resources(adapter->rx_ring[i]);
                if (err) {
                        dev_err(&pdev->dev,
                                "Allocation for Rx Queue %u failed\n", i);
                        for (i--; i >= 0; i--)
                                igb_free_rx_resources(adapter->rx_ring[i]);
                        break;
                }
        }

        return err;
}

/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 mrqc, rxcsum;
        u32 j, num_rx_queues, shift = 0, shift2 = 0;
        union e1000_reta {
                u32 dword;
                u8  bytes[4];
        } reta;
        static const u8 rsshash[40] = {
                0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
                0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
                0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
                0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

        /* Fill out hash function seeds */
        for (j = 0; j < 10; j++) {
                u32 rsskey = rsshash[(j * 4)];
                rsskey |= rsshash[(j * 4) + 1] << 8;
                rsskey |= rsshash[(j * 4) + 2] << 16;
                rsskey |= rsshash[(j * 4) + 3] << 24;
                array_wr32(E1000_RSSRK(0), j, rsskey);
        }

        num_rx_queues = adapter->rss_queues;

        if (adapter->vfs_allocated_count) {
                /* 82575 and 82576 support 2 RSS queues for VMDq */
                switch (hw->mac.type) {
                case e1000_i350:
                case e1000_82580:
                        num_rx_queues = 1;
                        shift = 0;
                        break;
                case e1000_82576:
                        shift = 3;
                        num_rx_queues = 2;
                        break;
                case e1000_82575:
                        shift = 2;
                        shift2 = 6;
                        /* fall through */
                default:
                        break;
                }
        } else {
                if (hw->mac.type == e1000_82575)
                        shift = 6;
        }

        for (j = 0; j < (32 * 4); j++) {
                reta.bytes[j & 3] = (j % num_rx_queues) << shift;
                if (shift2)
                        reta.bytes[j & 3] |= num_rx_queues << shift2;
                if ((j & 3) == 3)
                        wr32(E1000_RETA(j >> 2), reta.dword);
        }

        /*
         * Disable raw packet checksumming so that RSS hash is placed in
         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
         * offloads as they are enabled by default
         */
        rxcsum = rd32(E1000_RXCSUM);
        rxcsum |= E1000_RXCSUM_PCSD;

        if (adapter->hw.mac.type >= e1000_82576)
                /* Enable Receive Checksum Offload for SCTP */
                rxcsum |= E1000_RXCSUM_CRCOFL;

        /* Don't need to set TUOFL or IPOFL, they default to 1 */
        wr32(E1000_RXCSUM, rxcsum);

        /* If VMDq is enabled then we set the appropriate mode for that, else
         * we default to RSS so that an RSS hash is calculated per packet even
         * if we are only using one queue */
        if (adapter->vfs_allocated_count) {
                if (hw->mac.type > e1000_82575) {
                        /* Set the default pool for the PF's first queue */
                        u32 vtctl = rd32(E1000_VT_CTL);
                        vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
                                   E1000_VT_CTL_DISABLE_DEF_POOL);
                        vtctl |= adapter->vfs_allocated_count <<
                                E1000_VT_CTL_DEFAULT_POOL_SHIFT;
                        wr32(E1000_VT_CTL, vtctl);
                }
                if (adapter->rss_queues > 1)
                        mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
                else
                        mrqc = E1000_MRQC_ENABLE_VMDQ;
        } else {
                mrqc = E1000_MRQC_ENABLE_RSS_4Q;
        }
        igb_vmm_control(adapter);

        /*
         * Generate RSS hash based on TCP port numbers and/or
         * IPv4/v6 src and dst addresses since UDP cannot be
         * hashed reliably due to IP fragmentation
         */
        mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
                E1000_MRQC_RSS_FIELD_IPV4_TCP |
                E1000_MRQC_RSS_FIELD_IPV6 |
                E1000_MRQC_RSS_FIELD_IPV6_TCP |
                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;

        wr32(E1000_MRQC, mrqc);
}

/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl;

        rctl = rd32(E1000_RCTL);

        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
        rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
                (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

        /*
         * enable stripping of CRC. It's unlikely this will break BMC
         * redirection as it did with e1000. Newer features require
         * that the HW strips the CRC.
         */
        rctl |= E1000_RCTL_SECRC;

        /* disable store bad packets and clear size bits. */
        rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

        /* enable LPE to prevent packets larger than max_frame_size */
        rctl |= E1000_RCTL_LPE;

        /* disable queue 0 to prevent tail write w/o re-config */
        wr32(E1000_RXDCTL(0), 0);

        /* Attention!!!  For SR-IOV PF driver operations you must enable
         * queue drop for all VF and PF queues to prevent head of line blocking
         * if an un-trusted VF does not provide descriptors to hardware.
         */
        if (adapter->vfs_allocated_count) {
                /* set all queue drop enable bits */
                wr32(E1000_QDE, ALL_QUEUES);
        }

        /* This is useful for sniffing bad packets. */
        if (adapter->netdev->features & NETIF_F_RXALL) {
                /* UPE and MPE will be handled by normal PROMISC logic
                 * in igb_set_rx_mode */
                rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
                         E1000_RCTL_BAM | /* RX All Bcast Pkts */
                         E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

                rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
                          E1000_RCTL_DPF | /* Allow filtered pause */
                          E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
                /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
                 * and that breaks VLANs.
                 */
        }

        wr32(E1000_RCTL, rctl);
}

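/**
 * igb_set_vf_rlpml - set the maximum receive packet size for a pool
 * @adapter: board private structure
 * @size: maximum frame size to allow, in bytes
 * @vfn: index of the VF/pool whose VMOLR register is programmed
 **/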
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
                                   int vfn)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 vmolr;

        /* if it isn't the PF, check to see if VFs are enabled and
         * increase the size to support vlan tags */
        if (vfn < adapter->vfs_allocated_count &&
            adapter->vf_data[vfn].vlans_enabled)
                size += VLAN_TAG_SIZE;

        vmolr = rd32(E1000_VMOLR(vfn));
        vmolr &= ~E1000_VMOLR_RLPML_MASK;
        vmolr |= size | E1000_VMOLR_LPE;
        wr32(E1000_VMOLR(vfn), vmolr);

        return 0;
}

/**
 * igb_rlpml_set - set maximum receive packet size
 * @adapter: board private structure
 *
 * Configure maximum receivable packet size.
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
{
        u32 max_frame_size = adapter->max_frame_size;
        struct e1000_hw *hw = &adapter->hw;
        u16 pf_id = adapter->vfs_allocated_count;

        if (pf_id) {
                igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
                /*
                 * If we're in VMDQ or SR-IOV mode, then set global RLPML
                 * to our max jumbo frame size, in case we need to enable
                 * jumbo frames on one of the rings later.
                 * This will not pass over-length frames into the default
                 * queue because it's gated by the VMOLR.RLPML.
                 */
                max_frame_size = MAX_JUMBO_FRAME_SIZE;
        }

        wr32(E1000_RLPML, max_frame_size);
}

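/**
 * igb_set_vmolr - program the VM offload register for a given pool
 * @adapter: board private structure
 * @vfn: index of the VF/pool to configure
 * @aupe: true to accept untagged packets, false to accept tagged only
 **/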
static inline void igb_set_vmolr(struct igb_adapter *adapter,
                                 int vfn, bool aupe)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 vmolr;

        /*
         * This register exists only on 82576 and newer, so on older
         * hardware there is nothing to do
         */
        if (hw->mac.type < e1000_82576)
                return;

        vmolr = rd32(E1000_VMOLR(vfn));
        vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
        if (aupe)
                vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
        else
                vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */

        /* clear all bits that might not be set */
        vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

        if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
                vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
        /*
         * for VMDq only allow the VFs and pool 0 to accept broadcast and
         * multicast packets
         */
        if (vfn <= adapter->vfs_allocated_count)
                vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */

        wr32(E1000_VMOLR(vfn), vmolr);
}

/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
        struct e1000_hw *hw = &adapter->hw;
        u64 rdba = ring->dma;
        int reg_idx = ring->reg_idx;
        u32 srrctl = 0, rxdctl = 0;

        /* disable the queue */
        wr32(E1000_RXDCTL(reg_idx), 0);

        /* Set DMA base address registers */
        wr32(E1000_RDBAL(reg_idx),
             rdba & 0x00000000ffffffffULL);
        wr32(E1000_RDBAH(reg_idx), rdba >> 32);
        wr32(E1000_RDLEN(reg_idx),
             ring->count * sizeof(union e1000_adv_rx_desc));

        /* initialize head and tail */
        ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
        wr32(E1000_RDH(reg_idx), 0);
        writel(0, ring->tail);

        /* set descriptor configuration */
        srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
        srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#else
        srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#endif
        srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
        if (hw->mac.type >= e1000_82580)
                srrctl |= E1000_SRRCTL_TIMESTAMP;
        /* Only set Drop Enable if we are supporting multiple queues */
        if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
                srrctl |= E1000_SRRCTL_DROP_EN;

        wr32(E1000_SRRCTL(reg_idx), srrctl);

        /* set filtering for VMDQ pools */
        igb_set_vmolr(adapter, reg_idx & 0x7, true);

        rxdctl |= IGB_RX_PTHRESH;
        rxdctl |= IGB_RX_HTHRESH << 8;
        rxdctl |= IGB_RX_WTHRESH << 16;

        /* enable receive descriptor fetching */
        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
        wr32(E1000_RXDCTL(reg_idx), rxdctl);
}

/**
 * igb_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
{
        int i;

        /* set UTA to appropriate mode */
        igb_set_uta(adapter);

        /* set the correct pool for the PF default MAC address in entry 0 */
        igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
                         adapter->vfs_allocated_count);

        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring */
        for (i = 0; i < adapter->num_rx_queues; i++)
                igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
}

/**
 * igb_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void igb_free_tx_resources(struct igb_ring *tx_ring)
{
        igb_clean_tx_ring(tx_ring);

        vfree(tx_ring->tx_buffer_info);
        tx_ring->tx_buffer_info = NULL;

        /* if not set, then don't free */
        if (!tx_ring->desc)
                return;

        dma_free_coherent(tx_ring->dev, tx_ring->size,
                          tx_ring->desc, tx_ring->dma);

        tx_ring->desc = NULL;
}

/**
 * igb_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                igb_free_tx_resources(adapter->tx_ring[i]);
}

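/**
 * igb_unmap_and_free_tx_resource - unmap DMA and release a Tx buffer
 * @ring: ring the buffer belongs to
 * @tx_buffer: buffer to be unmapped and freed
 **/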
void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
                                    struct igb_tx_buffer *tx_buffer)
{
        if (tx_buffer->skb) {
                dev_kfree_skb_any(tx_buffer->skb);
                if (tx_buffer->dma)
                        dma_unmap_single(ring->dev,
                                         tx_buffer->dma,
                                         tx_buffer->length,
                                         DMA_TO_DEVICE);
        } else if (tx_buffer->dma) {
                dma_unmap_page(ring->dev,
                               tx_buffer->dma,
                               tx_buffer->length,
                               DMA_TO_DEVICE);
        }
        tx_buffer->next_to_watch = NULL;
        tx_buffer->skb = NULL;
        tx_buffer->dma = 0;
        /* buffer_info must be completely set up in the transmit path */
}

/**
 * igb_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
        struct igb_tx_buffer *buffer_info;
        unsigned long size;
        u16 i;

        if (!tx_ring->tx_buffer_info)
                return;

        /* Free all the Tx ring sk_buffs */
        for (i = 0; i < tx_ring->count; i++) {
                buffer_info = &tx_ring->tx_buffer_info[i];
                igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
        }

        netdev_tx_reset_queue(txring_txq(tx_ring));

        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
        memset(tx_ring->tx_buffer_info, 0, size);

        /* Zero out the descriptor ring */
        memset(tx_ring->desc, 0, tx_ring->size);

        tx_ring->next_to_use = 0;
        tx_ring->next_to_clean = 0;
}

/**
 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                igb_clean_tx_ring(adapter->tx_ring[i]);
}

/**
 * igb_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void igb_free_rx_resources(struct igb_ring *rx_ring)
{
        igb_clean_rx_ring(rx_ring);

        vfree(rx_ring->rx_buffer_info);
        rx_ring->rx_buffer_info = NULL;

        /* if not set, then don't free */
        if (!rx_ring->desc)
                return;

        dma_free_coherent(rx_ring->dev, rx_ring->size,
                          rx_ring->desc, rx_ring->dma);

        rx_ring->desc = NULL;
}

/**
 * igb_free_all_rx_resources - Free Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_rx_queues; i++)
                igb_free_rx_resources(adapter->rx_ring[i]);
}

/**
 * igb_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
        unsigned long size;
        u16 i;

        if (!rx_ring->rx_buffer_info)
                return;

        /* Free all the Rx ring sk_buffs */
        for (i = 0; i < rx_ring->count; i++) {
                struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
                if (buffer_info->dma) {
                        dma_unmap_single(rx_ring->dev,
                                         buffer_info->dma,
                                         IGB_RX_HDR_LEN,
                                         DMA_FROM_DEVICE);
                        buffer_info->dma = 0;
                }

                if (buffer_info->skb) {
                        dev_kfree_skb(buffer_info->skb);
                        buffer_info->skb = NULL;
                }
                if (buffer_info->page_dma) {
                        dma_unmap_page(rx_ring->dev,
                                       buffer_info->page_dma,
                                       PAGE_SIZE / 2,
                                       DMA_FROM_DEVICE);
                        buffer_info->page_dma = 0;
                }
                if (buffer_info->page) {
                        put_page(buffer_info->page);
                        buffer_info->page = NULL;
                        buffer_info->page_offset = 0;
                }
        }

        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
        memset(rx_ring->rx_buffer_info, 0, size);

        /* Zero out the descriptor ring */
        memset(rx_ring->desc, 0, rx_ring->size);

        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
}

/**
 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_rx_queues; i++)
                igb_clean_rx_ring(adapter->rx_ring[i]);
}

/**
 * igb_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        struct sockaddr *addr = p;

        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;

        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

        /* set the correct pool for the new PF MAC address in entry 0 */
        igb_rar_set_qsel(adapter, hw->mac.addr, 0,
                         adapter->vfs_allocated_count);

        return 0;
}

/**
 * igb_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *          0 on no addresses written
 *          X on writing X addresses to MTA
 **/
static int igb_write_mc_addr_list(struct net_device *netdev)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        struct netdev_hw_addr *ha;
        u8  *mta_list;
        int i;

        if (netdev_mc_empty(netdev)) {
                /* nothing to program, so clear mc list */
                igb_update_mc_addr_list(hw, NULL, 0);
                igb_restore_vf_multicasts(adapter);
                return 0;
        }

        mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
        if (!mta_list)
                return -ENOMEM;

        /* The shared function expects a packed array of only addresses. */
        i = 0;
        netdev_for_each_mc_addr(ha, netdev)
                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

        igb_update_mc_addr_list(hw, mta_list, i);
        kfree(mta_list);

        return netdev_mc_count(netdev);
}

/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.
 * Returns: -ENOMEM on failure/insufficient address space
 *          0 on no addresses written
 *          X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        unsigned int vfn = adapter->vfs_allocated_count;
        unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
        int count = 0;

        /* return ENOMEM indicating insufficient memory for addresses */
        if (netdev_uc_count(netdev) > rar_entries)
                return -ENOMEM;

        if (!netdev_uc_empty(netdev) && rar_entries) {
                struct netdev_hw_addr *ha;

                netdev_for_each_uc_addr(ha, netdev) {
                        if (!rar_entries)
                                break;
                        igb_rar_set_qsel(adapter, ha->addr,
                                         rar_entries--,
                                         vfn);
                        count++;
                }
        }
        /* write the addresses in reverse order to avoid write combining */
        for (; rar_entries > 0 ; rar_entries--) {
                wr32(E1000_RAH(rar_entries), 0);
                wr32(E1000_RAL(rar_entries), 0);
        }
        wrfl();

        return count;
}

/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated.  This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        unsigned int vfn = adapter->vfs_allocated_count;
        u32 rctl, vmolr = 0;
        int count;

        /* Check for Promiscuous and All Multicast modes */
        rctl = rd32(E1000_RCTL);

        /* clear the affected bits */
        rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

        if (netdev->flags & IFF_PROMISC) {
                rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
                vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
        } else {
                if (netdev->flags & IFF_ALLMULTI) {
                        rctl |= E1000_RCTL_MPE;
                        vmolr |= E1000_VMOLR_MPME;
                } else {
                        /*
                         * Write addresses to the MTA, if the attempt fails
                         * then we should just turn on promiscuous mode so
                         * that we can at least receive multicast traffic
                         */
                        count = igb_write_mc_addr_list(netdev);
                        if (count < 0) {
                                rctl |= E1000_RCTL_MPE;
                                vmolr |= E1000_VMOLR_MPME;
                        } else if (count) {
                                vmolr |= E1000_VMOLR_ROMPE;
                        }
                }
                /*
                 * Write addresses to available RAR registers, if there is not
                 * sufficient space to store all the addresses then enable
                 * unicast promiscuous mode
                 */
                count = igb_write_uc_addr_list(netdev);
                if (count < 0) {
                        rctl |= E1000_RCTL_UPE;
                        vmolr |= E1000_VMOLR_ROPE;
                }
                rctl |= E1000_RCTL_VFE;
        }
        wr32(E1000_RCTL, rctl);

        /*
         * In order to support SR-IOV and eventually VMDq it is necessary to set
         * the VMOLR to enable the appropriate modes.  Without this workaround
         * we will have issues with VLAN tag stripping not being done for frames
         * that are only arriving because we are the default pool
         */
        if (hw->mac.type < e1000_82576)
                return;

        vmolr |= rd32(E1000_VMOLR(vfn)) &
                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
        wr32(E1000_VMOLR(vfn), vmolr);
        igb_restore_vf_multicasts(adapter);
}

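/**
 * igb_check_wvbr - latch any Wrong VM Behavior (spoof) events
 * @adapter: board private structure
 *
 * Reads the WVBR register on parts that support it and accumulates any
 * reported anti-spoof events so igb_spoof_check() can report them later.
 **/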
static void igb_check_wvbr(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 wvbr = 0;

        switch (hw->mac.type) {
        case e1000_82576:
        case e1000_i350:
                wvbr = rd32(E1000_WVBR);
                if (!wvbr)
                        return;
                break;
        default:
                break;
        }

        adapter->wvbr |= wvbr;
}

#define IGB_STAGGERED_QUEUE_OFFSET 8

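/**
 * igb_spoof_check - warn about and clear latched VF spoof events
 * @adapter: board private structure
 **/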
static void igb_spoof_check(struct igb_adapter *adapter)
{
        int j;

        if (!adapter->wvbr)
                return;

        for (j = 0; j < adapter->vfs_allocated_count; j++) {
                if (adapter->wvbr & (1 << j) ||
                    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
                        dev_warn(&adapter->pdev->dev,
                                 "Spoof event(s) detected on VF %d\n", j);
                        adapter->wvbr &=
                                ~((1 << j) |
                                  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
                }
        }
}

/* Need to wait a few seconds after link up to get diagnostic information from
 * the phy */
static void igb_update_phy_info(unsigned long data)
{
        struct igb_adapter *adapter = (struct igb_adapter *) data;
        igb_get_phy_info(&adapter->hw);
}

/**
 * igb_has_link - check shared code for link and determine up/down
 * @adapter: pointer to driver private info
 **/
bool igb_has_link(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        bool link_active = false;
        s32 ret_val = 0;

        /* get_link_status is set on LSC (link status) interrupt or
         * rx sequence error interrupt.  get_link_status will stay
         * false until the e1000_check_for_link establishes link
         * for copper adapters ONLY
         */
        switch (hw->phy.media_type) {
        case e1000_media_type_copper:
                if (hw->mac.get_link_status) {
                        ret_val = hw->mac.ops.check_for_link(hw);
                        link_active = !hw->mac.get_link_status;
                } else {
                        link_active = true;
                }
                break;
        case e1000_media_type_internal_serdes:
                ret_val = hw->mac.ops.check_for_link(hw);
                link_active = hw->mac.serdes_has_link;
                break;
        default:
        case e1000_media_type_unknown:
                break;
        }

        return link_active;
}

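/**
 * igb_thermal_sensor_event - check for a thermal sensor event
 * @hw: pointer to hardware structure
 * @event: THSTAT event bit(s) to test for
 *
 * Returns true if the given event bit is set.  Thermal sensor events are
 * only reported on i350 copper ports that are not in SGMII mode.
 **/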
static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
{
        bool ret = false;
        u32 ctrl_ext, thstat;

        /* check for thermal sensor event on i350, copper only */
        if (hw->mac.type == e1000_i350) {
                thstat = rd32(E1000_THSTAT);
                ctrl_ext = rd32(E1000_CTRL_EXT);

                if ((hw->phy.media_type == e1000_media_type_copper) &&
                    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
                        ret = !!(thstat & event);
                }
        }

        return ret;
}

/**
 * igb_watchdog - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
static void igb_watchdog(unsigned long data)
{
        struct igb_adapter *adapter = (struct igb_adapter *)data;
        /* Do the rest outside of interrupt context */
        schedule_work(&adapter->watchdog_task);
}

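/**
 * igb_watchdog_task - check link and Tx ring state, update stats and timers
 * @work: pointer to the watchdog work_struct embedded in the adapter
 **/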
static void igb_watchdog_task(struct work_struct *work)
{
        struct igb_adapter *adapter = container_of(work,
                                                   struct igb_adapter,
                                                   watchdog_task);
        struct e1000_hw *hw = &adapter->hw;
        struct net_device *netdev = adapter->netdev;
        u32 link;
        int i;

        link = igb_has_link(adapter);
        if (link) {
                /* Cancel scheduled suspend requests. */
                pm_runtime_resume(netdev->dev.parent);

                if (!netif_carrier_ok(netdev)) {
                        u32 ctrl;
                        hw->mac.ops.get_speed_and_duplex(hw,
                                                         &adapter->link_speed,
                                                         &adapter->link_duplex);

                        ctrl = rd32(E1000_CTRL);
                        /* Link status message must follow this format */
                        printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
                               "Duplex, Flow Control: %s\n",
                               netdev->name,
                               adapter->link_speed,
                               adapter->link_duplex == FULL_DUPLEX ?
                               "Full" : "Half",
                               (ctrl & E1000_CTRL_TFCE) &&
                               (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
                               (ctrl & E1000_CTRL_RFCE) ?  "RX" :
                               (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");

                        /* check for thermal sensor event */
                        if (igb_thermal_sensor_event(hw,
                            E1000_THSTAT_LINK_THROTTLE)) {
                                netdev_info(netdev, "The network adapter link "
                                            "speed was downshifted because it "
                                            "overheated\n");
                        }

                        /* adjust timeout factor according to speed/duplex */
                        adapter->tx_timeout_factor = 1;
                        switch (adapter->link_speed) {
                        case SPEED_10:
                                adapter->tx_timeout_factor = 14;
                                break;
                        case SPEED_100:
                                /* maybe add some timeout factor ? */
                                break;
                        }

                        netif_carrier_on(netdev);

                        igb_ping_all_vfs(adapter);
                        igb_check_vf_rate_limit(adapter);

                        /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));
                }
        } else {
                if (netif_carrier_ok(netdev)) {
                        adapter->link_speed = 0;
                        adapter->link_duplex = 0;

                        /* check for thermal sensor event */
                        if (igb_thermal_sensor_event(hw,
                            E1000_THSTAT_PWR_DOWN)) {
                                netdev_err(netdev, "The network adapter was "
                                           "stopped because it overheated\n");
                        }

                        /* Link status message must follow this format */
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);

                        igb_ping_all_vfs(adapter);

                        /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));

                        pm_schedule_suspend(netdev->dev.parent,
                                            MSEC_PER_SEC * 5);
                }
        }

        spin_lock(&adapter->stats64_lock);
        igb_update_stats(adapter, &adapter->stats64);
        spin_unlock(&adapter->stats64_lock);

        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igb_ring *tx_ring = adapter->tx_ring[i];
                if (!netif_carrier_ok(netdev)) {
                        /* We've lost link, so the controller stops DMA,
                         * but we've got queued Tx work that's never going
                         * to get done, so reset controller to flush Tx.
                         * (Do the reset outside of interrupt context). */
                        if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
                                adapter->tx_timeout_count++;
                                schedule_work(&adapter->reset_task);
                                /* return immediately since reset is imminent */
                                return;
                        }
                }

                /* Force detection of hung controller every watchdog period */
                set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
        }

        /* Cause software interrupt to ensure rx ring is cleaned */
        if (adapter->msix_entries) {
                u32 eics = 0;
                for (i = 0; i < adapter->num_q_vectors; i++)
                        eics |= adapter->q_vector[i]->eims_value;
                wr32(E1000_EICS, eics);
        } else {
                wr32(E1000_ICS, E1000_ICS_RXDMT0);
        }

        igb_spoof_check(adapter);

        /* Reset the timer */
        if (!test_bit(__IGB_DOWN, &adapter->state))
                mod_timer(&adapter->watchdog_timer,
                          round_jiffies(jiffies + 2 * HZ));
}

enum latency_range {
        lowest_latency = 0,
        low_latency = 1,
        bulk_latency = 2,
        latency_invalid = 255
};

/**
 * igb_update_ring_itr - update the dynamic ITR value based on packet size
 * @q_vector: pointer to q_vector
 *
 *      Stores a new ITR value based strictly on packet size.  This
 *      algorithm is less sophisticated than that used in igb_update_itr,
 *      due to the difficulty of synchronizing statistics across multiple
 *      receive rings.  The divisors and thresholds used by this function
 *      were determined based on theoretical maximum wire speed and testing
 *      data, in order to minimize response time while increasing bulk
 *      throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  This function is called only when operating in a multiqueue
 *             receive environment.
 **/
static void igb_update_ring_itr(struct igb_q_vector *q_vector)
{
        int new_val = q_vector->itr_val;
        int avg_wire_size = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        unsigned int packets;

        /* For non-gigabit speeds, just fix the interrupt rate at 4000
         * ints/sec - ITR timer value of 120 ticks.
         */
        if (adapter->link_speed != SPEED_1000) {
                new_val = IGB_4K_ITR;
                goto set_itr_val;
        }

        packets = q_vector->rx.total_packets;
        if (packets)
                avg_wire_size = q_vector->rx.total_bytes / packets;

        packets = q_vector->tx.total_packets;
        if (packets)
                avg_wire_size = max_t(u32, avg_wire_size,
                                      q_vector->tx.total_bytes / packets);

        /* if avg_wire_size isn't set no work was done */
        if (!avg_wire_size)
                goto clear_counts;

        /* Add 24 bytes to size to account for CRC, preamble, and gap */
        avg_wire_size += 24;

        /* Don't starve jumbo frames */
        avg_wire_size = min(avg_wire_size, 3000);

        /* Give a little boost to mid-size frames */
        if ((avg_wire_size > 300) && (avg_wire_size < 1200))
                new_val = avg_wire_size / 3;
        else
                new_val = avg_wire_size / 2;

        /* conservative mode (itr 3) eliminates the lowest_latency setting */
        if (new_val < IGB_20K_ITR &&
            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
                new_val = IGB_20K_ITR;

set_itr_val:
        if (new_val != q_vector->itr_val) {
                q_vector->itr_val = new_val;
                q_vector->set_itr = 1;
        }
clear_counts:
        q_vector->rx.total_bytes = 0;
        q_vector->rx.total_packets = 0;
        q_vector->tx.total_bytes = 0;
        q_vector->tx.total_packets = 0;
}

/**
 * igb_update_itr - update the dynamic ITR value based on statistics
 * @q_vector: pointer to q_vector
 * @ring_container: ring info to update the itr for
 *
 *      Stores a new ITR value based on packets and byte
 *      counts during the last interrupt.  The advantage of per interrupt
 *      computation is faster updates and more accurate ITR for the current
 *      traffic pattern.  Constants in this function were computed
 *      based on theoretical maximum wire speed and thresholds were set based
 *      on testing data as well as attempting to minimize response time
 *      while increasing bulk throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  These calculations are only valid when operating in a single-
 *             queue environment.
 **/
static void igb_update_itr(struct igb_q_vector *q_vector,
                           struct igb_ring_container *ring_container)
{
        unsigned int packets = ring_container->total_packets;
        unsigned int bytes = ring_container->total_bytes;
        u8 itrval = ring_container->itr;

        /* no packets, exit with status unchanged */
        if (packets == 0)
                return;

        switch (itrval) {
        case lowest_latency:
                /* handle TSO and jumbo frames */
                if (bytes/packets > 8000)
                        itrval = bulk_latency;
                else if ((packets < 5) && (bytes > 512))
                        itrval = low_latency;
                break;
        case low_latency:  /* 50 usec aka 20000 ints/s */
                if (bytes > 10000) {
                        /* this if handles the TSO accounting */
                        if (bytes/packets > 8000) {
                                itrval = bulk_latency;
                        } else if ((packets < 10) || ((bytes/packets) > 1200)) {
                                itrval = bulk_latency;
                        } else if (packets > 35) {
                                itrval = lowest_latency;
                        }
                } else if (bytes/packets > 2000) {
                        itrval = bulk_latency;
                } else if (packets <= 2 && bytes < 512) {
                        itrval = lowest_latency;
                }
                break;
        case bulk_latency: /* 250 usec aka 4000 ints/s */
                if (bytes > 25000) {
                        if (packets > 35)
                                itrval = low_latency;
                } else if (bytes < 1500) {
                        itrval = low_latency;
                }
                break;
        }

        /* clear work counters since we have the values we need */
        ring_container->total_bytes = 0;
        ring_container->total_packets = 0;

        /* write updated itr to ring container */
        ring_container->itr = itrval;
}

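/**
 * igb_set_itr - update the dynamic ITR value for a q_vector
 * @q_vector: pointer to q_vector
 *
 * Combines the Tx and Rx container ITR decisions and biases increases in
 * the interrupt rate toward bulk latency.  Used when operating in a
 * single-queue environment (see igb_update_itr).
 **/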
static void igb_set_itr(struct igb_q_vector *q_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        u32 new_itr = q_vector->itr_val;
        u8 current_itr = 0;

        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
        if (adapter->link_speed != SPEED_1000) {
                current_itr = 0;
                new_itr = IGB_4K_ITR;
                goto set_itr_now;
        }

        igb_update_itr(q_vector, &q_vector->tx);
        igb_update_itr(q_vector, &q_vector->rx);

        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);

        /* conservative mode (itr 3) eliminates the lowest_latency setting */
        if (current_itr == lowest_latency &&
            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
                current_itr = low_latency;

        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
                new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
                break;
        case low_latency:
                new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
                break;
        case bulk_latency:
                new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
                break;
        default:
                break;
        }

set_itr_now:
        if (new_itr != q_vector->itr_val) {
                /* this attempts to bias the interrupt rate towards Bulk
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > q_vector->itr_val ?
                             max((new_itr * q_vector->itr_val) /
                                 (new_itr + (q_vector->itr_val >> 2)),
                                 new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
                 * we should between interrupts.  Instead, we write the ITR
                 * value at the beginning of the next interrupt so the timing
                 * ends up being correct.
                 */
                q_vector->itr_val = new_itr;
                q_vector->set_itr = 1;
        }
}

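/**
 * igb_tx_ctxtdesc - write an advanced Tx context descriptor to the ring
 * @tx_ring: ring to place the context descriptor on
 * @vlan_macip_lens: VLAN tag and MAC/IP header length fields
 * @type_tucmd: descriptor type and TUCMD offload fields
 * @mss_l4len_idx: MSS, L4 length, and context index fields
 **/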
static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
                            u32 type_tucmd, u32 mss_l4len_idx)
{
        struct e1000_adv_tx_context_desc *context_desc;
        u16 i = tx_ring->next_to_use;

        context_desc = IGB_TX_CTXTDESC(tx_ring, i);

        i++;
        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

        /* set bits to identify this as an advanced context descriptor */
        type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

        /* For 82575, context index must be unique per ring. */
        if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
                mss_l4len_idx |= tx_ring->reg_idx << 4;

        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
        context_desc->seqnum_seed       = 0;
        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
}

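/**
 * igb_tso - configure a context descriptor for TCP segmentation offload
 * @tx_ring: ring the skb will be transmitted on
 * @first: first tx_buffer associated with the skb
 * @hdr_len: returns the combined network and transport header length
 *
 * Returns 0 if TSO is not used, 1 if a context descriptor was written,
 * or a negative error code if the header could not be prepared.
 **/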
static int igb_tso(struct igb_ring *tx_ring,
                   struct igb_tx_buffer *first,
                   u8 *hdr_len)
{
        struct sk_buff *skb = first->skb;
        u32 vlan_macip_lens, type_tucmd;
        u32 mss_l4len_idx, l4len;

        if (!skb_is_gso(skb))
                return 0;

        if (skb_header_cloned(skb)) {
                int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (err)
                        return err;
        }

        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
        type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;

        if (first->protocol == __constant_htons(ETH_P_IP)) {
                struct iphdr *iph = ip_hdr(skb);
                iph->tot_len = 0;
                iph->check = 0;
                tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
                                                         iph->daddr, 0,
                                                         IPPROTO_TCP,
                                                         0);
                type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
                first->tx_flags |= IGB_TX_FLAGS_TSO |
                                   IGB_TX_FLAGS_CSUM |
                                   IGB_TX_FLAGS_IPV4;
        } else if (skb_is_gso_v6(skb)) {
                ipv6_hdr(skb)->payload_len = 0;
                tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                                       &ipv6_hdr(skb)->daddr,
                                                       0, IPPROTO_TCP, 0);
                first->tx_flags |= IGB_TX_FLAGS_TSO |
                                   IGB_TX_FLAGS_CSUM;
        }

        /* compute header lengths */
        l4len = tcp_hdrlen(skb);
        *hdr_len = skb_transport_offset(skb) + l4len;

        /* update gso size and bytecount with header size */
        first->gso_segs = skb_shinfo(skb)->gso_segs;
        first->bytecount += (first->gso_segs - 1) * *hdr_len;

        /* MSS L4LEN IDX */
        mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
        mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

        /* VLAN MACLEN IPLEN */
        vlan_macip_lens = skb_network_header_len(skb);
        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);

        return 1;
}

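/**
 * igb_tx_csum - configure a context descriptor for checksum offload
 * @tx_ring: ring the skb will be transmitted on
 * @first: first tx_buffer associated with the skb
 **/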
4134 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4135 {
4136         struct sk_buff *skb = first->skb;
4137         u32 vlan_macip_lens = 0;
4138         u32 mss_l4len_idx = 0;
4139         u32 type_tucmd = 0;
4140
4141         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4142                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4143                         return;
4144         } else {
4145                 u8 l4_hdr = 0;
4146                 switch (first->protocol) {
4147                 case __constant_htons(ETH_P_IP):
4148                         vlan_macip_lens |= skb_network_header_len(skb);
4149                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4150                         l4_hdr = ip_hdr(skb)->protocol;
4151                         break;
4152                 case __constant_htons(ETH_P_IPV6):
4153                         vlan_macip_lens |= skb_network_header_len(skb);
4154                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4155                         break;
4156                 default:
4157                         if (unlikely(net_ratelimit())) {
4158                                 dev_warn(tx_ring->dev,
4159                                  "partial checksum but proto=%x!\n",
4160                                  first->protocol);
4161                         }
4162                         break;
4163                 }
4164
4165                 switch (l4_hdr) {
4166                 case IPPROTO_TCP:
4167                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4168                         mss_l4len_idx = tcp_hdrlen(skb) <<
4169                                         E1000_ADVTXD_L4LEN_SHIFT;
4170                         break;
4171                 case IPPROTO_SCTP:
4172                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4173                         mss_l4len_idx = sizeof(struct sctphdr) <<
4174                                         E1000_ADVTXD_L4LEN_SHIFT;
4175                         break;
4176                 case IPPROTO_UDP:
4177                         mss_l4len_idx = sizeof(struct udphdr) <<
4178                                         E1000_ADVTXD_L4LEN_SHIFT;
4179                         break;
4180                 default:
4181                         if (unlikely(net_ratelimit())) {
4182                                 dev_warn(tx_ring->dev,
4183                                  "partial checksum but l4 proto=%x!\n",
4184                                  l4_hdr);
4185                         }
4186                         break;
4187                 }
4188
4189                 /* update TX checksum flag */
4190                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4191         }
4192
4193         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4194         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4195
4196         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4197 }
4198
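/**
 * igb_tx_cmd_type - build the cmd_type_len field for a data descriptor
 * @tx_flags: flags recorded in the igb_tx_buffer for this packet
 *
 * Translates the software tx_flags (VLAN, timestamp, TSO) into the
 * corresponding DCMD bits of an advanced data descriptor.
 **/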
4199 static __le32 igb_tx_cmd_type(u32 tx_flags)
4200 {
4201         /* set type for advanced descriptor with frame checksum insertion */
4202         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4203                                       E1000_ADVTXD_DCMD_IFCS |
4204                                       E1000_ADVTXD_DCMD_DEXT);
4205
4206         /* set HW vlan bit if vlan is present */
4207         if (tx_flags & IGB_TX_FLAGS_VLAN)
4208                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4209
4210         /* set timestamp bit if present */
4211         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4212                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4213
4214         /* set segmentation bits for TSO */
4215         if (tx_flags & IGB_TX_FLAGS_TSO)
4216                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4217
4218         return cmd_type;
4219 }
4220
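/**
 * igb_tx_olinfo_status - populate the olinfo_status descriptor field
 * @tx_ring: ring the descriptor belongs to
 * @tx_desc: descriptor being populated
 * @tx_flags: flags recorded for this packet
 * @paylen: payload length, excluding any TSO header
 **/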
4221 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4222                                  union e1000_adv_tx_desc *tx_desc,
4223                                  u32 tx_flags, unsigned int paylen)
4224 {
4225         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4226
4227         /* 82575 requires a unique index per ring if any offload is enabled */
4228         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4229             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4230                 olinfo_status |= tx_ring->reg_idx << 4;
4231
4232         /* insert L4 checksum */
4233         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4234                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4235
4236                 /* insert IPv4 checksum */
4237                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4238                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4239         }
4240
4241         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4242 }
4243
4244 /*
4245  * The largest size we can write to the descriptor is 65535.  In order to
4246  * maintain a power-of-two alignment we have to limit ourselves to 32K.
4247  */
4248 #define IGB_MAX_TXD_PWR 15
4249 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
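
/*
 * For example, a 45K linear region is split by igb_tx_map() below into
 * one full 32K descriptor plus a second descriptor for the remaining 13K.
 */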
4250
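/**
 * igb_tx_map - map skb data and fill in the ring's data descriptors
 * @tx_ring: ring the packet is being sent on
 * @first: first buffer info record, already holding the skb
 * @hdr_len: header length accounted for by TSO, 0 otherwise
 **/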
4251 static void igb_tx_map(struct igb_ring *tx_ring,
4252                        struct igb_tx_buffer *first,
4253                        const u8 hdr_len)
4254 {
4255         struct sk_buff *skb = first->skb;
4256         struct igb_tx_buffer *tx_buffer_info;
4257         union e1000_adv_tx_desc *tx_desc;
4258         dma_addr_t dma;
4259         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4260         unsigned int data_len = skb->data_len;
4261         unsigned int size = skb_headlen(skb);
4262         unsigned int paylen = skb->len - hdr_len;
4263         __le32 cmd_type;
4264         u32 tx_flags = first->tx_flags;
4265         u16 i = tx_ring->next_to_use;
4266
4267         tx_desc = IGB_TX_DESC(tx_ring, i);
4268
4269         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4270         cmd_type = igb_tx_cmd_type(tx_flags);
4271
4272         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4273         if (dma_mapping_error(tx_ring->dev, dma))
4274                 goto dma_error;
4275
4276         /* record length, and DMA address */
4277         first->length = size;
4278         first->dma = dma;
4279         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4280
4281         for (;;) {
4282                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4283                         tx_desc->read.cmd_type_len =
4284                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4285
4286                         i++;
4287                         tx_desc++;
4288                         if (i == tx_ring->count) {
4289                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4290                                 i = 0;
4291                         }
4292
4293                         dma += IGB_MAX_DATA_PER_TXD;
4294                         size -= IGB_MAX_DATA_PER_TXD;
4295
4296                         tx_desc->read.olinfo_status = 0;
4297                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4298                 }
4299
4300                 if (likely(!data_len))
4301                         break;
4302
4303                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4304
4305                 i++;
4306                 tx_desc++;
4307                 if (i == tx_ring->count) {
4308                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4309                         i = 0;
4310                 }
4311
4312                 size = skb_frag_size(frag);
4313                 data_len -= size;
4314
4315                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4316                                    size, DMA_TO_DEVICE);
4317                 if (dma_mapping_error(tx_ring->dev, dma))
4318                         goto dma_error;
4319
4320                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4321                 tx_buffer_info->length = size;
4322                 tx_buffer_info->dma = dma;
4323
4324                 tx_desc->read.olinfo_status = 0;
4325                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4326
4327                 frag++;
4328         }
4329
4330         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4331
4332         /* write last descriptor with RS and EOP bits */
4333         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4334         if (unlikely(skb->no_fcs))
4335                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4336         tx_desc->read.cmd_type_len = cmd_type;
4337
4338         /* set the timestamp */
4339         first->time_stamp = jiffies;
4340
4341         /*
4342          * Force memory writes to complete before letting h/w know there
4343          * are new descriptors to fetch.  (Only applicable for weak-ordered
4344          * memory model archs, such as IA-64).
4345          *
4346          * We also need this memory barrier to make certain all of the
4347          * status bits have been updated before next_to_watch is written.
4348          */
4349         wmb();
4350
4351         /* set next_to_watch value indicating a packet is present */
4352         first->next_to_watch = tx_desc;
4353
4354         i++;
4355         if (i == tx_ring->count)
4356                 i = 0;
4357
4358         tx_ring->next_to_use = i;
4359
4360         writel(i, tx_ring->tail);
4361
4362         /* we need this if more than one processor can write to our tail
4363          * at a time; it synchronizes IO on IA64/Altix systems */
4364         mmiowb();
4365
4366         return;
4367
4368 dma_error:
4369         dev_err(tx_ring->dev, "TX DMA map failed\n");
4370
4371         /* clear dma mappings for failed tx_buffer_info map */
4372         for (;;) {
4373                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4374                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4375                 if (tx_buffer_info == first)
4376                         break;
4377                 if (i == 0)
4378                         i = tx_ring->count;
4379                 i--;
4380         }
4381
4382         tx_ring->next_to_use = i;
4383 }
4384
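/**
 * __igb_maybe_stop_tx - stop the queue, then re-check for a late wakeup
 * @tx_ring: ring that is running out of descriptors
 * @size: number of descriptors the next transmit requires
 *
 * Returns -EBUSY if the ring is genuinely full, or 0 if room appeared
 * after the queue was stopped, in which case the queue is restarted.
 **/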
4385 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4386 {
4387         struct net_device *netdev = tx_ring->netdev;
4388
4389         netif_stop_subqueue(netdev, tx_ring->queue_index);
4390
4391         /* Herbert's original patch had:
4392          *  smp_mb__after_netif_stop_queue();
4393          * but since that doesn't exist yet, just open code it. */
4394         smp_mb();
4395
4396         /* We need to check again in case another CPU has just
4397          * made room available. */
4398         if (igb_desc_unused(tx_ring) < size)
4399                 return -EBUSY;
4400
4401         /* A reprieve! */
4402         netif_wake_subqueue(netdev, tx_ring->queue_index);
4403
4404         u64_stats_update_begin(&tx_ring->tx_syncp2);
4405         tx_ring->tx_stats.restart_queue2++;
4406         u64_stats_update_end(&tx_ring->tx_syncp2);
4407
4408         return 0;
4409 }
4410
4411 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4412 {
4413         if (igb_desc_unused(tx_ring) >= size)
4414                 return 0;
4415         return __igb_maybe_stop_tx(tx_ring, size);
4416 }
4417
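/**
 * igb_xmit_frame_ring - transmit one skb on the given ring
 * @skb: buffer to transmit
 * @tx_ring: ring the descriptors should be placed on
 **/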
4418 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4419                                 struct igb_ring *tx_ring)
4420 {
4421         struct igb_tx_buffer *first;
4422         int tso;
4423         u32 tx_flags = 0;
4424         __be16 protocol = vlan_get_protocol(skb);
4425         u8 hdr_len = 0;
4426
4427         /* need: 1 descriptor per page,
4428          *       + 2 desc gap to keep tail from touching head,
4429          *       + 1 desc for skb->data,
4430          *       + 1 desc for context descriptor,
4431          * otherwise try next time */
4432         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4433                 /* this is a hard error */
4434                 return NETDEV_TX_BUSY;
4435         }
4436
4437         /* record the location of the first descriptor for this packet */
4438         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4439         first->skb = skb;
4440         first->bytecount = skb->len;
4441         first->gso_segs = 1;
4442
4443         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4444                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4445                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4446         }
4447
4448         if (vlan_tx_tag_present(skb)) {
4449                 tx_flags |= IGB_TX_FLAGS_VLAN;
4450                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4451         }
4452
4453         /* record initial flags and protocol */
4454         first->tx_flags = tx_flags;
4455         first->protocol = protocol;
4456
4457         tso = igb_tso(tx_ring, first, &hdr_len);
4458         if (tso < 0)
4459                 goto out_drop;
4460         else if (!tso)
4461                 igb_tx_csum(tx_ring, first);
4462
4463         igb_tx_map(tx_ring, first, hdr_len);
4464
4465         /* Make sure there is space in the ring for the next send. */
4466         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4467
4468         return NETDEV_TX_OK;
4469
4470 out_drop:
4471         igb_unmap_and_free_tx_resource(tx_ring, first);
4472
4473         return NETDEV_TX_OK;
4474 }
4475
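/* Map the skb's queue_mapping onto a valid Tx ring index. */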
4476 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4477                                                     struct sk_buff *skb)
4478 {
4479         unsigned int r_idx = skb->queue_mapping;
4480
4481         if (r_idx >= adapter->num_tx_queues)
4482                 r_idx = r_idx % adapter->num_tx_queues;
4483
4484         return adapter->tx_ring[r_idx];
4485 }
4486
4487 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4488                                   struct net_device *netdev)
4489 {
4490         struct igb_adapter *adapter = netdev_priv(netdev);
4491
4492         if (test_bit(__IGB_DOWN, &adapter->state)) {
4493                 dev_kfree_skb_any(skb);
4494                 return NETDEV_TX_OK;
4495         }
4496
4497         if (skb->len <= 0) {
4498                 dev_kfree_skb_any(skb);
4499                 return NETDEV_TX_OK;
4500         }
4501
4502         /*
4503          * The minimum packet size with TCTL.PSP set is 17, so pad the skb
4504          * in order to meet this minimum size requirement.
4505          */
4506         if (skb->len < 17) {
4507                 if (skb_padto(skb, 17))
4508                         return NETDEV_TX_OK;
4509                 skb->len = 17;
4510         }
4511
4512         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4513 }
4514
4515 /**
4516  * igb_tx_timeout - Respond to a Tx Hang
4517  * @netdev: network interface device structure
4518  **/
4519 static void igb_tx_timeout(struct net_device *netdev)
4520 {
4521         struct igb_adapter *adapter = netdev_priv(netdev);
4522         struct e1000_hw *hw = &adapter->hw;
4523
4524         /* Do the reset outside of interrupt context */
4525         adapter->tx_timeout_count++;
4526
4527         if (hw->mac.type >= e1000_82580)
4528                 hw->dev_spec._82575.global_device_reset = true;
4529
4530         schedule_work(&adapter->reset_task);
4531         wr32(E1000_EICS,
4532              (adapter->eims_enable_mask & ~adapter->eims_other));
4533 }
4534
4535 static void igb_reset_task(struct work_struct *work)
4536 {
4537         struct igb_adapter *adapter;
4538         adapter = container_of(work, struct igb_adapter, reset_task);
4539
4540         igb_dump(adapter);
4541         netdev_err(adapter->netdev, "Reset adapter\n");
4542         igb_reinit_locked(adapter);
4543 }
4544
4545 /**
4546  * igb_get_stats64 - Get System Network Statistics
4547  * @netdev: network interface device structure
4548  * @stats: rtnl_link_stats64 pointer
4549  *
4550  **/
4551 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4552                                                  struct rtnl_link_stats64 *stats)
4553 {
4554         struct igb_adapter *adapter = netdev_priv(netdev);
4555
4556         spin_lock(&adapter->stats64_lock);
4557         igb_update_stats(adapter, &adapter->stats64);
4558         memcpy(stats, &adapter->stats64, sizeof(*stats));
4559         spin_unlock(&adapter->stats64_lock);
4560
4561         return stats;
4562 }
4563
4564 /**
4565  * igb_change_mtu - Change the Maximum Transfer Unit
4566  * @netdev: network interface device structure
4567  * @new_mtu: new value for maximum frame size
4568  *
4569  * Returns 0 on success, negative on failure
4570  **/
4571 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4572 {
4573         struct igb_adapter *adapter = netdev_priv(netdev);
4574         struct pci_dev *pdev = adapter->pdev;
4575         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4576
4577         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4578                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4579                 return -EINVAL;
4580         }
4581
4582 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4583         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4584                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4585                 return -EINVAL;
4586         }
4587
4588         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4589                 msleep(1);
4590
4591         /* igb_down has a dependency on max_frame_size */
4592         adapter->max_frame_size = max_frame;
4593
4594         if (netif_running(netdev))
4595                 igb_down(adapter);
4596
4597         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4598                  netdev->mtu, new_mtu);
4599         netdev->mtu = new_mtu;
4600
4601         if (netif_running(netdev))
4602                 igb_up(adapter);
4603         else
4604                 igb_reset(adapter);
4605
4606         clear_bit(__IGB_RESETTING, &adapter->state);
4607
4608         return 0;
4609 }
4610
4611 /**
4612  * igb_update_stats - Update the board statistics counters
4613  * @adapter: board private structure
4614  **/
4615
4616 void igb_update_stats(struct igb_adapter *adapter,
4617                       struct rtnl_link_stats64 *net_stats)
4618 {
4619         struct e1000_hw *hw = &adapter->hw;
4620         struct pci_dev *pdev = adapter->pdev;
4621         u32 reg, mpc;
4622         u16 phy_tmp;
4623         int i;
4624         u64 bytes, packets;
4625         unsigned int start;
4626         u64 _bytes, _packets;
4627
4628 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4629
4630         /*
4631          * Prevent stats update while adapter is being reset, or if the pci
4632          * connection is down.
4633          */
4634         if (adapter->link_speed == 0)
4635                 return;
4636         if (pci_channel_offline(pdev))
4637                 return;
4638
4639         bytes = 0;
4640         packets = 0;
4641         for (i = 0; i < adapter->num_rx_queues; i++) {
4642                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4643                 struct igb_ring *ring = adapter->rx_ring[i];
4644
4645                 ring->rx_stats.drops += rqdpc_tmp;
4646                 net_stats->rx_fifo_errors += rqdpc_tmp;
4647
4648                 do {
4649                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4650                         _bytes = ring->rx_stats.bytes;
4651                         _packets = ring->rx_stats.packets;
4652                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4653                 bytes += _bytes;
4654                 packets += _packets;
4655         }
4656
4657         net_stats->rx_bytes = bytes;
4658         net_stats->rx_packets = packets;
4659
4660         bytes = 0;
4661         packets = 0;
4662         for (i = 0; i < adapter->num_tx_queues; i++) {
4663                 struct igb_ring *ring = adapter->tx_ring[i];
4664                 do {
4665                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4666                         _bytes = ring->tx_stats.bytes;
4667                         _packets = ring->tx_stats.packets;
4668                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4669                 bytes += _bytes;
4670                 packets += _packets;
4671         }
4672         net_stats->tx_bytes = bytes;
4673         net_stats->tx_packets = packets;
4674
4675         /* read stats registers */
4676         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4677         adapter->stats.gprc += rd32(E1000_GPRC);
4678         adapter->stats.gorc += rd32(E1000_GORCL);
4679         rd32(E1000_GORCH); /* clear GORCL */
4680         adapter->stats.bprc += rd32(E1000_BPRC);
4681         adapter->stats.mprc += rd32(E1000_MPRC);
4682         adapter->stats.roc += rd32(E1000_ROC);
4683
4684         adapter->stats.prc64 += rd32(E1000_PRC64);
4685         adapter->stats.prc127 += rd32(E1000_PRC127);
4686         adapter->stats.prc255 += rd32(E1000_PRC255);
4687         adapter->stats.prc511 += rd32(E1000_PRC511);
4688         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4689         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4690         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4691         adapter->stats.sec += rd32(E1000_SEC);
4692
4693         mpc = rd32(E1000_MPC);
4694         adapter->stats.mpc += mpc;
4695         net_stats->rx_fifo_errors += mpc;
4696         adapter->stats.scc += rd32(E1000_SCC);
4697         adapter->stats.ecol += rd32(E1000_ECOL);
4698         adapter->stats.mcc += rd32(E1000_MCC);
4699         adapter->stats.latecol += rd32(E1000_LATECOL);
4700         adapter->stats.dc += rd32(E1000_DC);
4701         adapter->stats.rlec += rd32(E1000_RLEC);
4702         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4703         adapter->stats.xontxc += rd32(E1000_XONTXC);
4704         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4705         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4706         adapter->stats.fcruc += rd32(E1000_FCRUC);
4707         adapter->stats.gptc += rd32(E1000_GPTC);
4708         adapter->stats.gotc += rd32(E1000_GOTCL);
4709         rd32(E1000_GOTCH); /* clear GOTCL */
4710         adapter->stats.rnbc += rd32(E1000_RNBC);
4711         adapter->stats.ruc += rd32(E1000_RUC);
4712         adapter->stats.rfc += rd32(E1000_RFC);
4713         adapter->stats.rjc += rd32(E1000_RJC);
4714         adapter->stats.tor += rd32(E1000_TORH);
4715         adapter->stats.tot += rd32(E1000_TOTH);
4716         adapter->stats.tpr += rd32(E1000_TPR);
4717
4718         adapter->stats.ptc64 += rd32(E1000_PTC64);
4719         adapter->stats.ptc127 += rd32(E1000_PTC127);
4720         adapter->stats.ptc255 += rd32(E1000_PTC255);
4721         adapter->stats.ptc511 += rd32(E1000_PTC511);
4722         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4723         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4724
4725         adapter->stats.mptc += rd32(E1000_MPTC);
4726         adapter->stats.bptc += rd32(E1000_BPTC);
4727
4728         adapter->stats.tpt += rd32(E1000_TPT);
4729         adapter->stats.colc += rd32(E1000_COLC);
4730
4731         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4732         /* read internal phy specific stats */
4733         reg = rd32(E1000_CTRL_EXT);
4734         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4735                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4736                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4737         }
4738
4739         adapter->stats.tsctc += rd32(E1000_TSCTC);
4740         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4741
4742         adapter->stats.iac += rd32(E1000_IAC);
4743         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4744         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4745         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4746         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4747         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4748         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4749         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4750         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4751
4752         /* Fill out the OS statistics structure */
4753         net_stats->multicast = adapter->stats.mprc;
4754         net_stats->collisions = adapter->stats.colc;
4755
4756         /* Rx Errors */
4757
4758         /* RLEC on some newer hardware can be incorrect, so build
4759          * our own version based on RUC and ROC */
4760         net_stats->rx_errors = adapter->stats.rxerrc +
4761                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4762                 adapter->stats.ruc + adapter->stats.roc +
4763                 adapter->stats.cexterr;
4764         net_stats->rx_length_errors = adapter->stats.ruc +
4765                                       adapter->stats.roc;
4766         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4767         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4768         net_stats->rx_missed_errors = adapter->stats.mpc;
4769
4770         /* Tx Errors */
4771         net_stats->tx_errors = adapter->stats.ecol +
4772                                adapter->stats.latecol;
4773         net_stats->tx_aborted_errors = adapter->stats.ecol;
4774         net_stats->tx_window_errors = adapter->stats.latecol;
4775         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4776
4777         /* Tx Dropped needs to be maintained elsewhere */
4778
4779         /* Phy Stats */
4780         if (hw->phy.media_type == e1000_media_type_copper) {
4781                 if ((adapter->link_speed == SPEED_1000) &&
4782                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4783                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4784                         adapter->phy_stats.idle_errors += phy_tmp;
4785                 }
4786         }
4787
4788         /* Management Stats */
4789         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4790         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4791         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4792
4793         /* OS2BMC Stats */
4794         reg = rd32(E1000_MANC);
4795         if (reg & E1000_MANC_EN_BMC2OS) {
4796                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4797                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4798                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4799                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4800         }
4801 }
4802
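/**
 * igb_msix_other - handle the "other" interrupt causes
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 *
 * Services link changes, mailbox events, device reset requests and DMA
 * out-of-sync reports, then re-enables the "other" cause in EIMS.
 **/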
4803 static irqreturn_t igb_msix_other(int irq, void *data)
4804 {
4805         struct igb_adapter *adapter = data;
4806         struct e1000_hw *hw = &adapter->hw;
4807         u32 icr = rd32(E1000_ICR);
4808         /* reading ICR causes bit 31 of EICR to be cleared */
4809
4810         if (icr & E1000_ICR_DRSTA)
4811                 schedule_work(&adapter->reset_task);
4812
4813         if (icr & E1000_ICR_DOUTSYNC) {
4814                 /* HW is reporting DMA is out of sync */
4815                 adapter->stats.doosync++;
4816                 /* The DMA Out of Sync is also an indication of a spoof event
4817                  * in IOV mode. Check the Wrong VM Behavior register to
4818                  * see if it is really a spoof event. */
4819                 igb_check_wvbr(adapter);
4820         }
4821
4822         /* Check for a mailbox event */
4823         if (icr & E1000_ICR_VMMB)
4824                 igb_msg_task(adapter);
4825
4826         if (icr & E1000_ICR_LSC) {
4827                 hw->mac.get_link_status = 1;
4828                 /* guard against interrupt when we're going down */
4829                 if (!test_bit(__IGB_DOWN, &adapter->state))
4830                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4831         }
4832
4833         wr32(E1000_EIMS, adapter->eims_other);
4834
4835         return IRQ_HANDLED;
4836 }
4837
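/* Write the pending ITR value, if any, to this vector's EITR register.
 * The 82575 mirrors the value into the upper half of the register;
 * later MACs set the counter-ignore bit instead. */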
4838 static void igb_write_itr(struct igb_q_vector *q_vector)
4839 {
4840         struct igb_adapter *adapter = q_vector->adapter;
4841         u32 itr_val = q_vector->itr_val & 0x7FFC;
4842
4843         if (!q_vector->set_itr)
4844                 return;
4845
4846         if (!itr_val)
4847                 itr_val = 0x4;
4848
4849         if (adapter->hw.mac.type == e1000_82575)
4850                 itr_val |= itr_val << 16;
4851         else
4852                 itr_val |= E1000_EITR_CNT_IGNR;
4853
4854         writel(itr_val, q_vector->itr_register);
4855         q_vector->set_itr = 0;
4856 }
4857
4858 static irqreturn_t igb_msix_ring(int irq, void *data)
4859 {
4860         struct igb_q_vector *q_vector = data;
4861
4862         /* Write the ITR value calculated from the previous interrupt. */
4863         igb_write_itr(q_vector);
4864
4865         napi_schedule(&q_vector->napi);
4866
4867         return IRQ_HANDLED;
4868 }
4869
4870 #ifdef CONFIG_IGB_DCA
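/* Retarget this vector's DCA (direct cache access) hints at the CPU we
 * are currently executing on. */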
4871 static void igb_update_dca(struct igb_q_vector *q_vector)
4872 {
4873         struct igb_adapter *adapter = q_vector->adapter;
4874         struct e1000_hw *hw = &adapter->hw;
4875         int cpu = get_cpu();
4876
4877         if (q_vector->cpu == cpu)
4878                 goto out_no_update;
4879
4880         if (q_vector->tx.ring) {
4881                 int q = q_vector->tx.ring->reg_idx;
4882                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4883                 if (hw->mac.type == e1000_82575) {
4884                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4885                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4886                 } else {
4887                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4888                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4889                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4890                 }
4891                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4892                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4893         }
4894         if (q_vector->rx.ring) {
4895                 int q = q_vector->rx.ring->reg_idx;
4896                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4897                 if (hw->mac.type == e1000_82575) {
4898                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4899                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4900                 } else {
4901                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4902                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4903                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4904                 }
4905                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4906                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4907                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4908                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4909         }
4910         q_vector->cpu = cpu;
4911 out_no_update:
4912         put_cpu();
4913 }
4914
4915 static void igb_setup_dca(struct igb_adapter *adapter)
4916 {
4917         struct e1000_hw *hw = &adapter->hw;
4918         int i;
4919
4920         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4921                 return;
4922
4923         /* Always use CB2 mode, difference is masked in the CB driver. */
4924         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4925
4926         for (i = 0; i < adapter->num_q_vectors; i++) {
4927                 adapter->q_vector[i]->cpu = -1;
4928                 igb_update_dca(adapter->q_vector[i]);
4929         }
4930 }
4931
4932 static int __igb_notify_dca(struct device *dev, void *data)
4933 {
4934         struct net_device *netdev = dev_get_drvdata(dev);
4935         struct igb_adapter *adapter = netdev_priv(netdev);
4936         struct pci_dev *pdev = adapter->pdev;
4937         struct e1000_hw *hw = &adapter->hw;
4938         unsigned long event = *(unsigned long *)data;
4939
4940         switch (event) {
4941         case DCA_PROVIDER_ADD:
4942                 /* if already enabled, don't do it again */
4943                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4944                         break;
4945                 if (dca_add_requester(dev) == 0) {
4946                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4947                         dev_info(&pdev->dev, "DCA enabled\n");
4948                         igb_setup_dca(adapter);
4949                         break;
4950                 }
4951                 /* Fall Through since DCA is disabled. */
4952         case DCA_PROVIDER_REMOVE:
4953                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4954                         /* without this a class_device is left
4955                          * hanging around in the sysfs model */
4956                         dca_remove_requester(dev);
4957                         dev_info(&pdev->dev, "DCA disabled\n");
4958                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4959                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4960                 }
4961                 break;
4962         }
4963
4964         return 0;
4965 }
4966
4967 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4968                           void *p)
4969 {
4970         int ret_val;
4971
4972         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4973                                          __igb_notify_dca);
4974
4975         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4976 }
4977 #endif /* CONFIG_IGB_DCA */
4978
4979 #ifdef CONFIG_PCI_IOV
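/* Assign a random MAC address to the VF, then locate its pci_dev by
 * walking the VF device IDs at the mac-type specific devfn stride. */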
4980 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4981 {
4982         unsigned char mac_addr[ETH_ALEN];
4983         struct pci_dev *pdev = adapter->pdev;
4984         struct e1000_hw *hw = &adapter->hw;
4985         struct pci_dev *pvfdev;
4986         unsigned int device_id;
4987         u16 thisvf_devfn;
4988
4989         random_ether_addr(mac_addr);
4990         igb_set_vf_mac(adapter, vf, mac_addr);
4991
4992         switch (adapter->hw.mac.type) {
4993         case e1000_82576:
4994                 device_id = IGB_82576_VF_DEV_ID;
4995                 /* VF Stride for 82576 is 2 */
4996                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4997                         (pdev->devfn & 1);
4998                 break;
4999         case e1000_i350:
5000                 device_id = IGB_I350_VF_DEV_ID;
5001                 /* VF Stride for I350 is 4 */
5002                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5003                                 (pdev->devfn & 3);
5004                 break;
5005         default:
5006                 device_id = 0;
5007                 thisvf_devfn = 0;
5008                 break;
5009         }
5010
5011         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5012         while (pvfdev) {
5013                 if (pvfdev->devfn == thisvf_devfn)
5014                         break;
5015                 pvfdev = pci_get_device(hw->vendor_id,
5016                                         device_id, pvfdev);
5017         }
5018
5019         if (pvfdev)
5020                 adapter->vf_data[vf].vfdev = pvfdev;
5021         else
5022                 dev_err(&pdev->dev,
5023                         "Couldn't find pci dev ptr for VF %4.4x\n",
5024                         thisvf_devfn);
5025         return pvfdev != NULL;
5026 }
5027
5028 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5029 {
5030         struct e1000_hw *hw = &adapter->hw;
5031         struct pci_dev *pdev = adapter->pdev;
5032         struct pci_dev *pvfdev;
5033         u16 vf_devfn = 0;
5034         u16 vf_stride;
5035         unsigned int device_id;
5036         int vfs_found = 0;
5037
5038         switch (adapter->hw.mac.type) {
5039         case e1000_82576:
5040                 device_id = IGB_82576_VF_DEV_ID;
5041                 /* VF Stride for 82576 is 2 */
5042                 vf_stride = 2;
5043                 break;
5044         case e1000_i350:
5045                 device_id = IGB_I350_VF_DEV_ID;
5046                 /* VF Stride for I350 is 4 */
5047                 vf_stride = 4;
5048                 break;
5049         default:
5050                 device_id = 0;
5051                 vf_stride = 0;
5052                 break;
5053         }
5054
5055         vf_devfn = pdev->devfn + 0x80;
5056         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5057         while (pvfdev) {
5058                 if (pvfdev->devfn == vf_devfn &&
5059                     (pvfdev->bus->number >= pdev->bus->number))
5060                         vfs_found++;
5061                 vf_devfn += vf_stride;
5062                 pvfdev = pci_get_device(hw->vendor_id,
5063                                         device_id, pvfdev);
5064         }
5065
5066         return vfs_found;
5067 }
5068
5069 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5070 {
5071         int i;
5072         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5073                 if (adapter->vf_data[i].vfdev) {
5074                         if (adapter->vf_data[i].vfdev->dev_flags &
5075                             PCI_DEV_FLAGS_ASSIGNED)
5076                                 return true;
5077                 }
5078         }
5079         return false;
5080 }
5081
5082 #endif
5083 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5084 {
5085         struct e1000_hw *hw = &adapter->hw;
5086         u32 ping;
5087         int i;
5088
5089         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5090                 ping = E1000_PF_CONTROL_MSG;
5091                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5092                         ping |= E1000_VT_MSGTYPE_CTS;
5093                 igb_write_mbx(hw, &ping, 1, i);
5094         }
5095 }
5096
5097 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5098 {
5099         struct e1000_hw *hw = &adapter->hw;
5100         u32 vmolr = rd32(E1000_VMOLR(vf));
5101         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5102
5103         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5104                             IGB_VF_FLAG_MULTI_PROMISC);
5105         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5106
5107         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5108                 vmolr |= E1000_VMOLR_MPME;
5109                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5110                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5111         } else {
5112                 /*
5113                  * if we have hashes and we are clearing a multicast promisc
5114                  * flag we need to write the hashes to the MTA as this step
5115                  * was previously skipped
5116                  */
5117                 if (vf_data->num_vf_mc_hashes > 30) {
5118                         vmolr |= E1000_VMOLR_MPME;
5119                 } else if (vf_data->num_vf_mc_hashes) {
5120                         int j;
5121                         vmolr |= E1000_VMOLR_ROMPE;
5122                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5123                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5124                 }
5125         }
5126
5127         wr32(E1000_VMOLR(vf), vmolr);
5128
5129         /* any flags left unprocessed are likely not supported */
5130         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5131                 return -EINVAL;
5132
5133         return 0;
5134
5135 }
5136
5137 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5138                                   u32 *msgbuf, u32 vf)
5139 {
5140         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5141         u16 *hash_list = (u16 *)&msgbuf[1];
5142         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5143         int i;
5144
5145         /* salt away the number of multicast addresses assigned
5146          * to this VF for later use; they are restored when the PF
5147          * multicast list changes
5148          */
5149         vf_data->num_vf_mc_hashes = n;
5150
5151         /* only up to 30 hash values supported */
5152         if (n > 30)
5153                 n = 30;
5154
5155         /* store the hashes for later use */
5156         for (i = 0; i < n; i++)
5157                 vf_data->vf_mc_hashes[i] = hash_list[i];
5158
5159         /* Flush and reset the mta with the new values */
5160         igb_set_rx_mode(adapter->netdev);
5161
5162         return 0;
5163 }
5164
5165 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5166 {
5167         struct e1000_hw *hw = &adapter->hw;
5168         struct vf_data_storage *vf_data;
5169         int i, j;
5170
5171         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5172                 u32 vmolr = rd32(E1000_VMOLR(i));
5173                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5174
5175                 vf_data = &adapter->vf_data[i];
5176
5177                 if ((vf_data->num_vf_mc_hashes > 30) ||
5178                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5179                         vmolr |= E1000_VMOLR_MPME;
5180                 } else if (vf_data->num_vf_mc_hashes) {
5181                         vmolr |= E1000_VMOLR_ROMPE;
5182                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5183                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5184                 }
5185                 wr32(E1000_VMOLR(i), vmolr);
5186         }
5187 }
5188
5189 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5190 {
5191         struct e1000_hw *hw = &adapter->hw;
5192         u32 pool_mask, reg, vid;
5193         int i;
5194
5195         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5196
5197         /* Find the vlan filter for this id */
5198         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5199                 reg = rd32(E1000_VLVF(i));
5200
5201                 /* remove the vf from the pool */
5202                 reg &= ~pool_mask;
5203
5204                 /* if pool is empty then remove entry from vfta */
5205                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5206                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5207                         vid = reg & E1000_VLVF_VLANID_MASK;
5208                         reg = 0;
5209                         igb_vfta_set(hw, vid, false);
5210                 }
5211
5212                 wr32(E1000_VLVF(i), reg);
5213         }
5214
5215         adapter->vf_data[vf].vlans_enabled = 0;
5216 }
5217
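/**
 * igb_vlvf_set - add or remove a VF from a VLAN pool filter
 * @adapter: board private structure
 * @vid: VLAN id the filter should match
 * @add: true to add the VF to the pool, false to remove it
 * @vf: pool/VF index being updated
 *
 * Also adjusts VMOLR.RLPML so the VF's max frame size accounts for the
 * extra 4 VLAN tag bytes when its first VLAN is added or last removed.
 **/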
5218 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5219 {
5220         struct e1000_hw *hw = &adapter->hw;
5221         u32 reg, i;
5222
5223         /* The vlvf table only exists on 82576 hardware and newer */
5224         if (hw->mac.type < e1000_82576)
5225                 return -1;
5226
5227         /* we only need to do this if VMDq is enabled */
5228         if (!adapter->vfs_allocated_count)
5229                 return -1;
5230
5231         /* Find the vlan filter for this id */
5232         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5233                 reg = rd32(E1000_VLVF(i));
5234                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5235                     vid == (reg & E1000_VLVF_VLANID_MASK))
5236                         break;
5237         }
5238
5239         if (add) {
5240                 if (i == E1000_VLVF_ARRAY_SIZE) {
5241                         /* Did not find a matching VLAN ID entry that was
5242                          * enabled.  Search for a free filter entry, i.e.
5243                          * one without the enable bit set
5244                          */
5245                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5246                                 reg = rd32(E1000_VLVF(i));
5247                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5248                                         break;
5249                         }
5250                 }
5251                 if (i < E1000_VLVF_ARRAY_SIZE) {
5252                         /* Found an enabled/available entry */
5253                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5254
5255                         /* if !enabled we need to set this up in vfta */
5256                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5257                                 /* add VID to filter table */
5258                                 igb_vfta_set(hw, vid, true);
5259                                 reg |= E1000_VLVF_VLANID_ENABLE;
5260                         }
5261                         reg &= ~E1000_VLVF_VLANID_MASK;
5262                         reg |= vid;
5263                         wr32(E1000_VLVF(i), reg);
5264
5265                         /* do not modify RLPML for PF devices */
5266                         if (vf >= adapter->vfs_allocated_count)
5267                                 return 0;
5268
5269                         if (!adapter->vf_data[vf].vlans_enabled) {
5270                                 u32 size;
5271                                 reg = rd32(E1000_VMOLR(vf));
5272                                 size = reg & E1000_VMOLR_RLPML_MASK;
5273                                 size += 4;
5274                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5275                                 reg |= size;
5276                                 wr32(E1000_VMOLR(vf), reg);
5277                         }
5278
5279                         adapter->vf_data[vf].vlans_enabled++;
5280                 }
5281         } else {
5282                 if (i < E1000_VLVF_ARRAY_SIZE) {
5283                         /* remove vf from the pool */
5284                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5285                         /* if pool is empty then remove entry from vfta */
5286                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5287                                 reg = 0;
5288                                 igb_vfta_set(hw, vid, false);
5289                         }
5290                         wr32(E1000_VLVF(i), reg);
5291
5292                         /* do not modify RLPML for PF devices */
5293                         if (vf >= adapter->vfs_allocated_count)
5294                                 return 0;
5295
5296                         adapter->vf_data[vf].vlans_enabled--;
5297                         if (!adapter->vf_data[vf].vlans_enabled) {
5298                                 u32 size;
5299                                 reg = rd32(E1000_VMOLR(vf));
5300                                 size = reg & E1000_VMOLR_RLPML_MASK;
5301                                 size -= 4;
5302                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5303                                 reg |= size;
5304                                 wr32(E1000_VMOLR(vf), reg);
5305                         }
5306                 }
5307         }
5308         return 0;
5309 }
5310
5311 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5312 {
5313         struct e1000_hw *hw = &adapter->hw;
5314
5315         if (vid)
5316                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5317         else
5318                 wr32(E1000_VMVIR(vf), 0);
5319 }
5320
5321 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5322                                int vf, u16 vlan, u8 qos)
5323 {
5324         int err = 0;
5325         struct igb_adapter *adapter = netdev_priv(netdev);
5326
5327         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5328                 return -EINVAL;
5329         if (vlan || qos) {
5330                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5331                 if (err)
5332                         goto out;
5333                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5334                 igb_set_vmolr(adapter, vf, !vlan);
5335                 adapter->vf_data[vf].pf_vlan = vlan;
5336                 adapter->vf_data[vf].pf_qos = qos;
5337                 dev_info(&adapter->pdev->dev,
5338                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5339                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5340                         dev_warn(&adapter->pdev->dev,
5341                                  "The VF VLAN has been set,"
5342                                  " but the PF device is not up.\n");
5343                         dev_warn(&adapter->pdev->dev,
5344                                  "Bring the PF device up before"
5345                                  " attempting to use the VF device.\n");
5346                 }
5347         } else {
5348                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5349                                    false, vf);
5350                 igb_set_vmvir(adapter, vlan, vf);
5351                 igb_set_vmolr(adapter, vf, true);
5352                 adapter->vf_data[vf].pf_vlan = 0;
5353                 adapter->vf_data[vf].pf_qos = 0;
5354         }
5355 out:
5356         return err;
5357 }
5358
5359 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5360 {
5361         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5362         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5363
5364         return igb_vlvf_set(adapter, vid, add, vf);
5365 }
5366
5367 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5368 {
5369         /* clear flags, except the one that indicates the PF has set the MAC */
5370         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5371         adapter->vf_data[vf].last_nack = jiffies;
5372
5373         /* reset offloads to defaults */
5374         igb_set_vmolr(adapter, vf, true);
5375
5376         /* reset vlans for device */
5377         igb_clear_vf_vfta(adapter, vf);
5378         if (adapter->vf_data[vf].pf_vlan)
5379                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5380                                     adapter->vf_data[vf].pf_vlan,
5381                                     adapter->vf_data[vf].pf_qos);
5382         else
5383                 igb_clear_vf_vfta(adapter, vf);
5384
5385         /* reset multicast table array for vf */
5386         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5387
5388         /* Flush and reset the mta with the new values */
5389         igb_set_rx_mode(adapter->netdev);
5390 }
5391
5392 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5393 {
5394         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5395
5396         /* generate a new mac address as we were hotplug removed/added */
5397         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5398                 random_ether_addr(vf_mac);
5399
5400         /* process remaining reset events */
5401         igb_vf_reset(adapter, vf);
5402 }
5403
5404 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5405 {
5406         struct e1000_hw *hw = &adapter->hw;
5407         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5408         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5409         u32 reg, msgbuf[3];
5410         u8 *addr = (u8 *)(&msgbuf[1]);
5411
5412         /* process all the same items cleared in a function level reset */
5413         igb_vf_reset(adapter, vf);
5414
5415         /* set vf mac address */
5416         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5417
5418         /* enable transmit and receive for vf */
5419         reg = rd32(E1000_VFTE);
5420         wr32(E1000_VFTE, reg | (1 << vf));
5421         reg = rd32(E1000_VFRE);
5422         wr32(E1000_VFRE, reg | (1 << vf));
5423
5424         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5425
5426         /* reply to reset with ack and vf mac address */
5427         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5428         memcpy(addr, vf_mac, 6);
5429         igb_write_mbx(hw, msgbuf, 3, vf);
5430 }
5431
5432 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5433 {
5434         /*
5435          * The VF MAC Address is stored in a packed array of bytes
5436          * starting at the second 32-bit word of the msg array
5437          */
5438         unsigned char *addr = (char *)&msg[1];
5439         int err = -1;
5440
5441         if (is_valid_ether_addr(addr))
5442                 err = igb_set_vf_mac(adapter, vf, addr);
5443
5444         return err;
5445 }
5446
5447 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5448 {
5449         struct e1000_hw *hw = &adapter->hw;
5450         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5451         u32 msg = E1000_VT_MSGTYPE_NACK;
5452
5453         /* if the device isn't clear to send, it shouldn't be reading either */
5454         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5455             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5456                 igb_write_mbx(hw, &msg, 1, vf);
5457                 vf_data->last_nack = jiffies;
5458         }
5459 }
5460
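/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: mailbox/VF index to service
 *
 * Replies with ACK on success or NACK on failure; a VF that has not yet
 * completed a reset is NACKed until it does.
 **/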
5461 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5462 {
5463         struct pci_dev *pdev = adapter->pdev;
5464         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5465         struct e1000_hw *hw = &adapter->hw;
5466         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5467         s32 retval;
5468
5469         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5470
5471         if (retval) {
5472                 /* if receive failed revoke VF CTS stats and restart init */
5473                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5474                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5475                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5476                         return;
5477                 goto out;
5478         }
5479
5480         /* this is a message we already processed, do nothing */
5481         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5482                 return;
5483
5484         /*
5485          * until the vf completes a reset it should not be
5486          * allowed to start any configuration.
5487          */
5488
5489         if (msgbuf[0] == E1000_VF_RESET) {
5490                 igb_vf_reset_msg(adapter, vf);
5491                 return;
5492         }
5493
5494         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5495                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5496                         return;
5497                 retval = -1;
5498                 goto out;
5499         }
5500
5501         switch ((msgbuf[0] & 0xFFFF)) {
5502         case E1000_VF_SET_MAC_ADDR:
5503                 retval = -EINVAL;
5504                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5505                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5506                 else
5507                         dev_warn(&pdev->dev,
5508                                  "VF %d attempted to override administratively "
5509                                  "set MAC address\nReload the VF driver to "
5510                                  "resume operations\n", vf);
5511                 break;
5512         case E1000_VF_SET_PROMISC:
5513                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5514                 break;
5515         case E1000_VF_SET_MULTICAST:
5516                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5517                 break;
5518         case E1000_VF_SET_LPE:
5519                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5520                 break;
5521         case E1000_VF_SET_VLAN:
5522                 retval = -1;
5523                 if (vf_data->pf_vlan)
5524                         dev_warn(&pdev->dev,
5525                                  "VF %d attempted to override administratively "
5526                                  "set VLAN tag\nReload the VF driver to "
5527                                  "resume operations\n", vf);
5528                 else
5529                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5530                 break;
5531         default:
5532                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5533                 retval = -1;
5534                 break;
5535         }
5536
5537         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5538 out:
5539         /* notify the VF of the results of what it sent us */
5540         if (retval)
5541                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5542         else
5543                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5544
5545         igb_write_mbx(hw, msgbuf, 1, vf);
5546 }
5547
5548 static void igb_msg_task(struct igb_adapter *adapter)
5549 {
5550         struct e1000_hw *hw = &adapter->hw;
5551         u32 vf;
5552
5553         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5554                 /* process any reset requests */
5555                 if (!igb_check_for_rst(hw, vf))
5556                         igb_vf_reset_event(adapter, vf);
5557
5558                 /* process any messages pending */
5559                 if (!igb_check_for_msg(hw, vf))
5560                         igb_rcv_msg_from_vf(adapter, vf);
5561
5562                 /* process any acks */
5563                 if (!igb_check_for_ack(hw, vf))
5564                         igb_rcv_ack_from_vf(adapter, vf);
5565         }
5566 }
5567
5568 /**
5569  *  igb_set_uta - Set unicast filter table address
5570  *  @adapter: board private structure
5571  *
5572  *  The unicast table address is a register array of 32-bit registers.
5573  *  The table is meant to be used in a way similar to how the MTA is used;
5574  *  however, due to certain limitations in the hardware it is necessary to
5575  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5576  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5577  **/
5578 static void igb_set_uta(struct igb_adapter *adapter)
5579 {
5580         struct e1000_hw *hw = &adapter->hw;
5581         int i;
5582
5583         /* The UTA table only exists on 82576 hardware and newer */
5584         if (hw->mac.type < e1000_82576)
5585                 return;
5586
5587         /* we only need to do this if VMDq is enabled */
5588         if (!adapter->vfs_allocated_count)
5589                 return;
5590
5591         for (i = 0; i < hw->mac.uta_reg_count; i++)
5592                 array_wr32(E1000_UTA, i, ~0);
5593 }
5594
5595 /**
5596  * igb_intr_msi - MSI Interrupt Handler
5597  * @irq: interrupt number
5598  * @data: pointer to our private adapter structure
5599  **/
5600 static irqreturn_t igb_intr_msi(int irq, void *data)
5601 {
5602         struct igb_adapter *adapter = data;
5603         struct igb_q_vector *q_vector = adapter->q_vector[0];
5604         struct e1000_hw *hw = &adapter->hw;
5605         /* read ICR disables interrupts using IAM */
5606         u32 icr = rd32(E1000_ICR);
5607
5608         igb_write_itr(q_vector);
5609
5610         if (icr & E1000_ICR_DRSTA)
5611                 schedule_work(&adapter->reset_task);
5612
5613         if (icr & E1000_ICR_DOUTSYNC) {
5614                 /* HW is reporting DMA is out of sync */
5615                 adapter->stats.doosync++;
5616         }
5617
5618         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5619                 hw->mac.get_link_status = 1;
5620                 if (!test_bit(__IGB_DOWN, &adapter->state))
5621                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5622         }
5623
5624         napi_schedule(&q_vector->napi);
5625
5626         return IRQ_HANDLED;
5627 }
5628
5629 /**
5630  * igb_intr - Legacy Interrupt Handler
5631  * @irq: interrupt number
5632  * @data: pointer to our private adapter structure
5633  **/
5634 static irqreturn_t igb_intr(int irq, void *data)
5635 {
5636         struct igb_adapter *adapter = data;
5637         struct igb_q_vector *q_vector = adapter->q_vector[0];
5638         struct e1000_hw *hw = &adapter->hw;
5639         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5640          * need for the IMC write */
5641         u32 icr = rd32(E1000_ICR);
5642
5643         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5644          * not set, then the adapter didn't send an interrupt */
5645         if (!(icr & E1000_ICR_INT_ASSERTED))
5646                 return IRQ_NONE;
5647
5648         igb_write_itr(q_vector);
5649
5650         if (icr & E1000_ICR_DRSTA)
5651                 schedule_work(&adapter->reset_task);
5652
5653         if (icr & E1000_ICR_DOUTSYNC) {
5654                 /* HW is reporting DMA is out of sync */
5655                 adapter->stats.doosync++;
5656         }
5657
5658         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5659                 hw->mac.get_link_status = 1;
5660                 /* guard against interrupt when we're going down */
5661                 if (!test_bit(__IGB_DOWN, &adapter->state))
5662                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5663         }
5664
5665         napi_schedule(&q_vector->napi);
5666
5667         return IRQ_HANDLED;
5668 }
5669
5670 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5671 {
5672         struct igb_adapter *adapter = q_vector->adapter;
5673         struct e1000_hw *hw = &adapter->hw;
5674
5675         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5676             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5677                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5678                         igb_set_itr(q_vector);
5679                 else
5680                         igb_update_ring_itr(q_vector);
5681         }
5682
5683         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5684                 if (adapter->msix_entries)
5685                         wr32(E1000_EIMS, q_vector->eims_value);
5686                 else
5687                         igb_irq_enable(adapter);
5688         }
5689 }
5690
5691 /**
5692  * igb_poll - NAPI Rx polling callback
5693  * @napi: napi polling structure
5694  * @budget: count of how many packets we should handle
5695  **/
5696 static int igb_poll(struct napi_struct *napi, int budget)
5697 {
5698         struct igb_q_vector *q_vector = container_of(napi,
5699                                                      struct igb_q_vector,
5700                                                      napi);
5701         bool clean_complete = true;
5702
5703 #ifdef CONFIG_IGB_DCA
5704         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5705                 igb_update_dca(q_vector);
5706 #endif
5707         if (q_vector->tx.ring)
5708                 clean_complete = igb_clean_tx_irq(q_vector);
5709
5710         if (q_vector->rx.ring)
5711                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5712
5713         /* If all work not completed, return budget and keep polling */
5714         if (!clean_complete)
5715                 return budget;
5716
5717         /* If not enough Rx work done, exit the polling mode */
5718         napi_complete(napi);
5719         igb_ring_irq_enable(q_vector);
5720
5721         return 0;
5722 }
5723
5724 /**
5725  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5726  * @adapter: board private structure
5727  * @shhwtstamps: timestamp structure to update
5728  * @regval: unsigned 64bit system time value.
5729  *
5730  * We need to convert the system time value stored in the RX/TXSTMP registers
5731  * into a hwtstamp which can be used by the upper level timestamping functions
5732  */
5733 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5734                                    struct skb_shared_hwtstamps *shhwtstamps,
5735                                    u64 regval)
5736 {
5737         u64 ns;
5738
5739         /*
5740          * The 82580 starts with 1 ns at bit 0 in RX/TXSTMPL; shift this up by
5741          * 24 bits to match the clock shift we set up earlier.
5742          */
5743         if (adapter->hw.mac.type >= e1000_82580)
5744                 regval <<= IGB_82580_TSYNC_SHIFT;
5745
5746         ns = timecounter_cyc2time(&adapter->clock, regval);
5747         timecompare_update(&adapter->compare, ns);
5748         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5749         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5750         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5751 }
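
     /*
      * Worked example (illustrative only; assumes the cyclecounter behind
      * adapter->clock was registered with mult == 1 and
      * shift == IGB_82580_TSYNC_SHIFT): an 82580 stamp of 1000, i.e.
      * 1000 ns at 1 ns per bit 0, is scaled to 1000 << 24 "cycles" above,
      * and timecounter_cyc2time() then computes ((1000 << 24) * 1) >> 24 =
      * 1000 ns back out, so the two shifts cancel by construction.
      */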
5752
5753 /**
5754  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5755  * @q_vector: pointer to q_vector containing needed info
5756  * @buffer_info: pointer to igb_tx_buffer structure
5757  *
5758  * If we were asked to do hardware stamping and such a time stamp is
5759  * available, then it must have been for this skb here because we
5760  * allow only one such packet into the queue at a time.
5761  */
5762 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5763                             struct igb_tx_buffer *buffer_info)
5764 {
5765         struct igb_adapter *adapter = q_vector->adapter;
5766         struct e1000_hw *hw = &adapter->hw;
5767         struct skb_shared_hwtstamps shhwtstamps;
5768         u64 regval;
5769
5770         /* exit if skb didn't request a hw timestamp or no TX stamp is valid */
5771         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5772             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5773                 return;
5774
5775         regval = rd32(E1000_TXSTMPL);
5776         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5777
5778         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5779         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5780 }
5781
5782 /**
5783  * igb_clean_tx_irq - Reclaim resources after transmit completes
5784  * @q_vector: pointer to q_vector containing needed info
5785  * returns true if ring is completely cleaned
5786  **/
5787 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5788 {
5789         struct igb_adapter *adapter = q_vector->adapter;
5790         struct igb_ring *tx_ring = q_vector->tx.ring;
5791         struct igb_tx_buffer *tx_buffer;
5792         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5793         unsigned int total_bytes = 0, total_packets = 0;
5794         unsigned int budget = q_vector->tx.work_limit;
5795         unsigned int i = tx_ring->next_to_clean;
5796
5797         if (test_bit(__IGB_DOWN, &adapter->state))
5798                 return true;
5799
5800         tx_buffer = &tx_ring->tx_buffer_info[i];
5801         tx_desc = IGB_TX_DESC(tx_ring, i);
5802         i -= tx_ring->count;
5803
5804         for (; budget; budget--) {
5805                 eop_desc = tx_buffer->next_to_watch;
5806
5807                 /* prevent any other reads prior to eop_desc */
5808                 rmb();
5809
5810                 /* if next_to_watch is not set then there is no work pending */
5811                 if (!eop_desc)
5812                         break;
5813
5814                 /* if DD is not set pending work has not been completed */
5815                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5816                         break;
5817
5818                 /* clear next_to_watch to prevent false hangs */
5819                 tx_buffer->next_to_watch = NULL;
5820
5821                 /* update the statistics for this packet */
5822                 total_bytes += tx_buffer->bytecount;
5823                 total_packets += tx_buffer->gso_segs;
5824
5825                 /* retrieve hardware timestamp */
5826                 igb_tx_hwtstamp(q_vector, tx_buffer);
5827
5828                 /* free the skb */
5829                 dev_kfree_skb_any(tx_buffer->skb);
5830                 tx_buffer->skb = NULL;
5831
5832                 /* unmap skb header data */
5833                 dma_unmap_single(tx_ring->dev,
5834                                  tx_buffer->dma,
5835                                  tx_buffer->length,
5836                                  DMA_TO_DEVICE);
5837
5838                 /* clear last DMA location and unmap remaining buffers */
5839                 while (tx_desc != eop_desc) {
5840                         tx_buffer->dma = 0;
5841
5842                         tx_buffer++;
5843                         tx_desc++;
5844                         i++;
5845                         if (unlikely(!i)) {
5846                                 i -= tx_ring->count;
5847                                 tx_buffer = tx_ring->tx_buffer_info;
5848                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5849                         }
5850
5851                         /* unmap any remaining paged data */
5852                         if (tx_buffer->dma) {
5853                                 dma_unmap_page(tx_ring->dev,
5854                                                tx_buffer->dma,
5855                                                tx_buffer->length,
5856                                                DMA_TO_DEVICE);
5857                         }
5858                 }
5859
5860                 /* clear last DMA location */
5861                 tx_buffer->dma = 0;
5862
5863                 /* move us one more past the eop_desc for start of next pkt */
5864                 tx_buffer++;
5865                 tx_desc++;
5866                 i++;
5867                 if (unlikely(!i)) {
5868                         i -= tx_ring->count;
5869                         tx_buffer = tx_ring->tx_buffer_info;
5870                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5871                 }
5872         }
5873
5874         netdev_tx_completed_queue(txring_txq(tx_ring),
5875                                   total_packets, total_bytes);
5876         i += tx_ring->count;
5877         tx_ring->next_to_clean = i;
5878         u64_stats_update_begin(&tx_ring->tx_syncp);
5879         tx_ring->tx_stats.bytes += total_bytes;
5880         tx_ring->tx_stats.packets += total_packets;
5881         u64_stats_update_end(&tx_ring->tx_syncp);
5882         q_vector->tx.total_bytes += total_bytes;
5883         q_vector->tx.total_packets += total_packets;
5884
5885         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5886                 struct e1000_hw *hw = &adapter->hw;
5887
5888                 eop_desc = tx_buffer->next_to_watch;
5889
5890                 /* Detect a transmit hang in hardware; this serializes the
5891                  * check with the clearing of time_stamp and the movement of i */
5892                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5893                 if (eop_desc &&
5894                     time_after(jiffies, tx_buffer->time_stamp +
5895                                (adapter->tx_timeout_factor * HZ)) &&
5896                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5897
5898                         /* detected Tx unit hang */
5899                         dev_err(tx_ring->dev,
5900                                 "Detected Tx Unit Hang\n"
5901                                 "  Tx Queue             <%d>\n"
5902                                 "  TDH                  <%x>\n"
5903                                 "  TDT                  <%x>\n"
5904                                 "  next_to_use          <%x>\n"
5905                                 "  next_to_clean        <%x>\n"
5906                                 "buffer_info[next_to_clean]\n"
5907                                 "  time_stamp           <%lx>\n"
5908                                 "  next_to_watch        <%p>\n"
5909                                 "  jiffies              <%lx>\n"
5910                                 "  desc.status          <%x>\n",
5911                                 tx_ring->queue_index,
5912                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5913                                 readl(tx_ring->tail),
5914                                 tx_ring->next_to_use,
5915                                 tx_ring->next_to_clean,
5916                                 tx_buffer->time_stamp,
5917                                 eop_desc,
5918                                 jiffies,
5919                                 eop_desc->wb.status);
5920                         netif_stop_subqueue(tx_ring->netdev,
5921                                             tx_ring->queue_index);
5922
5923                         /* we are about to reset, no point in enabling stuff */
5924                         return true;
5925                 }
5926         }
5927
5928         if (unlikely(total_packets &&
5929                      netif_carrier_ok(tx_ring->netdev) &&
5930                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5931                 /* Make sure that anybody stopping the queue after this
5932                  * sees the new next_to_clean.
5933                  */
5934                 smp_mb();
5935                 if (__netif_subqueue_stopped(tx_ring->netdev,
5936                                              tx_ring->queue_index) &&
5937                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5938                         netif_wake_subqueue(tx_ring->netdev,
5939                                             tx_ring->queue_index);
5940
5941                         u64_stats_update_begin(&tx_ring->tx_syncp);
5942                         tx_ring->tx_stats.restart_queue++;
5943                         u64_stats_update_end(&tx_ring->tx_syncp);
5944                 }
5945         }
5946
5947         return !!budget;
5948 }
5949
5950 static inline void igb_rx_checksum(struct igb_ring *ring,
5951                                    union e1000_adv_rx_desc *rx_desc,
5952                                    struct sk_buff *skb)
5953 {
5954         skb_checksum_none_assert(skb);
5955
5956         /* Ignore Checksum bit is set */
5957         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5958                 return;
5959
5960         /* Rx checksum disabled via ethtool */
5961         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5962                 return;
5963
5964         /* TCP/UDP checksum error bit is set */
5965         if (igb_test_staterr(rx_desc,
5966                              E1000_RXDEXT_STATERR_TCPE |
5967                              E1000_RXDEXT_STATERR_IPE)) {
5968                 /*
5969                  * work around an erratum with SCTP packets where the TCPE (aka
5970                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5971                  * packets; let the stack check the crc32c instead
5972                  */
5973                 if (!((skb->len == 60) &&
5974                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5975                         u64_stats_update_begin(&ring->rx_syncp);
5976                         ring->rx_stats.csum_err++;
5977                         u64_stats_update_end(&ring->rx_syncp);
5978                 }
5979                 /* let the stack verify checksum errors */
5980                 return;
5981         }
5982         /* It must be a TCP or UDP packet with a valid checksum */
5983         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5984                                       E1000_RXD_STAT_UDPCS))
5985                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5986
5987         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5988                 le32_to_cpu(rx_desc->wb.upper.status_error));
5989 }
5990
5991 static inline void igb_rx_hash(struct igb_ring *ring,
5992                                union e1000_adv_rx_desc *rx_desc,
5993                                struct sk_buff *skb)
5994 {
5995         if (ring->netdev->features & NETIF_F_RXHASH)
5996                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5997 }
5998
5999 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6000                             union e1000_adv_rx_desc *rx_desc,
6001                             struct sk_buff *skb)
6002 {
6003         struct igb_adapter *adapter = q_vector->adapter;
6004         struct e1000_hw *hw = &adapter->hw;
6005         u64 regval;
6006
6007         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6008                                        E1000_RXDADV_STAT_TS))
6009                 return;
6010
6011         /*
6012          * If this bit is set, then the RX registers contain the time stamp. No
6013          * other packet will be time stamped until we read these registers, so
6014          * read the registers to make them available again. Because only one
6015          * packet can be time stamped at a time, we know that the register
6016          * values must belong to this one here and therefore we don't need to
6017          * compare any of the additional attributes stored for it.
6018          *
6019          * If nothing went wrong, then it should have a shared tx_flags that we
6020          * can turn into a skb_shared_hwtstamps.
6021          */
6022         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6023                 u32 *stamp = (u32 *)skb->data;
6024                 regval = le32_to_cpu(*(stamp + 2));
6025                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6026                 skb_pull(skb, IGB_TS_HDR_LEN);
6027         } else {
6028                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6029                         return;
6030
6031                 regval = rd32(E1000_RXSTMPL);
6032                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6033         }
6034
6035         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6036 }
6037
6038 static void igb_rx_vlan(struct igb_ring *ring,
6039                         union e1000_adv_rx_desc *rx_desc,
6040                         struct sk_buff *skb)
6041 {
6042         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6043                 u16 vid;
6044                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6045                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6046                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6047                 else
6048                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6049
6050                 __vlan_hwaccel_put_tag(skb, vid);
6051         }
6052 }
6053
6054 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6055 {
6056         /* HW will not DMA in data larger than the given buffer, even if it
6057          * parses the (NFS, of course) header to be larger.  In that case, it
6058          * fills the header buffer and spills the rest into the page.
6059          */
6060         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6061                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6062         if (hlen > IGB_RX_HDR_LEN)
6063                 hlen = IGB_RX_HDR_LEN;
6064         return hlen;
6065 }
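
     /*
      * Example decode (illustrative; assumes the usual e1000 defines of a
      * 0x7FE0 mask and a 5-bit shift): with hdr_info = 0x0C00 the math in
      * igb_get_hlen() gives (0x0C00 & 0x7FE0) >> 5 = 0x60, i.e. the hardware
      * DMA'd a 96 byte header into the header buffer.
      */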
6066
6067 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6068 {
6069         struct igb_ring *rx_ring = q_vector->rx.ring;
6070         union e1000_adv_rx_desc *rx_desc;
6071         const int current_node = numa_node_id();
6072         unsigned int total_bytes = 0, total_packets = 0;
6073         u16 cleaned_count = igb_desc_unused(rx_ring);
6074         u16 i = rx_ring->next_to_clean;
6075
6076         rx_desc = IGB_RX_DESC(rx_ring, i);
6077
6078         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6079                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6080                 struct sk_buff *skb = buffer_info->skb;
6081                 union e1000_adv_rx_desc *next_rxd;
6082
6083                 buffer_info->skb = NULL;
6084                 prefetch(skb->data);
6085
6086                 i++;
6087                 if (i == rx_ring->count)
6088                         i = 0;
6089
6090                 next_rxd = IGB_RX_DESC(rx_ring, i);
6091                 prefetch(next_rxd);
6092
6093                 /*
6094                  * This memory barrier is needed to keep us from reading
6095                  * any other fields out of the rx_desc until we know the
6096                  * RXD_STAT_DD bit is set
6097                  */
6098                 rmb();
6099
6100                 if (!skb_is_nonlinear(skb)) {
6101                         __skb_put(skb, igb_get_hlen(rx_desc));
6102                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
6103                                          IGB_RX_HDR_LEN,
6104                                          DMA_FROM_DEVICE);
6105                         buffer_info->dma = 0;
6106                 }
6107
6108                 if (rx_desc->wb.upper.length) {
6109                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6110
6111                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6112                                                 buffer_info->page,
6113                                                 buffer_info->page_offset,
6114                                                 length);
6115
6116                         skb->len += length;
6117                         skb->data_len += length;
6118                         skb->truesize += PAGE_SIZE / 2;
6119
6120                         if ((page_count(buffer_info->page) != 1) ||
6121                             (page_to_nid(buffer_info->page) != current_node))
6122                                 buffer_info->page = NULL;
6123                         else
6124                                 get_page(buffer_info->page);
6125
6126                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6127                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
6128                         buffer_info->page_dma = 0;
6129                 }
6130
6131                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6132                         struct igb_rx_buffer *next_buffer;
6133                         next_buffer = &rx_ring->rx_buffer_info[i];
6134                         buffer_info->skb = next_buffer->skb;
6135                         buffer_info->dma = next_buffer->dma;
6136                         next_buffer->skb = skb;
6137                         next_buffer->dma = 0;
6138                         goto next_desc;
6139                 }
6140
6141                 if (unlikely((igb_test_staterr(rx_desc,
6142                                                E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6143                              && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6144                         dev_kfree_skb_any(skb);
6145                         goto next_desc;
6146                 }
6147
6148                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6149                 igb_rx_hash(rx_ring, rx_desc, skb);
6150                 igb_rx_checksum(rx_ring, rx_desc, skb);
6151                 igb_rx_vlan(rx_ring, rx_desc, skb);
6152
6153                 total_bytes += skb->len;
6154                 total_packets++;
6155
6156                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6157
6158                 napi_gro_receive(&q_vector->napi, skb);
6159
6160                 budget--;
6161 next_desc:
6162                 if (!budget)
6163                         break;
6164
6165                 cleaned_count++;
6166                 /* return some buffers to hardware, one at a time is too slow */
6167                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6168                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6169                         cleaned_count = 0;
6170                 }
6171
6172                 /* use prefetched values */
6173                 rx_desc = next_rxd;
6174         }
6175
6176         rx_ring->next_to_clean = i;
6177         u64_stats_update_begin(&rx_ring->rx_syncp);
6178         rx_ring->rx_stats.packets += total_packets;
6179         rx_ring->rx_stats.bytes += total_bytes;
6180         u64_stats_update_end(&rx_ring->rx_syncp);
6181         q_vector->rx.total_packets += total_packets;
6182         q_vector->rx.total_bytes += total_bytes;
6183
6184         if (cleaned_count)
6185                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6186
6187         return !!budget;
6188 }
6189
6190 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6191                                  struct igb_rx_buffer *bi)
6192 {
6193         struct sk_buff *skb = bi->skb;
6194         dma_addr_t dma = bi->dma;
6195
6196         if (dma)
6197                 return true;
6198
6199         if (likely(!skb)) {
6200                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6201                                                 IGB_RX_HDR_LEN);
6202                 bi->skb = skb;
6203                 if (!skb) {
6204                         rx_ring->rx_stats.alloc_failed++;
6205                         return false;
6206                 }
6207
6208                 /* initialize skb for ring */
6209                 skb_record_rx_queue(skb, rx_ring->queue_index);
6210         }
6211
6212         dma = dma_map_single(rx_ring->dev, skb->data,
6213                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6214
6215         if (dma_mapping_error(rx_ring->dev, dma)) {
6216                 rx_ring->rx_stats.alloc_failed++;
6217                 return false;
6218         }
6219
6220         bi->dma = dma;
6221         return true;
6222 }
6223
6224 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6225                                   struct igb_rx_buffer *bi)
6226 {
6227         struct page *page = bi->page;
6228         dma_addr_t page_dma = bi->page_dma;
6229         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6230
6231         if (page_dma)
6232                 return true;
6233
6234         if (!page) {
6235                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6236                 bi->page = page;
6237                 if (unlikely(!page)) {
6238                         rx_ring->rx_stats.alloc_failed++;
6239                         return false;
6240                 }
6241         }
6242
6243         page_dma = dma_map_page(rx_ring->dev, page,
6244                                 page_offset, PAGE_SIZE / 2,
6245                                 DMA_FROM_DEVICE);
6246
6247         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6248                 rx_ring->rx_stats.alloc_failed++;
6249                 return false;
6250         }
6251
6252         bi->page_dma = page_dma;
6253         bi->page_offset = page_offset;
6254         return true;
6255 }
6256
6257 /**
6258  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6259  * @rx_ring: address of the ring to refill
      * @cleaned_count: number of buffers to allocate
6260  **/
6261 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6262 {
6263         union e1000_adv_rx_desc *rx_desc;
6264         struct igb_rx_buffer *bi;
6265         u16 i = rx_ring->next_to_use;
6266
6267         rx_desc = IGB_RX_DESC(rx_ring, i);
6268         bi = &rx_ring->rx_buffer_info[i];
6269         i -= rx_ring->count;
6270
6271         while (cleaned_count--) {
6272                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6273                         break;
6274
6275                 /* Refresh the desc even if buffer_addrs didn't change
6276                  * because each write-back erases this info. */
6277                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6278
6279                 if (!igb_alloc_mapped_page(rx_ring, bi))
6280                         break;
6281
6282                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6283
6284                 rx_desc++;
6285                 bi++;
6286                 i++;
6287                 if (unlikely(!i)) {
6288                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6289                         bi = rx_ring->rx_buffer_info;
6290                         i -= rx_ring->count;
6291                 }
6292
6293                 /* clear the hdr_addr for the next_to_use descriptor */
6294                 rx_desc->read.hdr_addr = 0;
6295         }
6296
6297         i += rx_ring->count;
6298
6299         if (rx_ring->next_to_use != i) {
6300                 rx_ring->next_to_use = i;
6301
6302                 /* Force memory writes to complete before letting h/w
6303                  * know there are new descriptors to fetch.  (Only
6304                  * applicable for weak-ordered memory model archs,
6305                  * such as IA-64). */
6306                 wmb();
6307                 writel(i, rx_ring->tail);
6308         }
6309 }
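
     /*
      * Note on the index arithmetic above (also used in igb_clean_tx_irq):
      * i is biased by -rx_ring->count so the wrap test is simply "if (!i)"
      * rather than a compare against the ring size.  For example, with
      * count = 256 and next_to_use = 255, i starts at -1, the increment
      * makes it 0, and the wrap branch rewinds i to -256 while resetting
      * the descriptor and buffer pointers; the final "i += rx_ring->count"
      * converts back to a real ring index.
      */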
6310
6311 /**
6312  * igb_mii_ioctl - handle MII register ioctls
6313  * @netdev: network interface device structure
6314  * @ifr: interface request structure holding the mii_ioctl_data
6315  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6316  **/
6317 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6318 {
6319         struct igb_adapter *adapter = netdev_priv(netdev);
6320         struct mii_ioctl_data *data = if_mii(ifr);
6321
6322         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6323                 return -EOPNOTSUPP;
6324
6325         switch (cmd) {
6326         case SIOCGMIIPHY:
6327                 data->phy_id = adapter->hw.phy.addr;
6328                 break;
6329         case SIOCGMIIREG:
6330                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6331                                      &data->val_out))
6332                         return -EIO;
6333                 break;
6334         case SIOCSMIIREG:
6335         default:
6336                 return -EOPNOTSUPP;
6337         }
6338         return 0;
6339 }
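
     /*
      * Userspace sketch (hypothetical, not part of the driver): reading the
      * PHY status register through the ioctls handled above; fd is any
      * AF_INET socket and error handling is omitted.
      *
      *      struct ifreq ifr = { .ifr_name = "eth0" };
      *      struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
      *
      *      ioctl(fd, SIOCGMIIPHY, &ifr);   mii->phy_id is filled in
      *      mii->reg_num = MII_BMSR;
      *      ioctl(fd, SIOCGMIIREG, &ifr);   status returned in mii->val_out
      */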
6340
6341 /**
6342  * igb_hwtstamp_ioctl - control hardware time stamping
6343  * @netdev: network interface device structure
6344  * @ifr: interface request structure holding the hwtstamp_config
6345  * @cmd: ioctl command (SIOCSHWTSTAMP)
6346  *
6347  * Outgoing time stamping can be enabled and disabled. Play nice and
6348  * disable it when requested, although it shouldn't cause any overhead
6349  * when no packet needs it. At most one packet in the queue may be
6350  * marked for time stamping, otherwise it would be impossible to tell
6351  * for sure to which packet the hardware time stamp belongs.
6352  *
6353  * Incoming time stamping has to be configured via the hardware
6354  * filters. Not all combinations are supported, in particular event
6355  * type has to be specified. Matching the kind of event packet is
6356  * not supported, with the exception of "all V2 events regardless of
6357  * layer 2 or 4".
6358  *
6359  **/
6360 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6361                               struct ifreq *ifr, int cmd)
6362 {
6363         struct igb_adapter *adapter = netdev_priv(netdev);
6364         struct e1000_hw *hw = &adapter->hw;
6365         struct hwtstamp_config config;
6366         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6367         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6368         u32 tsync_rx_cfg = 0;
6369         bool is_l4 = false;
6370         bool is_l2 = false;
6371         u32 regval;
6372
6373         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6374                 return -EFAULT;
6375
6376         /* reserved for future extensions */
6377         if (config.flags)
6378                 return -EINVAL;
6379
6380         switch (config.tx_type) {
6381         case HWTSTAMP_TX_OFF:
6382                 tsync_tx_ctl = 0;
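                     /* fall through */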
6383         case HWTSTAMP_TX_ON:
6384                 break;
6385         default:
6386                 return -ERANGE;
6387         }
6388
6389         switch (config.rx_filter) {
6390         case HWTSTAMP_FILTER_NONE:
6391                 tsync_rx_ctl = 0;
6392                 break;
6393         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6394         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6395         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6396         case HWTSTAMP_FILTER_ALL:
6397                 /*
6398                  * register TSYNCRXCFG must be set, therefore it is not
6399                  * possible to time stamp both Sync and Delay_Req messages
6400                  * => fall back to time stamping all packets
6401                  */
6402                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6403                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6404                 break;
6405         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6406                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6407                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6408                 is_l4 = true;
6409                 break;
6410         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6411                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6412                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6413                 is_l4 = true;
6414                 break;
6415         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6416         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6417                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6418                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6419                 is_l2 = true;
6420                 is_l4 = true;
6421                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6422                 break;
6423         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6424         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6425                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6426                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6427                 is_l2 = true;
6428                 is_l4 = true;
6429                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6430                 break;
6431         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6432         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6433         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6434                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6435                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6436                 is_l2 = true;
6437                 is_l4 = true;
6438                 break;
6439         default:
6440                 return -ERANGE;
6441         }
6442
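             /* the 82575 does not support hardware time stamping */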
6443         if (hw->mac.type == e1000_82575) {
6444                 if (tsync_rx_ctl | tsync_tx_ctl)
6445                         return -EINVAL;
6446                 return 0;
6447         }
6448
6449         /*
6450          * Per-packet timestamping only works if all packets are
6451          * timestamped, so enable timestamping in all packets as
6452          * long as one rx filter was configured.
6453          */
6454         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6455                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6456                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6457         }
6458
6459         /* enable/disable TX */
6460         regval = rd32(E1000_TSYNCTXCTL);
6461         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6462         regval |= tsync_tx_ctl;
6463         wr32(E1000_TSYNCTXCTL, regval);
6464
6465         /* enable/disable RX */
6466         regval = rd32(E1000_TSYNCRXCTL);
6467         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6468         regval |= tsync_rx_ctl;
6469         wr32(E1000_TSYNCRXCTL, regval);
6470
6471         /* define which PTP packets are time stamped */
6472         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6473
6474         /* define ethertype filter for timestamped packets */
6475         if (is_l2)
6476                 wr32(E1000_ETQF(3),
6477                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6478                                  E1000_ETQF_1588 | /* enable timestamping */
6479                                  ETH_P_1588));     /* 1588 eth protocol type */
6480         else
6481                 wr32(E1000_ETQF(3), 0);
6482
6483 #define PTP_PORT 319
6484         /* L4 Queue Filter[3]: filter by destination port and protocol */
6485         if (is_l4) {
6486                 u32 ftqf = (IPPROTO_UDP /* UDP */
6487                         | E1000_FTQF_VF_BP /* VF not compared */
6488                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6489                         | E1000_FTQF_MASK); /* mask all inputs */
6490                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6491
6492                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6493                 wr32(E1000_IMIREXT(3),
6494                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6495                 if (hw->mac.type == e1000_82576) {
6496                         /* enable source port check */
6497                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6498                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6499                 }
6500                 wr32(E1000_FTQF(3), ftqf);
6501         } else {
6502                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6503         }
6504         wrfl();
6505
6506         adapter->hwtstamp_config = config;
6507
6508         /* clear TX/RX time stamp registers, just to be sure */
6509         regval = rd32(E1000_TXSTMPH);
6510         regval = rd32(E1000_RXSTMPH);
6511
6512         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6513                 -EFAULT : 0;
6514 }
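
     /*
      * Userspace sketch (hypothetical): enabling TX stamping plus RX
      * timestamps for PTP v2 event packets via the ioctl above; fd is any
      * AF_INET socket and error handling is omitted.
      *
      *      struct hwtstamp_config cfg = {
      *              .tx_type   = HWTSTAMP_TX_ON,
      *              .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
      *      };
      *      struct ifreq ifr = { .ifr_name = "eth0" };
      *
      *      ifr.ifr_data = (void *)&cfg;
      *      ioctl(fd, SIOCSHWTSTAMP, &ifr);
      *
      * On return cfg holds the configuration actually programmed; as the
      * switch above shows, some requests (e.g.
      * HWTSTAMP_FILTER_PTP_V1_L4_EVENT) are widened to HWTSTAMP_FILTER_ALL.
      */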
6515
6516 /**
6517  * igb_ioctl - dispatch device-specific ioctls
6518  * @netdev: network interface device structure
6519  * @ifr: interface request structure
6520  * @cmd: ioctl command
6521  **/
6522 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6523 {
6524         switch (cmd) {
6525         case SIOCGMIIPHY:
6526         case SIOCGMIIREG:
6527         case SIOCSMIIREG:
6528                 return igb_mii_ioctl(netdev, ifr, cmd);
6529         case SIOCSHWTSTAMP:
6530                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6531         default:
6532                 return -EOPNOTSUPP;
6533         }
6534 }
6535
6536 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6537 {
6538         struct igb_adapter *adapter = hw->back;
6539         u16 cap_offset;
6540
6541         cap_offset = adapter->pdev->pcie_cap;
6542         if (!cap_offset)
6543                 return -E1000_ERR_CONFIG;
6544
6545         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6546
6547         return 0;
6548 }
6549
6550 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6551 {
6552         struct igb_adapter *adapter = hw->back;
6553         u16 cap_offset;
6554
6555         cap_offset = adapter->pdev->pcie_cap;
6556         if (!cap_offset)
6557                 return -E1000_ERR_CONFIG;
6558
6559         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6560
6561         return 0;
6562 }
6563
6564 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6565 {
6566         struct igb_adapter *adapter = netdev_priv(netdev);
6567         struct e1000_hw *hw = &adapter->hw;
6568         u32 ctrl, rctl;
6569         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6570
6571         if (enable) {
6572                 /* enable VLAN tag insert/strip */
6573                 ctrl = rd32(E1000_CTRL);
6574                 ctrl |= E1000_CTRL_VME;
6575                 wr32(E1000_CTRL, ctrl);
6576
6577                 /* Disable CFI check */
6578                 rctl = rd32(E1000_RCTL);
6579                 rctl &= ~E1000_RCTL_CFIEN;
6580                 wr32(E1000_RCTL, rctl);
6581         } else {
6582                 /* disable VLAN tag insert/strip */
6583                 ctrl = rd32(E1000_CTRL);
6584                 ctrl &= ~E1000_CTRL_VME;
6585                 wr32(E1000_CTRL, ctrl);
6586         }
6587
6588         igb_rlpml_set(adapter);
6589 }
6590
6591 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6592 {
6593         struct igb_adapter *adapter = netdev_priv(netdev);
6594         struct e1000_hw *hw = &adapter->hw;
6595         int pf_id = adapter->vfs_allocated_count;
6596
6597         /* attempt to add filter to vlvf array */
6598         igb_vlvf_set(adapter, vid, true, pf_id);
6599
6600         /* add the filter since PF can receive vlans w/o entry in vlvf */
6601         igb_vfta_set(hw, vid, true);
6602
6603         set_bit(vid, adapter->active_vlans);
6604
6605         return 0;
6606 }
6607
6608 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6609 {
6610         struct igb_adapter *adapter = netdev_priv(netdev);
6611         struct e1000_hw *hw = &adapter->hw;
6612         int pf_id = adapter->vfs_allocated_count;
6613         s32 err;
6614
6615         /* remove vlan from VLVF table array */
6616         err = igb_vlvf_set(adapter, vid, false, pf_id);
6617
6618         /* if vid was not present in VLVF just remove it from table */
6619         if (err)
6620                 igb_vfta_set(hw, vid, false);
6621
6622         clear_bit(vid, adapter->active_vlans);
6623
6624         return 0;
6625 }
6626
6627 static void igb_restore_vlan(struct igb_adapter *adapter)
6628 {
6629         u16 vid;
6630
6631         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6632
6633         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6634                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6635 }
6636
6637 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6638 {
6639         struct pci_dev *pdev = adapter->pdev;
6640         struct e1000_mac_info *mac = &adapter->hw.mac;
6641
6642         mac->autoneg = 0;
6643
6644         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6645          * for the switch() below to work */
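             /* e.g. SPEED_100 + DUPLEX_FULL = 100 + 1 = 101: since DUPLEX_FULL
              * only contributes bit 0, every supported pair sums to a distinct
              * case label */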
6646         if ((spd & 1) || (dplx & ~1))
6647                 goto err_inval;
6648
6649         /* Fiber NICs only allow 1000 Mbps full duplex */
6650         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6651             (spd != SPEED_1000 ||
6652              dplx != DUPLEX_FULL))
6653                 goto err_inval;
6654
6655         switch (spd + dplx) {
6656         case SPEED_10 + DUPLEX_HALF:
6657                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6658                 break;
6659         case SPEED_10 + DUPLEX_FULL:
6660                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6661                 break;
6662         case SPEED_100 + DUPLEX_HALF:
6663                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6664                 break;
6665         case SPEED_100 + DUPLEX_FULL:
6666                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6667                 break;
6668         case SPEED_1000 + DUPLEX_FULL:
6669                 mac->autoneg = 1;
6670                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6671                 break;
6672         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6673         default:
6674                 goto err_inval;
6675         }
6676         return 0;
6677
6678 err_inval:
6679         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6680         return -EINVAL;
6681 }
6682
6683 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6684                           bool runtime)
6685 {
6686         struct net_device *netdev = pci_get_drvdata(pdev);
6687         struct igb_adapter *adapter = netdev_priv(netdev);
6688         struct e1000_hw *hw = &adapter->hw;
6689         u32 ctrl, rctl, status;
6690         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6691 #ifdef CONFIG_PM
6692         int retval = 0;
6693 #endif
6694
6695         netif_device_detach(netdev);
6696
6697         if (netif_running(netdev))
6698                 __igb_close(netdev, true);
6699
6700         igb_clear_interrupt_scheme(adapter);
6701
6702 #ifdef CONFIG_PM
6703         retval = pci_save_state(pdev);
6704         if (retval)
6705                 return retval;
6706 #endif
6707
6708         status = rd32(E1000_STATUS);
6709         if (status & E1000_STATUS_LU)
6710                 wufc &= ~E1000_WUFC_LNKC;
6711
6712         if (wufc) {
6713                 igb_setup_rctl(adapter);
6714                 igb_set_rx_mode(netdev);
6715
6716                 /* turn on all-multi mode if wake on multicast is enabled */
6717                 if (wufc & E1000_WUFC_MC) {
6718                         rctl = rd32(E1000_RCTL);
6719                         rctl |= E1000_RCTL_MPE;
6720                         wr32(E1000_RCTL, rctl);
6721                 }
6722
6723                 ctrl = rd32(E1000_CTRL);
6724                 /* advertise wake from D3Cold */
6725                 #define E1000_CTRL_ADVD3WUC 0x00100000
6726                 /* phy power management enable */
6727                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6728                 ctrl |= E1000_CTRL_ADVD3WUC;
6729                 wr32(E1000_CTRL, ctrl);
6730
6731                 /* Allow time for pending master requests to run */
6732                 igb_disable_pcie_master(hw);
6733
6734                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6735                 wr32(E1000_WUFC, wufc);
6736         } else {
6737                 wr32(E1000_WUC, 0);
6738                 wr32(E1000_WUFC, 0);
6739         }
6740
6741         *enable_wake = wufc || adapter->en_mng_pt;
6742         if (!*enable_wake)
6743                 igb_power_down_link(adapter);
6744         else
6745                 igb_power_up_link(adapter);
6746
6747         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6748          * would have already happened in close and is redundant. */
6749         igb_release_hw_control(adapter);
6750
6751         pci_disable_device(pdev);
6752
6753         return 0;
6754 }
6755
6756 #ifdef CONFIG_PM
6757 #ifdef CONFIG_PM_SLEEP
6758 static int igb_suspend(struct device *dev)
6759 {
6760         int retval;
6761         bool wake;
6762         struct pci_dev *pdev = to_pci_dev(dev);
6763
6764         retval = __igb_shutdown(pdev, &wake, 0);
6765         if (retval)
6766                 return retval;
6767
6768         if (wake) {
6769                 pci_prepare_to_sleep(pdev);
6770         } else {
6771                 pci_wake_from_d3(pdev, false);
6772                 pci_set_power_state(pdev, PCI_D3hot);
6773         }
6774
6775         return 0;
6776 }
6777 #endif /* CONFIG_PM_SLEEP */
6778
6779 static int igb_resume(struct device *dev)
6780 {
6781         struct pci_dev *pdev = to_pci_dev(dev);
6782         struct net_device *netdev = pci_get_drvdata(pdev);
6783         struct igb_adapter *adapter = netdev_priv(netdev);
6784         struct e1000_hw *hw = &adapter->hw;
6785         int err;
6786
6787         pci_set_power_state(pdev, PCI_D0);
6788         pci_restore_state(pdev);
6789         pci_save_state(pdev);
6790
6791         err = pci_enable_device_mem(pdev);
6792         if (err) {
6793                 dev_err(&pdev->dev,
6794                         "igb: Cannot enable PCI device from suspend\n");
6795                 return err;
6796         }
6797         pci_set_master(pdev);
6798
6799         pci_enable_wake(pdev, PCI_D3hot, 0);
6800         pci_enable_wake(pdev, PCI_D3cold, 0);
6801
6802         if (igb_init_interrupt_scheme(adapter)) {
6803                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6804                 return -ENOMEM;
6805         }
6806
6807         igb_reset(adapter);
6808
6809         /* let the f/w know that the h/w is now under the control of the
6810          * driver. */
6811         igb_get_hw_control(adapter);
6812
6813         wr32(E1000_WUS, ~0);
6814
6815         if (netdev->flags & IFF_UP) {
6816                 err = __igb_open(netdev, true);
6817                 if (err)
6818                         return err;
6819         }
6820
6821         netif_device_attach(netdev);
6822         return 0;
6823 }
6824
6825 #ifdef CONFIG_PM_RUNTIME
6826 static int igb_runtime_idle(struct device *dev)
6827 {
6828         struct pci_dev *pdev = to_pci_dev(dev);
6829         struct net_device *netdev = pci_get_drvdata(pdev);
6830         struct igb_adapter *adapter = netdev_priv(netdev);
6831
6832         if (!igb_has_link(adapter))
6833                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6834
6835         return -EBUSY;
6836 }
6837
6838 static int igb_runtime_suspend(struct device *dev)
6839 {
6840         struct pci_dev *pdev = to_pci_dev(dev);
6841         int retval;
6842         bool wake;
6843
6844         retval = __igb_shutdown(pdev, &wake, 1);
6845         if (retval)
6846                 return retval;
6847
6848         if (wake) {
6849                 pci_prepare_to_sleep(pdev);
6850         } else {
6851                 pci_wake_from_d3(pdev, false);
6852                 pci_set_power_state(pdev, PCI_D3hot);
6853         }
6854
6855         return 0;
6856 }
6857
6858 static int igb_runtime_resume(struct device *dev)
6859 {
6860         return igb_resume(dev);
6861 }
6862 #endif /* CONFIG_PM_RUNTIME */
6863 #endif
6864
6865 static void igb_shutdown(struct pci_dev *pdev)
6866 {
6867         bool wake;
6868
6869         __igb_shutdown(pdev, &wake, 0);
6870
6871         if (system_state == SYSTEM_POWER_OFF) {
6872                 pci_wake_from_d3(pdev, wake);
6873                 pci_set_power_state(pdev, PCI_D3hot);
6874         }
6875 }
6876
6877 #ifdef CONFIG_NET_POLL_CONTROLLER
6878 /*
6879  * Polling 'interrupt' - used by things like netconsole to send skbs
6880  * without having to re-enable interrupts. It's not called while
6881  * the interrupt routine is executing.
6882  */
6883 static void igb_netpoll(struct net_device *netdev)
6884 {
6885         struct igb_adapter *adapter = netdev_priv(netdev);
6886         struct e1000_hw *hw = &adapter->hw;
6887         struct igb_q_vector *q_vector;
6888         int i;
6889
6890         for (i = 0; i < adapter->num_q_vectors; i++) {
6891                 q_vector = adapter->q_vector[i];
6892                 if (adapter->msix_entries)
6893                         wr32(E1000_EIMC, q_vector->eims_value);
6894                 else
6895                         igb_irq_disable(adapter);
6896                 napi_schedule(&q_vector->napi);
6897         }
6898 }
6899 #endif /* CONFIG_NET_POLL_CONTROLLER */
6900
6901 /**
6902  * igb_io_error_detected - called when PCI error is detected
6903  * @pdev: Pointer to PCI device
6904  * @state: The current pci connection state
6905  *
6906  * This function is called after a PCI bus error affecting
6907  * this device has been detected.
6908  */
6909 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6910                                               pci_channel_state_t state)
6911 {
6912         struct net_device *netdev = pci_get_drvdata(pdev);
6913         struct igb_adapter *adapter = netdev_priv(netdev);
6914
6915         netif_device_detach(netdev);
6916
6917         if (state == pci_channel_io_perm_failure)
6918                 return PCI_ERS_RESULT_DISCONNECT;
6919
6920         if (netif_running(netdev))
6921                 igb_down(adapter);
6922         pci_disable_device(pdev);
6923
6924         /* Request a slot reset. */
6925         return PCI_ERS_RESULT_NEED_RESET;
6926 }
6927
6928 /**
6929  * igb_io_slot_reset - called after the pci bus has been reset.
6930  * @pdev: Pointer to PCI device
6931  *
6932  * Restart the card from scratch, as if from a cold boot. Implementation
6933  * resembles the first-half of the igb_resume routine.
6934  */
6935 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6936 {
6937         struct net_device *netdev = pci_get_drvdata(pdev);
6938         struct igb_adapter *adapter = netdev_priv(netdev);
6939         struct e1000_hw *hw = &adapter->hw;
6940         pci_ers_result_t result;
6941         int err;
6942
6943         if (pci_enable_device_mem(pdev)) {
6944                 dev_err(&pdev->dev,
6945                         "Cannot re-enable PCI device after reset.\n");
6946                 result = PCI_ERS_RESULT_DISCONNECT;
6947         } else {
6948                 pci_set_master(pdev);
6949                 pci_restore_state(pdev);
6950                 pci_save_state(pdev);
6951
6952                 pci_enable_wake(pdev, PCI_D3hot, 0);
6953                 pci_enable_wake(pdev, PCI_D3cold, 0);
6954
6955                 igb_reset(adapter);
6956                 wr32(E1000_WUS, ~0);
6957                 result = PCI_ERS_RESULT_RECOVERED;
6958         }
6959
6960         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6961         if (err) {
6962                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6963                         "failed 0x%0x\n", err);
6964                 /* non-fatal, continue */
6965         }
6966
6967         return result;
6968 }
6969
6970 /**
6971  * igb_io_resume - called when traffic can start flowing again.
6972  * @pdev: Pointer to PCI device
6973  *
6974  * This callback is called when the error recovery driver tells us that
6975  * its OK to resume normal operation. Implementation resembles the
6976  * it's OK to resume normal operation. Implementation resembles the
6977  */
6978 static void igb_io_resume(struct pci_dev *pdev)
6979 {
6980         struct net_device *netdev = pci_get_drvdata(pdev);
6981         struct igb_adapter *adapter = netdev_priv(netdev);
6982
6983         if (netif_running(netdev)) {
6984                 if (igb_up(adapter)) {
6985                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6986                         return;
6987                 }
6988         }
6989
6990         netif_device_attach(netdev);
6991
6992         /* let the f/w know that the h/w is now under the control of the
6993          * driver. */
6994         igb_get_hw_control(adapter);
6995 }
6996
6997 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6998                              u8 qsel)
6999 {
7000         u32 rar_low, rar_high;
7001         struct e1000_hw *hw = &adapter->hw;
7002
7003         /* HW expects these in little endian so we reverse the byte order
7004          * from network order (big endian) to little endian
7005          */
7006         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7007                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7008         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7009
7010         /* Indicate to hardware the Address is Valid. */
7011         rar_high |= E1000_RAH_AV;
7012
7013         if (hw->mac.type == e1000_82575)
7014                 rar_high |= E1000_RAH_POOL_1 * qsel;
7015         else
7016                 rar_high |= E1000_RAH_POOL_1 << qsel;
7017
7018         wr32(E1000_RAL(index), rar_low);
7019         wrfl();
7020         wr32(E1000_RAH(index), rar_high);
7021         wrfl();
7022 }
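
     /*
      * Packing example: for the address 00:1b:21:aa:bb:cc the math above
      * yields rar_low = 0xaa211b00 and rar_high = 0x0000ccbb (before the
      * AV and pool bits are OR'd in), i.e. byte 0 of the address lands in
      * the least significant byte of RAL, as the hardware expects.
      */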
7023
7024 static int igb_set_vf_mac(struct igb_adapter *adapter,
7025                           int vf, unsigned char *mac_addr)
7026 {
7027         struct e1000_hw *hw = &adapter->hw;
7028         /* VF MAC addresses start at the end of the receive address registers
7029          * and move towards the first, so a collision should not be possible */
7030         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7031
7032         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7033
7034         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7035
7036         return 0;
7037 }
7038
7039 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7040 {
7041         struct igb_adapter *adapter = netdev_priv(netdev);
7042         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7043                 return -EINVAL;
7044         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7045         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7046         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7047                                       " change effective.\n");
7048         if (test_bit(__IGB_DOWN, &adapter->state)) {
7049                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7050                          " but the PF device is not up.\n");
7051                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7052                          " attempting to use the VF device.\n");
7053         }
7054         return igb_set_vf_mac(adapter, vf, mac);
7055 }
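
/*
 * Usage note: this ndo is normally reached from userspace through the
 * rtnetlink VF attributes, e.g. with iproute2:
 *   ip link set eth0 vf 0 mac 00:1b:21:aa:bb:cc
 * (interface name and address are examples only); as the messages above
 * say, the VF driver must be reloaded before the new address takes effect.
 */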
7056
7057 static int igb_link_mbps(int internal_link_speed)
7058 {
7059         switch (internal_link_speed) {
7060         case SPEED_100:
7061                 return 100;
7062         case SPEED_1000:
7063                 return 1000;
7064         default:
7065                 return 0;
7066         }
7067 }
7068
7069 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7070                                   int link_speed)
7071 {
7072         int rf_dec, rf_int;
7073         u32 bcnrc_val;
7074
7075         if (tx_rate != 0) {
7076                 /* Calculate the rate factor values to set */
7077                 rf_int = link_speed / tx_rate;
7078                 rf_dec = (link_speed - (rf_int * tx_rate));
7079                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7080
7081                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7082                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7083                                E1000_RTTBCNRC_RF_INT_MASK);
7084                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7085         } else {
7086                 bcnrc_val = 0;
7087         }
7088
7089         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7090         wr32(E1000_RTTBCNRC, bcnrc_val);
7091 }
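
/*
 * Illustrative sketch (guarded out of the build): the value programmed into
 * RTTBCNRC encodes link_speed / tx_rate as a fixed-point rate factor with
 * E1000_RTTBCNRC_RF_INT_SHIFT fractional bits. The numbers below assume a
 * 14-bit shift and are examples only. For a 1000 Mbps link limited to
 * 300 Mbps:
 *   rf_int = 1000 / 300 = 3
 *   rf_dec = (1000 - 3 * 300) * (1 << 14) / 300 = 5461
 * giving a factor of 3 + 5461/16384 ~= 3.333, i.e. 1000/300.
 */
#if 0
static void igb_example_rate_factor(void)
{
	int link_speed = 1000;	/* example link speed, Mbps */
	int tx_rate = 300;	/* example VF cap, Mbps */
	int rf_int = link_speed / tx_rate;
	int rf_dec = link_speed - (rf_int * tx_rate);

	/* same fixed-point conversion as igb_set_vf_rate_limit() */
	rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

	/* prints rf_int=3 rf_dec=5461 with a 14-bit shift */
	pr_info("rf_int=%d rf_dec=%d\n", rf_int, rf_dec);
}
#endif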
7092
7093 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7094 {
7095         int actual_link_speed, i;
7096         bool reset_rate = false;
7097
7098         /* VF TX rate limit was not set or not supported */
7099         if ((adapter->vf_rate_link_speed == 0) ||
7100             (adapter->hw.mac.type != e1000_82576))
7101                 return;
7102
7103         actual_link_speed = igb_link_mbps(adapter->link_speed);
7104         if (actual_link_speed != adapter->vf_rate_link_speed) {
7105                 reset_rate = true;
7106                 adapter->vf_rate_link_speed = 0;
7107                 dev_info(&adapter->pdev->dev,
7108                          "Link speed has been changed. VF Transmit "
7109                          "rate is disabled\n");
7110         }
7111
7112         for (i = 0; i < adapter->vfs_allocated_count; i++) {
7113                 if (reset_rate)
7114                         adapter->vf_data[i].tx_rate = 0;
7115
7116                 igb_set_vf_rate_limit(&adapter->hw, i,
7117                                       adapter->vf_data[i].tx_rate,
7118                                       actual_link_speed);
7119         }
7120 }
7121
7122 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7123 {
7124         struct igb_adapter *adapter = netdev_priv(netdev);
7125         struct e1000_hw *hw = &adapter->hw;
7126         int actual_link_speed;
7127
7128         if (hw->mac.type != e1000_82576)
7129                 return -EOPNOTSUPP;
7130
7131         actual_link_speed = igb_link_mbps(adapter->link_speed);
7132         if ((vf >= adapter->vfs_allocated_count) ||
7133             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7134             (tx_rate < 0) || (tx_rate > actual_link_speed))
7135                 return -EINVAL;
7136
7137         adapter->vf_rate_link_speed = actual_link_speed;
7138         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7139         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7140
7141         return 0;
7142 }
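
/*
 * Usage note: the per-VF cap is set from userspace through rtnetlink,
 * e.g. with iproute2:
 *   ip link set eth0 vf 0 rate 300
 * (interface name and rate are examples only); a rate of 0 removes the
 * limit, and igb_check_vf_rate_limit() drops the cap again if the link
 * speed later changes.
 */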
7143
7144 static int igb_ndo_get_vf_config(struct net_device *netdev,
7145                                  int vf, struct ifla_vf_info *ivi)
7146 {
7147         struct igb_adapter *adapter = netdev_priv(netdev);
7148         if (vf >= adapter->vfs_allocated_count)
7149                 return -EINVAL;
7150         ivi->vf = vf;
7151         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7152         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7153         ivi->vlan = adapter->vf_data[vf].pf_vlan;
7154         ivi->qos = adapter->vf_data[vf].pf_qos;
7155         return 0;
7156 }
7157
7158 static void igb_vmm_control(struct igb_adapter *adapter)
7159 {
7160         struct e1000_hw *hw = &adapter->hw;
7161         u32 reg;
7162
7163         switch (hw->mac.type) {
7164         case e1000_82575:
7165         default:
7166                 /* replication is not supported for 82575 */
7167                 return;
7168         case e1000_82576:
7169                 /* notify HW that the MAC is adding vlan tags */
7170                 reg = rd32(E1000_DTXCTL);
7171                 reg |= E1000_DTXCTL_VLAN_ADDED;
7172                 wr32(E1000_DTXCTL, reg);
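                /* fall through - the 82576 also needs the VLAN strip setting below */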
7173         case e1000_82580:
7174                 /* enable replication vlan tag stripping */
7175                 reg = rd32(E1000_RPLOLR);
7176                 reg |= E1000_RPLOLR_STRVLAN;
7177                 wr32(E1000_RPLOLR, reg);
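                /* fall through */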
7178         case e1000_i350:
7179                 /* none of the above registers are supported by i350 */
7180                 break;
7181         }
7182
7183         if (adapter->vfs_allocated_count) {
7184                 igb_vmdq_set_loopback_pf(hw, true);
7185                 igb_vmdq_set_replication_pf(hw, true);
7186                 igb_vmdq_set_anti_spoofing_pf(hw, true,
7187                                                 adapter->vfs_allocated_count);
7188         } else {
7189                 igb_vmdq_set_loopback_pf(hw, false);
7190                 igb_vmdq_set_replication_pf(hw, false);
7191         }
7192 }
7193
7194 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7195 {
7196         struct e1000_hw *hw = &adapter->hw;
7197         u32 dmac_thr;
7198         u16 hwm;
7199
7200         if (hw->mac.type > e1000_82580) {
7201                 if (adapter->flags & IGB_FLAG_DMAC) {
7202                         u32 reg;
7203
7204                         /* force threshold to 0. */
7205                         wr32(E1000_DMCTXTH, 0);
7206
7207                         /*
7208                          * DMA Coalescing high water mark needs to be greater
7209                          * than the Rx threshold. Set hwm to PBA - max frame
7210                          * size in 16B units, with a floor of PBA - 6KB.
7211                          */
7212                         hwm = 64 * pba - adapter->max_frame_size / 16;
7213                         if (hwm < 64 * (pba - 6))
7214                                 hwm = 64 * (pba - 6);
7215                         reg = rd32(E1000_FCRTC);
7216                         reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7217                         reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7218                                 & E1000_FCRTC_RTH_COAL_MASK);
7219                         wr32(E1000_FCRTC, reg);
7220
7221                         /*
7222                          * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7223                          * frame size, with a floor of PBA - 10KB.
7224                          */
7225                         dmac_thr = pba - adapter->max_frame_size / 512;
7226                         if (dmac_thr < pba - 10)
7227                                 dmac_thr = pba - 10;
7228                         reg = rd32(E1000_DMACR);
7229                         reg &= ~E1000_DMACR_DMACTHR_MASK;
7230                         reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7231                                 & E1000_DMACR_DMACTHR_MASK);
7232
7233                         /* transition to L0s or L1 if available */
7234                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7235
7236                         /* watchdog timer ~1000 usec, in 32 usec units */
7237                         reg |= (1000 >> 5);
7238                         wr32(E1000_DMACR, reg);
7239
7240                         /*
7241                          * no lower threshold to disable coalescing
7242                          * (smart FIFO): UTRESH = 0
7243                          */
7244                         wr32(E1000_DMCRTRH, 0);
7245
7246                         reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7247
7248                         wr32(E1000_DMCTLX, reg);
7249
7250                         /*
7251                          * free space in the Tx packet buffer needed to
7252                          * wake from DMA coalescing
7253                          */
7254                         wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7255                              (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7256
7257                         /*
7258                          * make the low-power state decision controlled
7259                          * by DMA coalescing
7260                          */
7261                         reg = rd32(E1000_PCIEMISC);
7262                         reg &= ~E1000_PCIEMISC_LX_DECISION;
7263                         wr32(E1000_PCIEMISC, reg);
7264                 } /* end if IGB_FLAG_DMAC is set */
7265         } else if (hw->mac.type == e1000_82580) {
7266                 u32 reg = rd32(E1000_PCIEMISC);
7267                 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7268                 wr32(E1000_DMACR, 0);
7269         }
7270 }
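
/*
 * Illustrative sketch (guarded out of the build): worked watermark math for
 * the DMA coalescing setup above, assuming pba is in KB as the comments
 * imply; pba = 34 and a 1522-byte max frame are examples only:
 *   hwm      = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081 (16-byte units),
 *              already above the 64 * (34 - 6) = 1792 floor;
 *   dmac_thr = 34 - 1522 / 512 = 34 - 2 = 32 (KB),
 *              already above the 34 - 10 = 24 floor.
 */
#if 0
static void igb_example_dmac_watermarks(void)
{
	u32 pba = 34;			/* example packet buffer size, KB */
	u32 max_frame_size = 1522;	/* example max frame size, bytes */
	u16 hwm = 64 * pba - max_frame_size / 16;
	u32 dmac_thr = pba - max_frame_size / 512;

	/* same floors as igb_init_dmac() */
	if (hwm < 64 * (pba - 6))
		hwm = 64 * (pba - 6);
	if (dmac_thr < pba - 10)
		dmac_thr = pba - 10;

	/* prints hwm=2081 dmac_thr=32 */
	pr_info("hwm=%u dmac_thr=%u\n", hwm, dmac_thr);
}
#endif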
7271
7272 /* igb_main.c */