rt2x00: Serialize TX operations on a queue.
[linux-flexiantxendom0-natty.git] / drivers / net / wireless / rt2x00 / rt2x00queue.c
index 7f908a1..b1a9078 100644 (file)
@@ -1,5 +1,7 @@
 /*
-       Copyright (C) 2004 - 2008 rt2x00 SourceForge Project
+       Copyright (C) 2010 Willow Garage <http://www.willowgarage.com>
+       Copyright (C) 2004 - 2010 Ivo van Doorn <IvDoorn@gmail.com>
+       Copyright (C) 2004 - 2009 Gertjan van Wingerde <gwingerde@gmail.com>
        <http://rt2x00.serialmonkey.com>
 
        This program is free software; you can redistribute it and/or modify
@@ -23,6 +25,7 @@
        Abstract: rt2x00 queue specific routines.
  */
 
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
@@ -30,9 +33,9 @@
 #include "rt2x00.h"
 #include "rt2x00lib.h"
 
-struct sk_buff *rt2x00queue_alloc_rxskb(struct rt2x00_dev *rt2x00dev,
-                                       struct queue_entry *entry)
+struct sk_buff *rt2x00queue_alloc_rxskb(struct queue_entry *entry)
 {
+       struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
        struct sk_buff *skb;
        struct skb_frame_desc *skbdesc;
        unsigned int frame_size;
@@ -55,14 +58,12 @@ struct sk_buff *rt2x00queue_alloc_rxskb(struct rt2x00_dev *rt2x00dev,
        /*
         * For IV/EIV/ICV assembly we must make sure there is
         * at least 8 bytes bytes available in headroom for IV/EIV
-        * and 4 bytes for ICV data as tailroon.
+        * and 8 bytes for ICV data as tailroom.
         */
-#ifdef CONFIG_RT2X00_LIB_CRYPTO
        if (test_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags)) {
                head_size += 8;
-               tail_size += 4;
+               tail_size += 8;
        }
-#endif /* CONFIG_RT2X00_LIB_CRYPTO */
 
        /*
         * Allocate skbuffer.
@@ -96,121 +97,235 @@ struct sk_buff *rt2x00queue_alloc_rxskb(struct rt2x00_dev *rt2x00dev,
        return skb;
 }
 
-void rt2x00queue_map_txskb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb)
+void rt2x00queue_map_txskb(struct queue_entry *entry)
 {
-       struct skb_frame_desc *skbdesc = get_skb_frame_desc(skb);
-
-       /*
-        * If device has requested headroom, we should make sure that
-        * is also mapped to the DMA so it can be used for transfering
-        * additional descriptor information to the hardware.
-        */
-       skb_push(skb, rt2x00dev->hw->extra_tx_headroom);
+       struct device *dev = entry->queue->rt2x00dev->dev;
+       struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb);
 
        skbdesc->skb_dma =
-           dma_map_single(rt2x00dev->dev, skb->data, skb->len, DMA_TO_DEVICE);
-
-       /*
-        * Restore data pointer to original location again.
-        */
-       skb_pull(skb, rt2x00dev->hw->extra_tx_headroom);
-
+           dma_map_single(dev, entry->skb->data, entry->skb->len, DMA_TO_DEVICE);
        skbdesc->flags |= SKBDESC_DMA_MAPPED_TX;
 }
 EXPORT_SYMBOL_GPL(rt2x00queue_map_txskb);
 
-void rt2x00queue_unmap_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb)
+void rt2x00queue_unmap_skb(struct queue_entry *entry)
 {
-       struct skb_frame_desc *skbdesc = get_skb_frame_desc(skb);
+       struct device *dev = entry->queue->rt2x00dev->dev;
+       struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb);
 
        if (skbdesc->flags & SKBDESC_DMA_MAPPED_RX) {
-               dma_unmap_single(rt2x00dev->dev, skbdesc->skb_dma, skb->len,
+               dma_unmap_single(dev, skbdesc->skb_dma, entry->skb->len,
                                 DMA_FROM_DEVICE);
                skbdesc->flags &= ~SKBDESC_DMA_MAPPED_RX;
-       }
-
-       if (skbdesc->flags & SKBDESC_DMA_MAPPED_TX) {
-               /*
-                * Add headroom to the skb length, it has been removed
-                * by the driver, but it was actually mapped to DMA.
-                */
-               dma_unmap_single(rt2x00dev->dev, skbdesc->skb_dma,
-                                skb->len + rt2x00dev->hw->extra_tx_headroom,
+       } else if (skbdesc->flags & SKBDESC_DMA_MAPPED_TX) {
+               dma_unmap_single(dev, skbdesc->skb_dma, entry->skb->len,
                                 DMA_TO_DEVICE);
                skbdesc->flags &= ~SKBDESC_DMA_MAPPED_TX;
        }
 }
+EXPORT_SYMBOL_GPL(rt2x00queue_unmap_skb);
 
-void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb)
+void rt2x00queue_free_skb(struct queue_entry *entry)
 {
-       if (!skb)
+       if (!entry->skb)
                return;
 
-       rt2x00queue_unmap_skb(rt2x00dev, skb);
-       dev_kfree_skb_any(skb);
+       rt2x00queue_unmap_skb(entry);
+       dev_kfree_skb_any(entry->skb);
+       entry->skb = NULL;
 }
 
-static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
-                                            struct txentry_desc *txdesc)
+void rt2x00queue_align_frame(struct sk_buff *skb)
+{
+       unsigned int frame_length = skb->len;
+       unsigned int align = ALIGN_SIZE(skb, 0);
+
+       if (!align)
+               return;
+
+       skb_push(skb, align);
+       memmove(skb->data, skb->data + align, frame_length);
+       skb_trim(skb, frame_length);
+}
+
+void rt2x00queue_align_payload(struct sk_buff *skb, unsigned int header_length)
+{
+       unsigned int frame_length = skb->len;
+       unsigned int align = ALIGN_SIZE(skb, header_length);
+
+       if (!align)
+               return;
+
+       skb_push(skb, align);
+       memmove(skb->data, skb->data + align, frame_length);
+       skb_trim(skb, frame_length);
+}
+
+void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int header_length)
+{
+       unsigned int payload_length = skb->len - header_length;
+       unsigned int header_align = ALIGN_SIZE(skb, 0);
+       unsigned int payload_align = ALIGN_SIZE(skb, header_length);
+       unsigned int l2pad = payload_length ? L2PAD_SIZE(header_length) : 0;
+
+       /*
+        * Adjust the header alignment if the payload needs to be moved more
+        * than the header.
+        */
+       if (payload_align > header_align)
+               header_align += 4;
+
+       /* There is nothing to do if no alignment is needed */
+       if (!header_align)
+               return;
+
+       /* Reserve the amount of space needed in front of the frame */
+       skb_push(skb, header_align);
+
+       /*
+        * Move the header.
+        */
+       memmove(skb->data, skb->data + header_align, header_length);
+
+       /* Move the payload, if present and if required */
+       if (payload_length && payload_align)
+               memmove(skb->data + header_length + l2pad,
+                       skb->data + header_length + l2pad + payload_align,
+                       payload_length);
+
+       /* Trim the skb to the correct size */
+       skb_trim(skb, header_length + l2pad + payload_length);
+}
+
+void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int header_length)
+{
+       /*
+        * L2 padding is only present if the skb contains more than just the
+        * IEEE 802.11 header.
+        */
+       unsigned int l2pad = (skb->len > header_length) ?
+                               L2PAD_SIZE(header_length) : 0;
+
+       if (!l2pad)
+               return;
+
+       memmove(skb->data + l2pad, skb->data, header_length);
+       skb_pull(skb, l2pad);
+}
+
+static void rt2x00queue_create_tx_descriptor_seq(struct queue_entry *entry,
+                                                struct txentry_desc *txdesc)
 {
-       struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
        struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
-       struct ieee80211_rate *rate =
-           ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
-       const struct rt2x00_rate *hwrate;
-       unsigned int data_length;
-       unsigned int duration;
-       unsigned int residual;
+       struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
        unsigned long irqflags;
 
-       memset(txdesc, 0, sizeof(*txdesc));
+       if (!(tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) ||
+           unlikely(!tx_info->control.vif))
+               return;
 
        /*
-        * Initialize information from queue
+        * Hardware should insert sequence counter.
+        * FIXME: We insert a software sequence counter first for
+        * hardware that doesn't support hardware sequence counting.
+        *
+        * This is wrong because beacons are not getting sequence
+        * numbers assigned properly.
+        *
+        * A secondary problem exists for drivers that cannot toggle
+        * sequence counting per-frame, since those will override the
+        * sequence counter given by mac80211.
         */
-       txdesc->queue = entry->queue->qid;
-       txdesc->cw_min = entry->queue->cw_min;
-       txdesc->cw_max = entry->queue->cw_max;
-       txdesc->aifs = entry->queue->aifs;
+       spin_lock_irqsave(&intf->seqlock, irqflags);
 
-       /* Data length + CRC + IV/EIV/ICV/MMIC (when using encryption) */
+       if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
+               intf->seqno += 0x10;
+       hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
+       hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+
+       spin_unlock_irqrestore(&intf->seqlock, irqflags);
+
+       __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
+}
+
+static void rt2x00queue_create_tx_descriptor_plcp(struct queue_entry *entry,
+                                                 struct txentry_desc *txdesc,
+                                                 const struct rt2x00_rate *hwrate)
+{
+       struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
+       struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
+       struct ieee80211_tx_rate *txrate = &tx_info->control.rates[0];
+       unsigned int data_length;
+       unsigned int duration;
+       unsigned int residual;
+
+       /* Data length + CRC + Crypto overhead (IV/EIV/ICV/MIC) */
        data_length = entry->skb->len + 4;
+       data_length += rt2x00crypto_tx_overhead(rt2x00dev, entry->skb);
 
        /*
-        * Check whether this frame is to be acked.
+        * PLCP setup
+        * Length calculation depends on OFDM/CCK rate.
         */
-       if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK))
-               __set_bit(ENTRY_TXD_ACK, &txdesc->flags);
-
-#ifdef CONFIG_RT2X00_LIB_CRYPTO
-       if (test_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags) &&
-           !entry->skb->do_not_encrypt) {
-               struct ieee80211_key_conf *hw_key = tx_info->control.hw_key;
+       txdesc->signal = hwrate->plcp;
+       txdesc->service = 0x04;
 
-               __set_bit(ENTRY_TXD_ENCRYPT, &txdesc->flags);
+       if (hwrate->flags & DEV_RATE_OFDM) {
+               txdesc->length_high = (data_length >> 6) & 0x3f;
+               txdesc->length_low = data_length & 0x3f;
+       } else {
+               /*
+                * Convert length to microseconds.
+                */
+               residual = GET_DURATION_RES(data_length, hwrate->bitrate);
+               duration = GET_DURATION(data_length, hwrate->bitrate);
 
-               txdesc->cipher = rt2x00crypto_key_to_cipher(hw_key);
+               if (residual != 0) {
+                       duration++;
 
-               if (hw_key->flags & IEEE80211_KEY_FLAG_PAIRWISE)
-                       __set_bit(ENTRY_TXD_ENCRYPT_PAIRWISE, &txdesc->flags);
+                       /*
+                        * Check if we need to set the Length Extension
+                        */
+                       if (hwrate->bitrate == 110 && residual <= 30)
+                               txdesc->service |= 0x80;
+               }
 
-               txdesc->key_idx = hw_key->hw_key_idx;
-               txdesc->iv_offset = ieee80211_get_hdrlen_from_skb(entry->skb);
+               txdesc->length_high = (duration >> 8) & 0xff;
+               txdesc->length_low = duration & 0xff;
 
                /*
-                * Extend frame length to include all encryption overhead
-                * that will be added by the hardware.
+                * When preamble is enabled we should set the
+                * preamble bit for the signal.
                 */
-               data_length += rt2x00crypto_tx_overhead(tx_info);
+               if (txrate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
+                       txdesc->signal |= 0x08;
+       }
+}
 
-               if (!(hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV))
-                       __set_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc->flags);
+static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
+                                            struct txentry_desc *txdesc)
+{
+       struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
+       struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
+       struct ieee80211_rate *rate =
+           ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
+       const struct rt2x00_rate *hwrate;
 
-               if (!(hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC))
-                       __set_bit(ENTRY_TXD_ENCRYPT_MMIC, &txdesc->flags);
-       }
-#endif /* CONFIG_RT2X00_LIB_CRYPTO */
+       memset(txdesc, 0, sizeof(*txdesc));
+
+       /*
+        * Header and frame information.
+        */
+       txdesc->length = entry->skb->len;
+       txdesc->header_length = ieee80211_get_hdrlen_from_skb(entry->skb);
+
+       /*
+        * Check whether this frame is to be acked.
+        */
+       if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK))
+               __set_bit(ENTRY_TXD_ACK, &txdesc->flags);
 
        /*
         * Check if this is a RTS/CTS frame
@@ -243,11 +358,20 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
        }
 
        /*
+        * Check if more frames (!= fragments) are pending
+        */
+       if (tx_info->flags & IEEE80211_TX_CTL_MORE_FRAMES)
+               __set_bit(ENTRY_TXD_BURST, &txdesc->flags);
+
+       /*
         * Beacons and probe responses require the tsf timestamp
-        * to be inserted into the frame.
+        * to be inserted into the frame, except for a frame that has been injected
+        * through a monitor interface. This exception is needed for testing a
+        * monitor interface.
         */
-       if (ieee80211_is_beacon(hdr->frame_control) ||
-           ieee80211_is_probe_resp(hdr->frame_control))
+       if ((ieee80211_is_beacon(hdr->frame_control) ||
+           ieee80211_is_probe_resp(hdr->frame_control)) &&
+           (!(tx_info->flags & IEEE80211_TX_CTL_INJECTED)))
                __set_bit(ENTRY_TXD_REQ_TIMESTAMP, &txdesc->flags);
 
        /*
@@ -255,138 +379,130 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
         * Set ifs to IFS_SIFS when the this is not the first fragment,
         * or this fragment came after RTS/CTS.
         */
-       if (test_bit(ENTRY_TXD_RTS_FRAME, &txdesc->flags)) {
-               txdesc->ifs = IFS_SIFS;
-       } else if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) {
+       if ((tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) &&
+           !test_bit(ENTRY_TXD_RTS_FRAME, &txdesc->flags)) {
                __set_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags);
                txdesc->ifs = IFS_BACKOFF;
-       } else {
+       } else
                txdesc->ifs = IFS_SIFS;
-       }
 
        /*
-        * Hardware should insert sequence counter.
-        * FIXME: We insert a software sequence counter first for
-        * hardware that doesn't support hardware sequence counting.
-        *
-        * This is wrong because beacons are not getting sequence
-        * numbers assigned properly.
-        *
-        * A secondary problem exists for drivers that cannot toggle
-        * sequence counting per-frame, since those will override the
-        * sequence counter given by mac80211.
+        * Determine rate modulation.
         */
-       if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
-               if (likely(tx_info->control.vif)) {
-                       struct rt2x00_intf *intf;
-
-                       intf = vif_to_intf(tx_info->control.vif);
-
-                       spin_lock_irqsave(&intf->seqlock, irqflags);
+       hwrate = rt2x00_get_rate(rate->hw_value);
+       txdesc->rate_mode = RATE_MODE_CCK;
+       if (hwrate->flags & DEV_RATE_OFDM)
+               txdesc->rate_mode = RATE_MODE_OFDM;
 
-                       if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
-                               intf->seqno += 0x10;
-                       hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
-                       hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+       /*
+        * Apply TX descriptor handling by components
+        */
+       rt2x00crypto_create_tx_descriptor(entry, txdesc);
+       rt2x00ht_create_tx_descriptor(entry, txdesc, hwrate);
+       rt2x00queue_create_tx_descriptor_seq(entry, txdesc);
+       rt2x00queue_create_tx_descriptor_plcp(entry, txdesc, hwrate);
+}
 
-                       spin_unlock_irqrestore(&intf->seqlock, irqflags);
+static int rt2x00queue_write_tx_data(struct queue_entry *entry,
+                                    struct txentry_desc *txdesc)
+{
+       struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 
-                       __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
-               }
+       /*
+        * This should not happen, we already checked the entry
+        * was ours. When the hardware disagrees there has been
+        * a queue corruption!
+        */
+       if (unlikely(rt2x00dev->ops->lib->get_entry_state &&
+                    rt2x00dev->ops->lib->get_entry_state(entry))) {
+               ERROR(rt2x00dev,
+                     "Corrupt queue %d, accessing entry which is not ours.\n"
+                     "Please file bug report to %s.\n",
+                     entry->queue->qid, DRV_PROJECT);
+               return -EINVAL;
        }
 
        /*
-        * PLCP setup
-        * Length calculation depends on OFDM/CCK rate.
+        * Add the requested extra tx headroom in front of the skb.
         */
-       hwrate = rt2x00_get_rate(rate->hw_value);
-       txdesc->signal = hwrate->plcp;
-       txdesc->service = 0x04;
-
-       if (hwrate->flags & DEV_RATE_OFDM) {
-               __set_bit(ENTRY_TXD_OFDM_RATE, &txdesc->flags);
-
-               txdesc->length_high = (data_length >> 6) & 0x3f;
-               txdesc->length_low = data_length & 0x3f;
-       } else {
-               /*
-                * Convert length to microseconds.
-                */
-               residual = GET_DURATION_RES(data_length, hwrate->bitrate);
-               duration = GET_DURATION(data_length, hwrate->bitrate);
+       skb_push(entry->skb, rt2x00dev->ops->extra_tx_headroom);
+       memset(entry->skb->data, 0, rt2x00dev->ops->extra_tx_headroom);
 
-               if (residual != 0) {
-                       duration++;
-
-                       /*
-                        * Check if we need to set the Length Extension
-                        */
-                       if (hwrate->bitrate == 110 && residual <= 30)
-                               txdesc->service |= 0x80;
-               }
+       /*
+        * Call the driver's write_tx_data function, if it exists.
+        */
+       if (rt2x00dev->ops->lib->write_tx_data)
+               rt2x00dev->ops->lib->write_tx_data(entry, txdesc);
 
-               txdesc->length_high = (duration >> 8) & 0xff;
-               txdesc->length_low = duration & 0xff;
+       /*
+        * Map the skb to DMA.
+        */
+       if (test_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags))
+               rt2x00queue_map_txskb(entry);
 
-               /*
-                * When preamble is enabled we should set the
-                * preamble bit for the signal.
-                */
-               if (rt2x00_get_rate_preamble(rate->hw_value))
-                       txdesc->signal |= 0x08;
-       }
+       return 0;
 }
 
 static void rt2x00queue_write_tx_descriptor(struct queue_entry *entry,
                                            struct txentry_desc *txdesc)
 {
        struct data_queue *queue = entry->queue;
-       struct rt2x00_dev *rt2x00dev = queue->rt2x00dev;
 
-       rt2x00dev->ops->lib->write_tx_desc(rt2x00dev, entry->skb, txdesc);
+       queue->rt2x00dev->ops->lib->write_tx_desc(entry, txdesc);
 
        /*
         * All processing on the frame has been completed, this means
         * it is now ready to be dumped to userspace through debugfs.
         */
-       rt2x00debug_dump_frame(rt2x00dev, DUMP_FRAME_TX, entry->skb);
+       rt2x00debug_dump_frame(queue->rt2x00dev, DUMP_FRAME_TX, entry->skb);
+}
 
+static void rt2x00queue_kick_tx_queue(struct data_queue *queue,
+                                     struct txentry_desc *txdesc)
+{
        /*
         * Check if we need to kick the queue, there are however a few rules
-        *      1) Don't kick beacon queue
-        *      2) Don't kick unless this is the last in frame in a burst.
+        *      1) Don't kick unless this is the last frame in a burst.
         *         When the burst flag is set, this frame is always followed
         *         by another frame which in some way are related to eachother.
         *         This is true for fragments, RTS or CTS-to-self frames.
-        *      3) Rule 2 can be broken when the available entries
+        *      2) Rule 1 can be broken when the available entries
+        *         in the queue are less than a certain threshold.
         */
-       if (entry->queue->qid == QID_BEACON)
-               return;
-
        if (rt2x00queue_threshold(queue) ||
            !test_bit(ENTRY_TXD_BURST, &txdesc->flags))
-               rt2x00dev->ops->lib->kick_tx_queue(rt2x00dev, queue->qid);
+               queue->rt2x00dev->ops->lib->kick_queue(queue);
 }
 
-int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
+int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb,
+                              bool local)
 {
        struct ieee80211_tx_info *tx_info;
-       struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX);
+       struct queue_entry *entry;
        struct txentry_desc txdesc;
        struct skb_frame_desc *skbdesc;
-       unsigned int iv_len = 0;
        u8 rate_idx, rate_flags;
+       int ret = 0;
 
-       if (unlikely(rt2x00queue_full(queue)))
-               return -ENOBUFS;
+       spin_lock(&queue->tx_lock);
 
-       if (test_and_set_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) {
+       entry = rt2x00queue_get_entry(queue, Q_INDEX);
+
+       if (unlikely(rt2x00queue_full(queue))) {
+               ERROR(queue->rt2x00dev,
+                     "Dropping frame due to full tx queue %d.\n", queue->qid);
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       if (unlikely(test_and_set_bit(ENTRY_OWNER_DEVICE_DATA,
+                                     &entry->flags))) {
                ERROR(queue->rt2x00dev,
                      "Arrived at non-free entry in the non-full queue %d.\n"
                      "Please file bug report to %s.\n",
                      queue->qid, DRV_PROJECT);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        /*
@@ -397,9 +513,6 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
        entry->skb = skb;
        rt2x00queue_create_tx_descriptor(entry, &txdesc);
 
-       if (IEEE80211_SKB_CB(skb)->control.hw_key != NULL)
-               iv_len = IEEE80211_SKB_CB(skb)->control.hw_key->iv_len;
-
        /*
         * All information is retrieved from the skb->cb array,
         * now we should claim ownership of the driver part of that
@@ -414,51 +527,87 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
        skbdesc->tx_rate_idx = rate_idx;
        skbdesc->tx_rate_flags = rate_flags;
 
+       if (local)
+               skbdesc->flags |= SKBDESC_NOT_MAC80211;
+
        /*
         * When hardware encryption is supported, and this frame
         * is to be encrypted, we should strip the IV/EIV data from
-        * the frame so we can provide it to the driver seperately.
+        * the frame so we can provide it to the driver separately.
         */
        if (test_bit(ENTRY_TXD_ENCRYPT, &txdesc.flags) &&
-           !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags))
-               rt2x00crypto_tx_remove_iv(skb, iv_len);
+           !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags)) {
+               if (test_bit(DRIVER_REQUIRE_COPY_IV, &queue->rt2x00dev->flags))
+                       rt2x00crypto_tx_copy_iv(skb, &txdesc);
+               else
+                       rt2x00crypto_tx_remove_iv(skb, &txdesc);
+       }
+
+       /*
+        * When DMA allocation is required we should guarantee to the
+        * driver that the DMA is aligned to a 4-byte boundary.
+        * However some drivers require L2 padding to pad the payload
+        * rather than the header. This could be a requirement for
+        * PCI and USB devices, while header alignment only is valid
+        * for PCI devices.
+        */
+       if (test_bit(DRIVER_REQUIRE_L2PAD, &queue->rt2x00dev->flags))
+               rt2x00queue_insert_l2pad(entry->skb, txdesc.header_length);
+       else if (test_bit(DRIVER_REQUIRE_DMA, &queue->rt2x00dev->flags))
+               rt2x00queue_align_frame(entry->skb);
 
        /*
         * It could be possible that the queue was corrupted and this
         * call failed. Since we always return NETDEV_TX_OK to mac80211,
         * this frame will simply be dropped.
         */
-       if (unlikely(queue->rt2x00dev->ops->lib->write_tx_data(entry))) {
+       if (unlikely(rt2x00queue_write_tx_data(entry, &txdesc))) {
                clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags);
                entry->skb = NULL;
-               return -EIO;
+               ret = -EIO;
+               goto out;
        }
 
-       if (test_bit(DRIVER_REQUIRE_DMA, &queue->rt2x00dev->flags))
-               rt2x00queue_map_txskb(queue->rt2x00dev, skb);
-
        set_bit(ENTRY_DATA_PENDING, &entry->flags);
 
        rt2x00queue_index_inc(queue, Q_INDEX);
        rt2x00queue_write_tx_descriptor(entry, &txdesc);
+       rt2x00queue_kick_tx_queue(queue, &txdesc);
 
-       return 0;
+out:
+       spin_unlock(&queue->tx_lock);
+       return ret;
 }
 
 int rt2x00queue_update_beacon(struct rt2x00_dev *rt2x00dev,
-                             struct ieee80211_vif *vif)
+                             struct ieee80211_vif *vif,
+                             const bool enable_beacon)
 {
        struct rt2x00_intf *intf = vif_to_intf(vif);
        struct skb_frame_desc *skbdesc;
        struct txentry_desc txdesc;
-       __le32 desc[16];
 
        if (unlikely(!intf->beacon))
                return -ENOBUFS;
 
+       mutex_lock(&intf->beacon_skb_mutex);
+
+       /*
+        * Clean up the beacon skb.
+        */
+       rt2x00queue_free_skb(intf->beacon);
+
+       if (!enable_beacon) {
+               rt2x00queue_stop_queue(intf->beacon->queue);
+               mutex_unlock(&intf->beacon_skb_mutex);
+               return 0;
+       }
+
        intf->beacon->skb = ieee80211_beacon_get(rt2x00dev->hw, vif);
-       if (!intf->beacon->skb)
+       if (!intf->beacon->skb) {
+               mutex_unlock(&intf->beacon_skb_mutex);
                return -ENOMEM;
+       }
 
        /*
         * Copy all TX descriptor information into txdesc,
@@ -468,42 +617,75 @@ int rt2x00queue_update_beacon(struct rt2x00_dev *rt2x00dev,
        rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
        /*
-        * For the descriptor we use a local array from where the
-        * driver can move it to the correct location required for
-        * the hardware.
-        */
-       memset(desc, 0, sizeof(desc));
-
-       /*
         * Fill in skb descriptor
         */
        skbdesc = get_skb_frame_desc(intf->beacon->skb);
        memset(skbdesc, 0, sizeof(*skbdesc));
-       skbdesc->desc = desc;
-       skbdesc->desc_len = intf->beacon->queue->desc_size;
        skbdesc->entry = intf->beacon;
 
        /*
-        * Write TX descriptor into reserved room in front of the beacon.
+        * Send beacon to hardware and enable beacon generation.
         */
-       rt2x00queue_write_tx_descriptor(intf->beacon, &txdesc);
+       rt2x00dev->ops->lib->write_beacon(intf->beacon, &txdesc);
+
+       mutex_unlock(&intf->beacon_skb_mutex);
+
+       return 0;
+}
+
+void rt2x00queue_for_each_entry(struct data_queue *queue,
+                               enum queue_index start,
+                               enum queue_index end,
+                               void (*fn)(struct queue_entry *entry))
+{
+       unsigned long irqflags;
+       unsigned int index_start;
+       unsigned int index_end;
+       unsigned int i;
+
+       if (unlikely(start >= Q_INDEX_MAX || end >= Q_INDEX_MAX)) {
+               ERROR(queue->rt2x00dev,
+                     "Entry requested from invalid index range (%d - %d)\n",
+                     start, end);
+               return;
+       }
 
        /*
-        * Send beacon to hardware.
-        * Also enable beacon generation, which might have been disabled
-        * by the driver during the config_beacon() callback function.
+        * Only protect the range we are going to loop over,
+        * if during our loop an extra entry is set to pending
+        * it should not be kicked during this run, since it
+        * is part of another TX operation.
         */
-       rt2x00dev->ops->lib->write_beacon(intf->beacon);
-       rt2x00dev->ops->lib->kick_tx_queue(rt2x00dev, QID_BEACON);
+       spin_lock_irqsave(&queue->index_lock, irqflags);
+       index_start = queue->index[start];
+       index_end = queue->index[end];
+       spin_unlock_irqrestore(&queue->index_lock, irqflags);
 
-       return 0;
+       /*
+        * Start from the TX done pointer, this guarantees that we will
+        * send out all frames in the correct order.
+        */
+       if (index_start < index_end) {
+               for (i = index_start; i < index_end; i++)
+                       fn(&queue->entries[i]);
+       } else {
+               for (i = index_start; i < queue->limit; i++)
+                       fn(&queue->entries[i]);
+
+               for (i = 0; i < index_end; i++)
+                       fn(&queue->entries[i]);
+       }
 }
+EXPORT_SYMBOL_GPL(rt2x00queue_for_each_entry);
 
 struct data_queue *rt2x00queue_get_queue(struct rt2x00_dev *rt2x00dev,
                                         const enum data_queue_qid queue)
 {
        int atim = test_bit(DRIVER_REQUIRE_ATIM_QUEUE, &rt2x00dev->flags);
 
+       if (queue == QID_RX)
+               return rt2x00dev->rx;
+
        if (queue < rt2x00dev->ops->tx_queues && rt2x00dev->tx)
                return &rt2x00dev->tx[queue];
 
@@ -531,11 +713,11 @@ struct queue_entry *rt2x00queue_get_entry(struct data_queue *queue,
                return NULL;
        }
 
-       spin_lock_irqsave(&queue->lock, irqflags);
+       spin_lock_irqsave(&queue->index_lock, irqflags);
 
        entry = &queue->entries[queue->index[index]];
 
-       spin_unlock_irqrestore(&queue->lock, irqflags);
+       spin_unlock_irqrestore(&queue->index_lock, irqflags);
 
        return entry;
 }
@@ -551,12 +733,14 @@ void rt2x00queue_index_inc(struct data_queue *queue, enum queue_index index)
                return;
        }
 
-       spin_lock_irqsave(&queue->lock, irqflags);
+       spin_lock_irqsave(&queue->index_lock, irqflags);
 
        queue->index[index]++;
        if (queue->index[index] >= queue->limit)
                queue->index[index] = 0;
 
+       queue->last_action[index] = jiffies;
+
        if (index == Q_INDEX) {
                queue->length++;
        } else if (index == Q_INDEX_DONE) {
@@ -564,20 +748,229 @@ void rt2x00queue_index_inc(struct data_queue *queue, enum queue_index index)
                queue->count++;
        }
 
-       spin_unlock_irqrestore(&queue->lock, irqflags);
+       spin_unlock_irqrestore(&queue->index_lock, irqflags);
+}
+
+/*
+ * Pause @queue; no-op unless device present, queue started and not yet paused.
+ */
+void rt2x00queue_pause_queue(struct data_queue *queue)
+{
+       if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) ||
+           !test_bit(QUEUE_STARTED, &queue->flags) ||
+           test_and_set_bit(QUEUE_PAUSED, &queue->flags))
+               return;
+
+       switch (queue->qid) {
+       case QID_AC_VO:
+       case QID_AC_VI:
+       case QID_AC_BE:
+       case QID_AC_BK:
+               /* For TX queues, we have to disable the queue in mac80211. */
+               ieee80211_stop_queue(queue->rt2x00dev->hw, queue->qid);
+               break;
+       default:
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_pause_queue);
+
+/* Resume a paused queue: wake it in mac80211 (TX) or kick it (RX). */
+void rt2x00queue_unpause_queue(struct data_queue *queue)
+{
+       /* Only acts on a present device with a started, paused queue. */
+       if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) ||
+           !test_bit(QUEUE_STARTED, &queue->flags) ||
+           !test_and_clear_bit(QUEUE_PAUSED, &queue->flags))
+               return;
+
+       switch (queue->qid) {
+       case QID_AC_VO:
+       case QID_AC_VI:
+       case QID_AC_BE:
+       case QID_AC_BK:
+               /* For TX queues, we have to enable the queue in mac80211. */
+               ieee80211_wake_queue(queue->rt2x00dev->hw, queue->qid);
+               break;
+       case QID_RX:
+               /*
+                * For RX we need to kick the queue now in order to
+                * receive frames.
+                */
+               queue->rt2x00dev->ops->lib->kick_queue(queue);
+               break;
+       default:
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_unpause_queue);
+
+void rt2x00queue_start_queue(struct data_queue *queue)
+{
+       mutex_lock(&queue->status_lock);
+
+       if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) ||
+           test_and_set_bit(QUEUE_STARTED, &queue->flags)) {
+               mutex_unlock(&queue->status_lock);
+               return;
+       }
+       /* Flag the queue as paused so that the unpause below is not skipped. */
+       set_bit(QUEUE_PAUSED, &queue->flags);
+
+       queue->rt2x00dev->ops->lib->start_queue(queue);
+
+       rt2x00queue_unpause_queue(queue);
+
+       mutex_unlock(&queue->status_lock);
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_start_queue);
+
+/* Stop @queue: pause it towards mac80211, then stop it in the driver. */
+void rt2x00queue_stop_queue(struct data_queue *queue)
+{
+       mutex_lock(&queue->status_lock);
+       if (test_bit(QUEUE_STARTED, &queue->flags)) {
+               /*
+                * Pause while QUEUE_STARTED is still set; once it is
+                * cleared rt2x00queue_pause_queue() returns immediately.
+                */
+               rt2x00queue_pause_queue(queue);
+               clear_bit(QUEUE_STARTED, &queue->flags);
+               queue->rt2x00dev->ops->lib->stop_queue(queue);
+       }
+       mutex_unlock(&queue->status_lock);
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_stop_queue);
+
+/*
+ * Flush @queue: if @drop, let the driver hook (when present) discard pending
+ * frames, then poll up to 100 x 10 ms for the queue to empty. May sleep.
+ */
+void rt2x00queue_flush_queue(struct data_queue *queue, bool drop)
+{
+       unsigned int i;
+       bool started;
+       bool tx_queue =
+               (queue->qid == QID_AC_VO) ||
+               (queue->qid == QID_AC_VI) ||
+               (queue->qid == QID_AC_BE) ||
+               (queue->qid == QID_AC_BK);
+
+       mutex_lock(&queue->status_lock);
+
+       /*
+        * If the queue has been started, we must stop it temporarily
+        * to prevent any new frames from being queued on the device.
+        * If we are not dropping the pending frames, the queue must
+        * only be stopped in the software and not the hardware,
+        * otherwise the queue will never become empty on its own.
+        */
+       started = test_bit(QUEUE_STARTED, &queue->flags);
+       if (started) {
+               rt2x00queue_pause_queue(queue);
+
+               /*
+                * If we are not supposed to drop any pending
+                * frames, this means we must force a start (=kick)
+                * to the queue to make sure the hardware will
+                * start transmitting.
+                */
+               if (!drop && tx_queue)
+                       queue->rt2x00dev->ops->lib->kick_queue(queue);
+       }
+
+       /*
+        * Check if driver supports flushing, we can only guarantee
+        * full support for flushing if the driver is able
+        * to cancel all pending frames (drop = true).
+        */
+       if (drop && queue->rt2x00dev->ops->lib->flush_queue)
+               queue->rt2x00dev->ops->lib->flush_queue(queue);
+
+       /*
+        * When we don't want to drop any frames, or when
+        * the driver doesn't fully flush the queue correctly,
+        * we must wait for the queue to become empty.
+        */
+       for (i = 0; !rt2x00queue_empty(queue) && i < 100; i++)
+               msleep(10);
+
+       /* The queue flush has failed... */
+       if (unlikely(!rt2x00queue_empty(queue)))
+               WARNING(queue->rt2x00dev, "Queue %d failed to flush\n",
+                       queue->qid);
+
+       /*
+        * Restore the queue to the previous status
+        */
+       if (started)
+               rt2x00queue_unpause_queue(queue);
+
+       mutex_unlock(&queue->status_lock);
 }
+EXPORT_SYMBOL_GPL(rt2x00queue_flush_queue);
+
+void rt2x00queue_start_queues(struct rt2x00_dev *rt2x00dev)
+{
+       struct data_queue *queue;
+
+       /*
+        * rt2x00queue_start_queue will call ieee80211_wake_queue
+        * for each queue after it has been properly initialized.
+        */
+       tx_queue_for_each(rt2x00dev, queue)
+               rt2x00queue_start_queue(queue);
+
+       rt2x00queue_start_queue(rt2x00dev->rx);
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_start_queues);
+
+void rt2x00queue_stop_queues(struct rt2x00_dev *rt2x00dev)
+{
+       struct data_queue *queue;
+
+       /*
+        * rt2x00queue_stop_queue will call ieee80211_stop_queue
+        * as well, but we are completely shutting down everything
+        * now, so it is much safer to stop all TX queues at once,
+        * and use rt2x00queue_stop_queue for cleaning up.
+        */
+       ieee80211_stop_queues(rt2x00dev->hw);
+
+       tx_queue_for_each(rt2x00dev, queue)
+               rt2x00queue_stop_queue(queue);
+
+       rt2x00queue_stop_queue(rt2x00dev->rx);
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_stop_queues);
+
+void rt2x00queue_flush_queues(struct rt2x00_dev *rt2x00dev, bool drop)
+{
+       struct data_queue *queue;
+
+       tx_queue_for_each(rt2x00dev, queue)
+               rt2x00queue_flush_queue(queue, drop);
+
+       rt2x00queue_flush_queue(rt2x00dev->rx, drop);
+}
+EXPORT_SYMBOL_GPL(rt2x00queue_flush_queues);
 
 static void rt2x00queue_reset(struct data_queue *queue)
 {
        unsigned long irqflags;
+       unsigned int i;
 
-       spin_lock_irqsave(&queue->lock, irqflags);
+       spin_lock_irqsave(&queue->index_lock, irqflags);
 
        queue->count = 0;
        queue->length = 0;
-       memset(queue->index, 0, sizeof(queue->index));
 
-       spin_unlock_irqrestore(&queue->lock, irqflags);
+       for (i = 0; i < Q_INDEX_MAX; i++) {
+               queue->index[i] = 0;
+               queue->last_action[i] = jiffies;
+       }
+
+       spin_unlock_irqrestore(&queue->index_lock, irqflags);
 }
 
 void rt2x00queue_init_queues(struct rt2x00_dev *rt2x00dev)
@@ -588,11 +981,8 @@ void rt2x00queue_init_queues(struct rt2x00_dev *rt2x00dev)
        queue_for_each(rt2x00dev, queue) {
                rt2x00queue_reset(queue);
 
-               for (i = 0; i < queue->limit; i++) {
-                       queue->entries[i].flags = 0;
-
+               for (i = 0; i < queue->limit; i++)
                        rt2x00dev->ops->lib->clear_entry(&queue->entries[i]);
-               }
        }
 }
 
@@ -614,13 +1004,13 @@ static int rt2x00queue_alloc_entries(struct data_queue *queue,
         * Allocate all queue entries.
         */
        entry_size = sizeof(*entries) + qdesc->priv_size;
-       entries = kzalloc(queue->limit * entry_size, GFP_KERNEL);
+       entries = kcalloc(queue->limit, entry_size, GFP_KERNEL);
        if (!entries)
                return -ENOMEM;
 
 #define QUEUE_ENTRY_PRIV_OFFSET(__base, __index, __limit, __esize, __psize) \
-       ( ((char *)(__base)) + ((__limit) * (__esize)) + \
-           ((__index) * (__psize)) )
+       (((char *)(__base)) + ((__limit) * (__esize)) + \
+           ((__index) * (__psize)))
 
        for (i = 0; i < queue->limit; i++) {
                entries[i].flags = 0;
@@ -639,8 +1029,7 @@ static int rt2x00queue_alloc_entries(struct data_queue *queue,
        return 0;
 }
 
-static void rt2x00queue_free_skbs(struct rt2x00_dev *rt2x00dev,
-                                 struct data_queue *queue)
+static void rt2x00queue_free_skbs(struct data_queue *queue)
 {
        unsigned int i;
 
@@ -648,19 +1037,17 @@ static void rt2x00queue_free_skbs(struct rt2x00_dev *rt2x00dev,
                return;
 
        for (i = 0; i < queue->limit; i++) {
-               if (queue->entries[i].skb)
-                       rt2x00queue_free_skb(rt2x00dev, queue->entries[i].skb);
+               rt2x00queue_free_skb(&queue->entries[i]);
        }
 }
 
-static int rt2x00queue_alloc_rxskbs(struct rt2x00_dev *rt2x00dev,
-                                   struct data_queue *queue)
+static int rt2x00queue_alloc_rxskbs(struct data_queue *queue)
 {
        unsigned int i;
        struct sk_buff *skb;
 
        for (i = 0; i < queue->limit; i++) {
-               skb = rt2x00queue_alloc_rxskb(rt2x00dev, &queue->entries[i]);
+               skb = rt2x00queue_alloc_rxskb(&queue->entries[i]);
                if (!skb)
                        return -ENOMEM;
                queue->entries[i].skb = skb;
@@ -695,7 +1082,7 @@ int rt2x00queue_initialize(struct rt2x00_dev *rt2x00dev)
                        goto exit;
        }
 
-       status = rt2x00queue_alloc_rxskbs(rt2x00dev, rt2x00dev->rx);
+       status = rt2x00queue_alloc_rxskbs(rt2x00dev->rx);
        if (status)
                goto exit;
 
@@ -713,7 +1100,7 @@ void rt2x00queue_uninitialize(struct rt2x00_dev *rt2x00dev)
 {
        struct data_queue *queue;
 
-       rt2x00queue_free_skbs(rt2x00dev, rt2x00dev->rx);
+       rt2x00queue_free_skbs(rt2x00dev->rx);
 
        queue_for_each(rt2x00dev, queue) {
                kfree(queue->entries);
@@ -724,7 +1111,9 @@ void rt2x00queue_uninitialize(struct rt2x00_dev *rt2x00dev)
 static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
                             struct data_queue *queue, enum data_queue_qid qid)
 {
-       spin_lock_init(&queue->lock);
+       mutex_init(&queue->status_lock);
+       spin_lock_init(&queue->tx_lock);
+       spin_lock_init(&queue->index_lock);
 
        queue->rt2x00dev = rt2x00dev;
        queue->qid = qid;
@@ -750,7 +1139,7 @@ int rt2x00queue_allocate(struct rt2x00_dev *rt2x00dev)
         */
        rt2x00dev->data_queues = 2 + rt2x00dev->ops->tx_queues + req_atim;
 
-       queue = kzalloc(rt2x00dev->data_queues * sizeof(*queue), GFP_KERNEL);
+       queue = kcalloc(rt2x00dev->data_queues, sizeof(*queue), GFP_KERNEL);
        if (!queue) {
                ERROR(rt2x00dev, "Queue allocation failed.\n");
                return -ENOMEM;
@@ -766,7 +1155,7 @@ int rt2x00queue_allocate(struct rt2x00_dev *rt2x00dev)
        /*
         * Initialize queue parameters.
         * RX: qid = QID_RX
-        * TX: qid = QID_AC_BE + index
+        * TX: qid = QID_AC_VO + index
         * TX: cw_min: 2^5 = 32.
         * TX: cw_max: 2^10 = 1024.
         * BCN: qid = QID_BEACON
@@ -774,7 +1163,7 @@ int rt2x00queue_allocate(struct rt2x00_dev *rt2x00dev)
         */
        rt2x00queue_init(rt2x00dev, rt2x00dev->rx, QID_RX);
 
-       qid = QID_AC_BE;
+       qid = QID_AC_VO;
        tx_queue_for_each(rt2x00dev, queue)
                rt2x00queue_init(rt2x00dev, queue, qid++);