b43: Optimize DMA buffers

In the old days we used one slot per frame. But when we changed that to 2,
we didn't raise the overall slot count. Which resulted in an effective
division of two to the number of slots.

Double the number of TX slots, so we have an effective hardware queue
of 128 frames per QoS queue.

Also optimize the TX header cache handling. We don't need a cached TX header
for slots that will never carry an actual header.
So we reduce the memory consumption of the cache by 50%.

So as a net result we end up with more or less the same memory usage before
and after this patch (except a few tiny meta structures), but have twice
the number of TX slots available.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 3824604..189b2ec 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -41,6 +41,12 @@
 #include <asm/div64.h>
 
 
+/* Required number of TX DMA slots per TX frame.
+ * This currently is 2, because we put the header and the ieee80211 frame
+ * into separate slots. */
+#define TX_SLOTS_PER_FRAME	2
+
+
 /* 32bit DMA ops. */
 static
 struct b43_dmadesc_generic *op32_idx2desc(struct b43_dmaring *ring,
@@ -574,12 +580,11 @@
 			return -ENOMEM;
 		dmaaddr = map_descbuffer(ring, skb->data,
 					 ring->rx_buffersize, 0);
-	}
-
-	if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
-		b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
-		dev_kfree_skb_any(skb);
-		return -EIO;
+		if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
+			b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
+			dev_kfree_skb_any(skb);
+			return -EIO;
+		}
 	}
 
 	meta->skb = skb;
@@ -837,7 +842,7 @@
 #endif
 
 	if (for_tx) {
-		ring->txhdr_cache = kcalloc(ring->nr_slots,
+		ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
 					    b43_txhdr_size(dev),
 					    GFP_KERNEL);
 		if (!ring->txhdr_cache)
@@ -853,7 +858,7 @@
 					  b43_txhdr_size(dev), 1)) {
 			/* ugh realloc */
 			kfree(ring->txhdr_cache);
-			ring->txhdr_cache = kcalloc(ring->nr_slots,
+			ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
 						    b43_txhdr_size(dev),
 						    GFP_KERNEL | GFP_DMA);
 			if (!ring->txhdr_cache)
@@ -1144,7 +1149,10 @@
 	u16 cookie;
 	size_t hdrsize = b43_txhdr_size(ring->dev);
 
-#define SLOTS_PER_PACKET  2
+	/* Important note: If the number of used DMA slots per TX frame
+	 * is changed here, the TX_SLOTS_PER_FRAME definition at the top of
+	 * the file has to be updated, too!
+	 */
 
 	old_top_slot = ring->current_slot;
 	old_used_slots = ring->used_slots;
@@ -1154,7 +1162,7 @@
 	desc = ops->idx2desc(ring, slot, &meta_hdr);
 	memset(meta_hdr, 0, sizeof(*meta_hdr));
 
-	header = &(ring->txhdr_cache[slot * hdrsize]);
+	header = &(ring->txhdr_cache[(slot / TX_SLOTS_PER_FRAME) * hdrsize]);
 	cookie = generate_cookie(ring, slot);
 	err = b43_generate_txhdr(ring->dev, header,
 				 skb->data, skb->len, info, cookie);
@@ -1308,7 +1316,7 @@
 	 * That would be a mac80211 bug. */
 	B43_WARN_ON(ring->stopped);
 
-	if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
+	if (unlikely(free_slots(ring) < TX_SLOTS_PER_FRAME)) {
 		b43warn(dev->wl, "DMA queue overflow\n");
 		err = -ENOSPC;
 		goto out_unlock;
@@ -1332,7 +1340,7 @@
 		goto out_unlock;
 	}
 	ring->nr_tx_packets++;
-	if ((free_slots(ring) < SLOTS_PER_PACKET) ||
+	if ((free_slots(ring) < TX_SLOTS_PER_FRAME) ||
 	    should_inject_overflow(ring)) {
 		/* This TX ring is full. */
 		ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
@@ -1416,7 +1424,7 @@
 	}
 	dev->stats.last_tx = jiffies;
 	if (ring->stopped) {
-		B43_WARN_ON(free_slots(ring) < SLOTS_PER_PACKET);
+		B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME);
 		ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
 		ring->stopped = 0;
 		if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
@@ -1439,8 +1447,8 @@
 		ring = select_ring_by_priority(dev, i);
 
 		spin_lock_irqsave(&ring->lock, flags);
-		stats[i].len = ring->used_slots / SLOTS_PER_PACKET;
-		stats[i].limit = ring->nr_slots / SLOTS_PER_PACKET;
+		stats[i].len = ring->used_slots / TX_SLOTS_PER_FRAME;
+		stats[i].limit = ring->nr_slots / TX_SLOTS_PER_FRAME;
 		stats[i].count = ring->nr_tx_packets;
 		spin_unlock_irqrestore(&ring->lock, flags);
 	}