iwlwifi: improve TX cache footprint

Having cmd[], meta[] and skbs[] as separate arrays
in the TX queue structure is cache inefficient as
we need the data for a given entry together.

To improve this, create an array with these three
members (allocate meta as part of that struct) so
we have the data we need together located together
improving cache footprint.

The downside is that we need to allocate a lot of
memory in one chunk, about 10KiB (on 64-bit) which
isn't very efficient.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
index bb0a314..1b2aed6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-tx.c
@@ -58,7 +58,7 @@
 	u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(struct iwl_tx_cmd *) txq->cmd[txq->q.write_ptr]->payload;
+		(void *) txq->entries[txq->q.write_ptr].cmd->payload;
 
 	scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
 
@@ -221,13 +221,14 @@
 
 	lockdep_assert_held(&txq->lock);
 
-	iwlagn_unmap_tfd(trans, &txq->meta[index], &tfd_tmp[index], dma_dir);
+	iwlagn_unmap_tfd(trans, &txq->entries[index].meta,
+			 &tfd_tmp[index], dma_dir);
 
 	/* free SKB */
-	if (txq->skbs) {
+	if (txq->entries) {
 		struct sk_buff *skb;
 
-		skb = txq->skbs[index];
+		skb = txq->entries[index].skb;
 
 		/* Can be called from irqs-disabled context
 		 * If skb is not NULL, it means that the whole queue is being
@@ -235,7 +236,7 @@
 		 */
 		if (skb) {
 			iwl_op_mode_free_skb(trans->op_mode, skb);
-			txq->skbs[index] = NULL;
+			txq->entries[index].skb = NULL;
 		}
 	}
 }
@@ -358,7 +359,7 @@
 	u8 sta_id = 0;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(struct iwl_tx_cmd *) txq->cmd[txq->q.read_ptr]->payload;
+		(void *)txq->entries[txq->q.read_ptr].cmd->payload;
 
 	WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
 
@@ -578,8 +579,8 @@
 	}
 
 	idx = get_cmd_index(q, q->write_ptr);
-	out_cmd = txq->cmd[idx];
-	out_meta = &txq->meta[idx];
+	out_cmd = txq->entries[idx].cmd;
+	out_meta = &txq->entries[idx].meta;
 
 	memset(out_meta, 0, sizeof(*out_meta));	/* re-initialize to NULL */
 	if (cmd->flags & CMD_WANT_SKB)
@@ -772,8 +773,8 @@
 	spin_lock(&txq->lock);
 
 	cmd_index = get_cmd_index(&txq->q, index);
-	cmd = txq->cmd[cmd_index];
-	meta = &txq->meta[cmd_index];
+	cmd = txq->entries[cmd_index].cmd;
+	meta = &txq->entries[cmd_index].meta;
 
 	iwlagn_unmap_tfd(trans, meta, &txq->tfds[index],
 			 DMA_BIDIRECTIONAL);
@@ -905,8 +906,8 @@
 		 * in later, it will possibly set an invalid
 		 * address (cmd->meta.source).
 		 */
-		trans_pcie->txq[trans_pcie->cmd_queue].meta[cmd_idx].flags &=
-							~CMD_WANT_SKB;
+		trans_pcie->txq[trans_pcie->cmd_queue].
+			entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
 	}
 
 	if (cmd->resp_pkt) {
@@ -961,12 +962,12 @@
 	     q->read_ptr != index;
 	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
 
-		if (WARN_ON_ONCE(txq->skbs[txq->q.read_ptr] == NULL))
+		if (WARN_ON_ONCE(txq->entries[txq->q.read_ptr].skb == NULL))
 			continue;
 
-		__skb_queue_tail(skbs, txq->skbs[txq->q.read_ptr]);
+		__skb_queue_tail(skbs, txq->entries[txq->q.read_ptr].skb);
 
-		txq->skbs[txq->q.read_ptr] = NULL;
+		txq->entries[txq->q.read_ptr].skb = NULL;
 
 		iwlagn_txq_inval_byte_cnt_tbl(trans, txq);