bnx2x: Multiple concurrent l2 traffic classes

Overview:
 Support mapping of priorities to traffic classes and
 traffic classes to transmission queue ranges in the net device.
 The queue ranges are (count, offset) pairs relating to the txq
 array.
 This can be done via DCBX negotiation or by the kernel.
 As a result Enhanced Transmission Selection (ETS) and Priority Flow
 Control (PFC) are supported between L2 network traffic classes.

 Mapping:
 This patch uses the netdev_set_num_tc, netdev_set_prio_tc_map and
 netdev_set_tc_queue functions to map priorities to traffic classes
 and traffic classes to transmission queue ranges.
 This mapping is performed by the bnx2x_setup_tc function, which is
 hooked up to the ndo_setup_tc callback.
 This function is always called at nic load, where by default it
 maps all priorities to tc 0; it may also be called by the kernel
 or by the driver upon DCBX negotiation to modify the mapping, as
 sketched below.
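
 A minimal sketch of that mapping; the helper name, the prio_to_cos
 table and the BNX2X_MAX_PRIORITY/BNX2X_NUM_ETH_QUEUES() macros are
 used here for illustration and are not necessarily the exact driver
 code:

 static int bnx2x_setup_tc_sketch(struct net_device *dev, u8 num_tc)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	int prio, cos;

 	/* zero traffic classes means "remove the mapping" */
 	if (!num_tc) {
 		netdev_reset_tc(dev);
 		return 0;
 	}

 	if (netdev_set_num_tc(dev, num_tc))
 		return -EINVAL;

 	/* priority -> traffic class, from DCBX or the default map */
 	for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++)
 		netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);

 	/* traffic class -> (count, offset) range in the txq array */
 	for (cos = 0; cos < num_tc; cos++)
 		netdev_set_tc_queue(dev, cos,
 				    BNX2X_NUM_ETH_QUEUES(bp),        /* count  */
 				    cos * BNX2X_NUM_ETH_QUEUES(bp)); /* offset */
 	return 0;
 }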

 rtnl lock:
 When ndo_setup_tc is called at nic load or by the kernel, the rtnl
 lock is already taken. However, when DCBX negotiation takes place
 the lock is not taken. The work is therefore scheduled to be
 handled by the sp_rtnl task, roughly as sketched below.
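
 A sketch of that deferral; the BNX2X_SP_RTNL_SETUP_TC flag, the
 sp_rtnl_state/sp_rtnl_task fields and the num_of_cos source are
 named here as assumptions for illustration:

 /* DCBX completion context: rtnl is not held, so only mark the work */
 static void bnx2x_dcbx_defer_tc_setup(struct bnx2x *bp)
 {
 	set_bit(BNX2X_SP_RTNL_SETUP_TC, &bp->sp_rtnl_state);
 	schedule_delayed_work(&bp->sp_rtnl_task, 0);
 }

 /* sp_rtnl task: performs the deferred reconfiguration under rtnl */
 static void bnx2x_sp_rtnl_tc_sketch(struct bnx2x *bp)
 {
 	rtnl_lock();
 	if (test_and_clear_bit(BNX2X_SP_RTNL_SETUP_TC, &bp->sp_rtnl_state))
 		bnx2x_setup_tc(bp->dev, bp->dcbx_port_params.ets.num_of_cos);
 	rtnl_unlock();
 }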

 Fastpath:
 The fastpath structure of the bnx2x, which previously held the
 information of one tx queue and one rx queue, was redesigned to
 represent multiple tx queues, one for each traffic class.
 The transmission queue index supplied in the skb by the kernel can
 no longer be interpreted as a straightforward index into the
 fastpath structure array; it must instead be decoded into the
 appropriate fastpath index and the tc within that fastpath, as in
 the sketch below.
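
 A sketch of that decode step; the CoS-major layout of the txq ranges
 and the per-fastpath txdata[] array are assumptions made for
 illustration:

 static netdev_tx_t bnx2x_xmit_decode_sketch(struct sk_buff *skb,
 					     struct net_device *dev)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	struct bnx2x_fastpath *fp;
 	struct bnx2x_fp_txdata *txdata;
 	u16 txq_index = skb_get_queue_mapping(skb);
 	u16 num_rss = BNX2X_NUM_ETH_QUEUES(bp);

 	/* which rx/tx fastpath, and which tx ring (CoS) within it */
 	fp = &bp->fp[txq_index % num_rss];
 	txdata = &fp->txdata[txq_index / num_rss];

 	/* ... place the skb on txdata's tx BD ring ... */
 	return NETDEV_TX_OK;
 }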

 Slowpath:
 The bnx2x queue object was redesigned to accommodate multiple
 transmission queues. The queue object's state machine was enhanced
 to allow opening multiple transmission-only connections on top of
 the regular tx-rx connection, as in the sketch below.
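
 For example, a tx-only connection could be opened on top of an
 already-ACTIVE queue roughly as below; the location of the queue
 object in the fastpath and the omitted general/txq parameter setup
 are assumptions:

 static int bnx2x_setup_tx_only_sketch(struct bnx2x *bp,
 				      struct bnx2x_fastpath *fp, u8 cos)
 {
 	struct bnx2x_queue_state_params q_params;

 	memset(&q_params, 0, sizeof(q_params));
 	q_params.q_obj = &fp->q_obj;
 	q_params.cmd = BNX2X_Q_CMD_SETUP_TX_ONLY;
 	__set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);

 	/* cid_index selects which of the queue object's CIDs this
 	 * tx-only connection uses; index 0 is the primary rx/tx CID
 	 */
 	q_params.params.tx_only.cid_index = cos;

 	/* gen_params, txq_params and flags would be filled in here */

 	return bnx2x_queue_state_change(bp, &q_params);
 }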

 Firmware:
 This feature relies on the tx-only queue support introduced in the
 bnx2x 7.0.23 firmware; firmware with multi-CoS support is therefore
 required.

 Signed-off-by: Ariel Elior <ariele@broadcom.com>
 Signed-off-by: Eilon Greenstein <eilong@broadcom.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/bnx2x/bnx2x_sp.c b/drivers/net/bnx2x/bnx2x_sp.c
index f6322a1..358c339 100644
--- a/drivers/net/bnx2x/bnx2x_sp.c
+++ b/drivers/net/bnx2x/bnx2x_sp.c
@@ -4195,15 +4195,29 @@
 
 	if (!test_and_clear_bit(cmd, &cur_pending)) {
 		BNX2X_ERR("Bad MC reply %d for queue %d in state %d "
-			  "pending 0x%lx, next_state %d\n", cmd, o->cid,
+			  "pending 0x%lx, next_state %d\n", cmd,
+			  o->cids[BNX2X_PRIMARY_CID_INDEX],
 			  o->state, cur_pending, o->next_state);
 		return -EINVAL;
 	}
 
+	if (o->next_tx_only >= o->max_cos)
+		/* >= because tx only must always be smaller than cos since the
+		 * primary connection supports COS 0
+		 */
+		BNX2X_ERR("illegal value for next tx_only: %d. max cos was %d",
+			   o->next_tx_only, o->max_cos);
+
 	DP(BNX2X_MSG_SP, "Completing command %d for queue %d, "
-			 "setting state to %d\n", cmd, o->cid, o->next_state);
+			 "setting state to %d\n", cmd,
+			 o->cids[BNX2X_PRIMARY_CID_INDEX], o->next_state);
+
+	if (o->next_tx_only)  /* print num tx-only if any exist */
+		DP(BNX2X_MSG_SP, "primary cid %d: num tx-only cons %d",
+			   o->cids[BNX2X_PRIMARY_CID_INDEX], o->next_tx_only);
 
 	o->state = o->next_state;
+	o->num_tx_only = o->next_tx_only;
 	o->next_state = BNX2X_Q_STATE_MAX;
 
 	/* It's important that o->state and o->next_state are
@@ -4230,135 +4244,193 @@
 				CLIENT_INIT_RX_DATA_TPA_EN_IPV6;
 }
 
+static void bnx2x_q_fill_init_general_data(struct bnx2x *bp,
+				struct bnx2x_queue_sp_obj *o,
+				struct bnx2x_general_setup_params *params,
+				struct client_init_general_data *gen_data,
+				unsigned long *flags)
+{
+	gen_data->client_id = o->cl_id;
+
+	if (test_bit(BNX2X_Q_FLG_STATS, flags)) {
+		gen_data->statistics_counter_id =
+					params->stat_id;
+		gen_data->statistics_en_flg = 1;
+		gen_data->statistics_zero_flg =
+			test_bit(BNX2X_Q_FLG_ZERO_STATS, flags);
+	} else
+		gen_data->statistics_counter_id =
+					DISABLE_STATISTIC_COUNTER_ID_VALUE;
+
+	gen_data->is_fcoe_flg = test_bit(BNX2X_Q_FLG_FCOE, flags);
+	gen_data->activate_flg = test_bit(BNX2X_Q_FLG_ACTIVE, flags);
+	gen_data->sp_client_id = params->spcl_id;
+	gen_data->mtu = cpu_to_le16(params->mtu);
+	gen_data->func_id = o->func_id;
+
+
+	gen_data->cos = params->cos;
+
+	gen_data->traffic_type =
+		test_bit(BNX2X_Q_FLG_FCOE, flags) ?
+		LLFC_TRAFFIC_TYPE_FCOE : LLFC_TRAFFIC_TYPE_NW;
+
+	DP(BNX2X_MSG_SP, "flags: active %d, cos %d, stats en %d",
+	   gen_data->activate_flg, gen_data->cos, gen_data->statistics_en_flg);
+}
+
+static void bnx2x_q_fill_init_tx_data(struct bnx2x_queue_sp_obj *o,
+				struct bnx2x_txq_setup_params *params,
+				struct client_init_tx_data *tx_data,
+				unsigned long *flags)
+{
+	tx_data->enforce_security_flg =
+		test_bit(BNX2X_Q_FLG_TX_SEC, flags);
+	tx_data->default_vlan =
+		cpu_to_le16(params->default_vlan);
+	tx_data->default_vlan_flg =
+		test_bit(BNX2X_Q_FLG_DEF_VLAN, flags);
+	tx_data->tx_switching_flg =
+		test_bit(BNX2X_Q_FLG_TX_SWITCH, flags);
+	tx_data->anti_spoofing_flg =
+		test_bit(BNX2X_Q_FLG_ANTI_SPOOF, flags);
+	tx_data->tx_status_block_id = params->fw_sb_id;
+	tx_data->tx_sb_index_number = params->sb_cq_index;
+	tx_data->tss_leading_client_id = params->tss_leading_cl_id;
+
+	tx_data->tx_bd_page_base.lo =
+		cpu_to_le32(U64_LO(params->dscr_map));
+	tx_data->tx_bd_page_base.hi =
+		cpu_to_le32(U64_HI(params->dscr_map));
+
+	/* Don't configure any Tx switching mode during queue SETUP */
+	tx_data->state = 0;
+}
+
+static void bnx2x_q_fill_init_pause_data(struct bnx2x_queue_sp_obj *o,
+				struct rxq_pause_params *params,
+				struct client_init_rx_data *rx_data)
+{
+	/* flow control data */
+	rx_data->cqe_pause_thr_low = cpu_to_le16(params->rcq_th_lo);
+	rx_data->cqe_pause_thr_high = cpu_to_le16(params->rcq_th_hi);
+	rx_data->bd_pause_thr_low = cpu_to_le16(params->bd_th_lo);
+	rx_data->bd_pause_thr_high = cpu_to_le16(params->bd_th_hi);
+	rx_data->sge_pause_thr_low = cpu_to_le16(params->sge_th_lo);
+	rx_data->sge_pause_thr_high = cpu_to_le16(params->sge_th_hi);
+	rx_data->rx_cos_mask = cpu_to_le16(params->pri_map);
+}
+
+static void bnx2x_q_fill_init_rx_data(struct bnx2x_queue_sp_obj *o,
+				struct bnx2x_rxq_setup_params *params,
+				struct client_init_rx_data *rx_data,
+				unsigned long *flags)
+{
+	/* Rx data */
+	rx_data->tpa_en = test_bit(BNX2X_Q_FLG_TPA, flags) *
+				CLIENT_INIT_RX_DATA_TPA_EN_IPV4;
+	rx_data->vmqueue_mode_en_flg = 0;
+
+	rx_data->cache_line_alignment_log_size =
+		params->cache_line_log;
+	rx_data->enable_dynamic_hc =
+		test_bit(BNX2X_Q_FLG_DHC, flags);
+	rx_data->max_sges_for_packet = params->max_sges_pkt;
+	rx_data->client_qzone_id = params->cl_qzone_id;
+	rx_data->max_agg_size = cpu_to_le16(params->tpa_agg_sz);
+
+	/* Always start in DROP_ALL mode */
+	rx_data->state = cpu_to_le16(CLIENT_INIT_RX_DATA_UCAST_DROP_ALL |
+				     CLIENT_INIT_RX_DATA_MCAST_DROP_ALL);
+
+	/* We don't set drop flags */
+	rx_data->drop_ip_cs_err_flg = 0;
+	rx_data->drop_tcp_cs_err_flg = 0;
+	rx_data->drop_ttl0_flg = 0;
+	rx_data->drop_udp_cs_err_flg = 0;
+	rx_data->inner_vlan_removal_enable_flg =
+		test_bit(BNX2X_Q_FLG_VLAN, flags);
+	rx_data->outer_vlan_removal_enable_flg =
+		test_bit(BNX2X_Q_FLG_OV, flags);
+	rx_data->status_block_id = params->fw_sb_id;
+	rx_data->rx_sb_index_number = params->sb_cq_index;
+	rx_data->max_tpa_queues = params->max_tpa_queues;
+	rx_data->max_bytes_on_bd = cpu_to_le16(params->buf_sz);
+	rx_data->sge_buff_size = cpu_to_le16(params->sge_buf_sz);
+	rx_data->bd_page_base.lo =
+		cpu_to_le32(U64_LO(params->dscr_map));
+	rx_data->bd_page_base.hi =
+		cpu_to_le32(U64_HI(params->dscr_map));
+	rx_data->sge_page_base.lo =
+		cpu_to_le32(U64_LO(params->sge_map));
+	rx_data->sge_page_base.hi =
+		cpu_to_le32(U64_HI(params->sge_map));
+	rx_data->cqe_page_base.lo =
+		cpu_to_le32(U64_LO(params->rcq_map));
+	rx_data->cqe_page_base.hi =
+		cpu_to_le32(U64_HI(params->rcq_map));
+	rx_data->is_leading_rss = test_bit(BNX2X_Q_FLG_LEADING_RSS, flags);
+
+	if (test_bit(BNX2X_Q_FLG_MCAST, flags)) {
+		rx_data->approx_mcast_engine_id = o->func_id;
+		rx_data->is_approx_mcast = 1;
+	}
+
+	rx_data->rss_engine_id = params->rss_engine_id;
+
+	/* silent vlan removal */
+	rx_data->silent_vlan_removal_flg =
+		test_bit(BNX2X_Q_FLG_SILENT_VLAN_REM, flags);
+	rx_data->silent_vlan_value =
+		cpu_to_le16(params->silent_removal_value);
+	rx_data->silent_vlan_mask =
+		cpu_to_le16(params->silent_removal_mask);
+
+}
+
+/* initialize the general, tx and rx parts of a queue object */
 static void bnx2x_q_fill_setup_data_cmn(struct bnx2x *bp,
 				struct bnx2x_queue_state_params *cmd_params,
 				struct client_init_ramrod_data *data)
 {
-	struct bnx2x_queue_sp_obj *o = cmd_params->q_obj;
-	struct bnx2x_queue_setup_params *params = &cmd_params->params.setup;
+	bnx2x_q_fill_init_general_data(bp, cmd_params->q_obj,
+				       &cmd_params->params.setup.gen_params,
+				       &data->general,
+				       &cmd_params->params.setup.flags);
 
+	bnx2x_q_fill_init_tx_data(cmd_params->q_obj,
+				  &cmd_params->params.setup.txq_params,
+				  &data->tx,
+				  &cmd_params->params.setup.flags);
 
-	/* general */
-	data->general.client_id = o->cl_id;
+	bnx2x_q_fill_init_rx_data(cmd_params->q_obj,
+				  &cmd_params->params.setup.rxq_params,
+				  &data->rx,
+				  &cmd_params->params.setup.flags);
 
-	if (test_bit(BNX2X_Q_FLG_STATS, &params->flags)) {
-		data->general.statistics_counter_id =
-					params->gen_params.stat_id;
-		data->general.statistics_en_flg = 1;
-		data->general.statistics_zero_flg =
-			test_bit(BNX2X_Q_FLG_ZERO_STATS, &params->flags);
-	} else
-		data->general.statistics_counter_id =
-					DISABLE_STATISTIC_COUNTER_ID_VALUE;
-
-	data->general.is_fcoe_flg = test_bit(BNX2X_Q_FLG_FCOE, &params->flags);
-	data->general.activate_flg = test_bit(BNX2X_Q_FLG_ACTIVE,
-					      &params->flags);
-	data->general.sp_client_id = params->gen_params.spcl_id;
-	data->general.mtu = cpu_to_le16(params->gen_params.mtu);
-	data->general.func_id = o->func_id;
-
-
-	data->general.cos = params->txq_params.cos;
-
-	data->general.traffic_type =
-		test_bit(BNX2X_Q_FLG_FCOE, &params->flags) ?
-		LLFC_TRAFFIC_TYPE_FCOE : LLFC_TRAFFIC_TYPE_NW;
-
-	/* Rx data */
-	data->rx.tpa_en = test_bit(BNX2X_Q_FLG_TPA, &params->flags) *
-				CLIENT_INIT_RX_DATA_TPA_EN_IPV4;
-	data->rx.vmqueue_mode_en_flg = 0;
-
-	data->rx.cache_line_alignment_log_size =
-		params->rxq_params.cache_line_log;
-	data->rx.enable_dynamic_hc =
-		test_bit(BNX2X_Q_FLG_DHC, &params->flags);
-	data->rx.max_sges_for_packet = params->rxq_params.max_sges_pkt;
-	data->rx.client_qzone_id = params->rxq_params.cl_qzone_id;
-	data->rx.max_agg_size = cpu_to_le16(params->rxq_params.tpa_agg_sz);
-
-	/* Always start in DROP_ALL mode */
-	data->rx.state = cpu_to_le16(CLIENT_INIT_RX_DATA_UCAST_DROP_ALL |
-				     CLIENT_INIT_RX_DATA_MCAST_DROP_ALL);
-
-	/* We don't set drop flags */
-	data->rx.drop_ip_cs_err_flg = 0;
-	data->rx.drop_tcp_cs_err_flg = 0;
-	data->rx.drop_ttl0_flg = 0;
-	data->rx.drop_udp_cs_err_flg = 0;
-	data->rx.inner_vlan_removal_enable_flg =
-		test_bit(BNX2X_Q_FLG_VLAN, &params->flags);
-	data->rx.outer_vlan_removal_enable_flg =
-		test_bit(BNX2X_Q_FLG_OV, &params->flags);
-	data->rx.status_block_id = params->rxq_params.fw_sb_id;
-	data->rx.rx_sb_index_number = params->rxq_params.sb_cq_index;
-	data->rx.max_tpa_queues = params->rxq_params.max_tpa_queues;
-	data->rx.max_bytes_on_bd = cpu_to_le16(params->rxq_params.buf_sz);
-	data->rx.sge_buff_size = cpu_to_le16(params->rxq_params.sge_buf_sz);
-	data->rx.bd_page_base.lo =
-		cpu_to_le32(U64_LO(params->rxq_params.dscr_map));
-	data->rx.bd_page_base.hi =
-		cpu_to_le32(U64_HI(params->rxq_params.dscr_map));
-	data->rx.sge_page_base.lo =
-		cpu_to_le32(U64_LO(params->rxq_params.sge_map));
-	data->rx.sge_page_base.hi =
-		cpu_to_le32(U64_HI(params->rxq_params.sge_map));
-	data->rx.cqe_page_base.lo =
-		cpu_to_le32(U64_LO(params->rxq_params.rcq_map));
-	data->rx.cqe_page_base.hi =
-		cpu_to_le32(U64_HI(params->rxq_params.rcq_map));
-	data->rx.is_leading_rss = test_bit(BNX2X_Q_FLG_LEADING_RSS,
-					   &params->flags);
-
-	if (test_bit(BNX2X_Q_FLG_MCAST, &params->flags)) {
-		data->rx.approx_mcast_engine_id = o->func_id;
-		data->rx.is_approx_mcast = 1;
-	}
-
-	data->rx.rss_engine_id = params->rxq_params.rss_engine_id;
-
-	/* flow control data */
-	data->rx.cqe_pause_thr_low = cpu_to_le16(params->pause.rcq_th_lo);
-	data->rx.cqe_pause_thr_high = cpu_to_le16(params->pause.rcq_th_hi);
-	data->rx.bd_pause_thr_low = cpu_to_le16(params->pause.bd_th_lo);
-	data->rx.bd_pause_thr_high = cpu_to_le16(params->pause.bd_th_hi);
-	data->rx.sge_pause_thr_low = cpu_to_le16(params->pause.sge_th_lo);
-	data->rx.sge_pause_thr_high = cpu_to_le16(params->pause.sge_th_hi);
-	data->rx.rx_cos_mask = cpu_to_le16(params->pause.pri_map);
-
-	/* silent vlan removal */
-	data->rx.silent_vlan_removal_flg =
-		test_bit(BNX2X_Q_FLG_SILENT_VLAN_REM, &params->flags);
-	data->rx.silent_vlan_value =
-		cpu_to_le16(params->rxq_params.silent_removal_value);
-	data->rx.silent_vlan_mask =
-		cpu_to_le16(params->rxq_params.silent_removal_mask);
-
-	/* Tx data */
-	data->tx.enforce_security_flg =
-		test_bit(BNX2X_Q_FLG_TX_SEC, &params->flags);
-	data->tx.default_vlan =
-		cpu_to_le16(params->txq_params.default_vlan);
-	data->tx.default_vlan_flg =
-		test_bit(BNX2X_Q_FLG_DEF_VLAN, &params->flags);
-	data->tx.tx_switching_flg =
-		test_bit(BNX2X_Q_FLG_TX_SWITCH, &params->flags);
-	data->tx.anti_spoofing_flg =
-		test_bit(BNX2X_Q_FLG_ANTI_SPOOF, &params->flags);
-	data->tx.tx_status_block_id = params->txq_params.fw_sb_id;
-	data->tx.tx_sb_index_number = params->txq_params.sb_cq_index;
-	data->tx.tss_leading_client_id = params->txq_params.tss_leading_cl_id;
-
-	data->tx.tx_bd_page_base.lo =
-		cpu_to_le32(U64_LO(params->txq_params.dscr_map));
-	data->tx.tx_bd_page_base.hi =
-		cpu_to_le32(U64_HI(params->txq_params.dscr_map));
-
-	/* Don't configure any Tx switching mode during queue SETUP */
-	data->tx.state = 0;
+	bnx2x_q_fill_init_pause_data(cmd_params->q_obj,
+				     &cmd_params->params.setup.pause_params,
+				     &data->rx);
 }
 
+/* initialize the general and tx parts of a tx-only queue object */
+static void bnx2x_q_fill_setup_tx_only(struct bnx2x *bp,
+				struct bnx2x_queue_state_params *cmd_params,
+				struct tx_queue_init_ramrod_data *data)
+{
+	bnx2x_q_fill_init_general_data(bp, cmd_params->q_obj,
+				       &cmd_params->params.tx_only.gen_params,
+				       &data->general,
+				       &cmd_params->params.tx_only.flags);
+
+	bnx2x_q_fill_init_tx_data(cmd_params->q_obj,
+				  &cmd_params->params.tx_only.txq_params,
+				  &data->tx,
+				  &cmd_params->params.tx_only.flags);
+
+	DP(BNX2X_MSG_SP, "cid %d, tx bd page lo %x hi %x", cmd_params->q_obj->cids[0],
+	   data->tx.tx_bd_page_base.lo, data->tx.tx_bd_page_base.hi);
+}
 
 /**
  * bnx2x_q_init - init HW/FW queue
@@ -4377,6 +4449,7 @@
 	struct bnx2x_queue_sp_obj *o = params->q_obj;
 	struct bnx2x_queue_init_params *init = &params->params.init;
 	u16 hc_usec;
+	u8 cos;
 
 	/* Tx HC configuration */
 	if (test_bit(BNX2X_Q_TYPE_HAS_TX, &o->type) &&
@@ -4401,7 +4474,12 @@
 	}
 
 	/* Set CDU context validation values */
-	bnx2x_set_ctx_validation(bp, init->cxt, o->cid);
+	for (cos = 0; cos < o->max_cos; cos++) {
+		DP(BNX2X_MSG_SP, "setting context validation. cid %d, cos %d",
+				 o->cids[cos], cos);
+		DP(BNX2X_MSG_SP, "context pointer %p", init->cxts[cos]);
+		bnx2x_set_ctx_validation(bp, init->cxts[cos], o->cids[cos]);
+	}
 
 	/* As no ramrod is sent, complete the command immediately  */
 	o->complete_cmd(bp, o, BNX2X_Q_CMD_INIT);
@@ -4429,7 +4507,8 @@
 
 	mb();
 
-	return bnx2x_sp_post(bp, ramrod, o->cid, U64_HI(data_mapping),
+	return bnx2x_sp_post(bp, ramrod, o->cids[BNX2X_PRIMARY_CID_INDEX],
+			     U64_HI(data_mapping),
 			     U64_LO(data_mapping), ETH_CONNECTION_TYPE);
 }
 
@@ -4449,9 +4528,57 @@
 	bnx2x_q_fill_setup_data_cmn(bp, params, rdata);
 	bnx2x_q_fill_setup_data_e2(bp, params, rdata);
 
-	mb();
 
-	return bnx2x_sp_post(bp, ramrod, o->cid, U64_HI(data_mapping),
+	return bnx2x_sp_post(bp, ramrod, o->cids[BNX2X_PRIMARY_CID_INDEX],
+			     U64_HI(data_mapping),
+			     U64_LO(data_mapping), ETH_CONNECTION_TYPE);
+}
+
+static inline int bnx2x_q_send_setup_tx_only(struct bnx2x *bp,
+				  struct bnx2x_queue_state_params *params)
+{
+	struct bnx2x_queue_sp_obj *o = params->q_obj;
+	struct tx_queue_init_ramrod_data *rdata =
+		(struct tx_queue_init_ramrod_data *)o->rdata;
+	dma_addr_t data_mapping = o->rdata_mapping;
+	int ramrod = RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP;
+	struct bnx2x_queue_setup_tx_only_params *tx_only_params =
+		&params->params.tx_only;
+	u8 cid_index = tx_only_params->cid_index;
+
+
+	if (cid_index >= o->max_cos) {
+		BNX2X_ERR("queue[%d]: cid_index (%d) is out of range\n",
+			  o->cl_id, cid_index);
+		return -EINVAL;
+	}
+
+	DP(BNX2X_MSG_SP, "parameters received: cos: %d sp-id: %d",
+			 tx_only_params->gen_params.cos,
+			 tx_only_params->gen_params.spcl_id);
+
+	/* Clear the ramrod data */
+	memset(rdata, 0, sizeof(*rdata));
+
+	/* Fill the ramrod data */
+	bnx2x_q_fill_setup_tx_only(bp, params, rdata);
+
+	DP(BNX2X_MSG_SP, "sending tx-only ramrod: cid %d, client-id %d, "
+			 "sp-client id %d, cos %d",
+			 o->cids[cid_index],
+			 rdata->general.client_id,
+			 rdata->general.sp_client_id, rdata->general.cos);
+
+	/*
+	 *  No need for an explicit memory barrier here: the ordering
+	 *  between writing the SPQ element and updating the SPQ
+	 *  producer, which involves a memory read, is guaranteed by
+	 *  the full memory barrier that will be placed there
+	 *  (inside bnx2x_sp_post()).
+	 */
+
+	return bnx2x_sp_post(bp, ramrod, o->cids[cid_index],
+			     U64_HI(data_mapping),
 			     U64_LO(data_mapping), ETH_CONNECTION_TYPE);
 }
 
@@ -4521,17 +4648,27 @@
 	struct client_update_ramrod_data *rdata =
 		(struct client_update_ramrod_data *)o->rdata;
 	dma_addr_t data_mapping = o->rdata_mapping;
+	struct bnx2x_queue_update_params *update_params =
+		&params->params.update;
+	u8 cid_index = update_params->cid_index;
+
+	if (cid_index >= o->max_cos) {
+		BNX2X_ERR("queue[%d]: cid_index (%d) is out of range\n",
+			  o->cl_id, cid_index);
+		return -EINVAL;
+	}
+
 
 	/* Clear the ramrod data */
 	memset(rdata, 0, sizeof(*rdata));
 
 	/* Fill the ramrod data */
-	bnx2x_q_fill_update_data(bp, o, &params->params.update, rdata);
+	bnx2x_q_fill_update_data(bp, o, update_params, rdata);
 
 	mb();
 
-	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_CLIENT_UPDATE, o->cid,
-			     U64_HI(data_mapping),
+	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_CLIENT_UPDATE,
+			     o->cids[cid_index], U64_HI(data_mapping),
 			     U64_LO(data_mapping), ETH_CONNECTION_TYPE);
 }
 
@@ -4588,7 +4725,8 @@
 {
 	struct bnx2x_queue_sp_obj *o = params->q_obj;
 
-	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_HALT, o->cid, 0, o->cl_id,
+	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_HALT,
+			     o->cids[BNX2X_PRIMARY_CID_INDEX], 0, o->cl_id,
 			     ETH_CONNECTION_TYPE);
 }
 
@@ -4596,18 +4734,32 @@
 				       struct bnx2x_queue_state_params *params)
 {
 	struct bnx2x_queue_sp_obj *o = params->q_obj;
+	u8 cid_idx = params->params.cfc_del.cid_index;
 
-	return bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_CFC_DEL, o->cid, 0, 0,
-			     NONE_CONNECTION_TYPE);
+	if (cid_idx >= o->max_cos) {
+		BNX2X_ERR("queue[%d]: cid_index (%d) is out of range\n",
+			  o->cl_id, cid_idx);
+		return -EINVAL;
+	}
+
+	return bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_CFC_DEL,
+			     o->cids[cid_idx], 0, 0, NONE_CONNECTION_TYPE);
 }
 
 static inline int bnx2x_q_send_terminate(struct bnx2x *bp,
 					struct bnx2x_queue_state_params *params)
 {
 	struct bnx2x_queue_sp_obj *o = params->q_obj;
+	u8 cid_index = params->params.terminate.cid_index;
 
-	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_TERMINATE, o->cid, 0, 0,
-			     ETH_CONNECTION_TYPE);
+	if (cid_index >= o->max_cos) {
+		BNX2X_ERR("queue[%d]: cid_index (%d) is out of range\n",
+			  o->cl_id, cid_index);
+		return -EINVAL;
+	}
+
+	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_TERMINATE,
+			     o->cids[cid_index], 0, 0, ETH_CONNECTION_TYPE);
 }
 
 static inline int bnx2x_q_send_empty(struct bnx2x *bp,
@@ -4615,7 +4767,8 @@
 {
 	struct bnx2x_queue_sp_obj *o = params->q_obj;
 
-	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_EMPTY, o->cid, 0, 0,
+	return bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_EMPTY,
+			     o->cids[BNX2X_PRIMARY_CID_INDEX], 0, 0,
 			     ETH_CONNECTION_TYPE);
 }
 
@@ -4625,6 +4778,8 @@
 	switch (params->cmd) {
 	case BNX2X_Q_CMD_INIT:
 		return bnx2x_q_init(bp, params);
+	case BNX2X_Q_CMD_SETUP_TX_ONLY:
+		return bnx2x_q_send_setup_tx_only(bp, params);
 	case BNX2X_Q_CMD_DEACTIVATE:
 		return bnx2x_q_send_deactivate(bp, params);
 	case BNX2X_Q_CMD_ACTIVATE:
@@ -4654,6 +4809,7 @@
 	case BNX2X_Q_CMD_SETUP:
 		return bnx2x_q_send_setup_e1x(bp, params);
 	case BNX2X_Q_CMD_INIT:
+	case BNX2X_Q_CMD_SETUP_TX_ONLY:
 	case BNX2X_Q_CMD_DEACTIVATE:
 	case BNX2X_Q_CMD_ACTIVATE:
 	case BNX2X_Q_CMD_UPDATE:
@@ -4676,6 +4832,7 @@
 	case BNX2X_Q_CMD_SETUP:
 		return bnx2x_q_send_setup_e2(bp, params);
 	case BNX2X_Q_CMD_INIT:
+	case BNX2X_Q_CMD_SETUP_TX_ONLY:
 	case BNX2X_Q_CMD_DEACTIVATE:
 	case BNX2X_Q_CMD_ACTIVATE:
 	case BNX2X_Q_CMD_UPDATE:
@@ -4713,6 +4870,9 @@
 {
 	enum bnx2x_q_state state = o->state, next_state = BNX2X_Q_STATE_MAX;
 	enum bnx2x_queue_cmd cmd = params->cmd;
+	struct bnx2x_queue_update_params *update_params =
+		 &params->params.update;
+	u8 next_tx_only = o->num_tx_only;
 
 	switch (state) {
 	case BNX2X_Q_STATE_RESET:
@@ -4738,13 +4898,15 @@
 			 (cmd == BNX2X_Q_CMD_UPDATE_TPA))
 			next_state = BNX2X_Q_STATE_ACTIVE;
 
+		else if (cmd == BNX2X_Q_CMD_SETUP_TX_ONLY) {
+			next_state = BNX2X_Q_STATE_MULTI_COS;
+			next_tx_only = 1;
+		}
+
 		else if (cmd == BNX2X_Q_CMD_HALT)
 			next_state = BNX2X_Q_STATE_STOPPED;
 
 		else if (cmd == BNX2X_Q_CMD_UPDATE) {
-			struct bnx2x_queue_update_params *update_params =
-				&params->params.update;
-
 			/* If "active" state change is requested, update the
 			 *  state accordingly.
 			 */
@@ -4758,6 +4920,43 @@
 		}
 
 		break;
+	case BNX2X_Q_STATE_MULTI_COS:
+		if (cmd == BNX2X_Q_CMD_TERMINATE)
+			next_state = BNX2X_Q_STATE_MCOS_TERMINATED;
+
+		else if (cmd == BNX2X_Q_CMD_SETUP_TX_ONLY) {
+			next_state = BNX2X_Q_STATE_MULTI_COS;
+			next_tx_only = o->num_tx_only + 1;
+		}
+
+		else if ((cmd == BNX2X_Q_CMD_EMPTY) ||
+			 (cmd == BNX2X_Q_CMD_UPDATE_TPA))
+			next_state = BNX2X_Q_STATE_MULTI_COS;
+
+		else if (cmd == BNX2X_Q_CMD_UPDATE) {
+			/* If "active" state change is requested, update the
+			 *  state accordingly.
+			 */
+			if (test_bit(BNX2X_Q_UPDATE_ACTIVATE_CHNG,
+				     &update_params->update_flags) &&
+			    !test_bit(BNX2X_Q_UPDATE_ACTIVATE,
+				      &update_params->update_flags))
+				next_state = BNX2X_Q_STATE_INACTIVE;
+			else
+				next_state = BNX2X_Q_STATE_MULTI_COS;
+		}
+
+		break;
+	case BNX2X_Q_STATE_MCOS_TERMINATED:
+		if (cmd == BNX2X_Q_CMD_CFC_DEL) {
+			next_tx_only = o->num_tx_only - 1;
+			if (next_tx_only == 0)
+				next_state = BNX2X_Q_STATE_ACTIVE;
+			else
+				next_state = BNX2X_Q_STATE_MULTI_COS;
+		}
+
+		break;
 	case BNX2X_Q_STATE_INACTIVE:
 		if (cmd == BNX2X_Q_CMD_ACTIVATE)
 			next_state = BNX2X_Q_STATE_ACTIVE;
@@ -4770,18 +4969,18 @@
 			next_state = BNX2X_Q_STATE_STOPPED;
 
 		else if (cmd == BNX2X_Q_CMD_UPDATE) {
-			struct bnx2x_queue_update_params *update_params =
-				&params->params.update;
-
 			/* If "active" state change is requested, update the
 			 * state accordingly.
 			 */
 			if (test_bit(BNX2X_Q_UPDATE_ACTIVATE_CHNG,
 				     &update_params->update_flags) &&
 			    test_bit(BNX2X_Q_UPDATE_ACTIVATE,
-				     &update_params->update_flags))
-				next_state = BNX2X_Q_STATE_ACTIVE;
-			else
+				     &update_params->update_flags)) {
+				if (o->num_tx_only == 0)
+					next_state = BNX2X_Q_STATE_ACTIVE;
+				else /* tx only queues exist for this queue */
+					next_state = BNX2X_Q_STATE_MULTI_COS;
+			} else
 				next_state = BNX2X_Q_STATE_INACTIVE;
 		}
 
@@ -4805,6 +5004,7 @@
 		DP(BNX2X_MSG_SP, "Good state transition: %d(%d)->%d\n",
 				 state, cmd, next_state);
 		o->next_state = next_state;
+		o->next_tx_only = next_tx_only;
 		return 0;
 	}
 
@@ -4815,12 +5015,17 @@
 
 void bnx2x_init_queue_obj(struct bnx2x *bp,
 			  struct bnx2x_queue_sp_obj *obj,
-			  u8 cl_id, u32 cid, u8 func_id, void *rdata,
+			  u8 cl_id, u32 *cids, u8 cid_cnt, u8 func_id,
+			  void *rdata,
 			  dma_addr_t rdata_mapping, unsigned long type)
 {
 	memset(obj, 0, sizeof(*obj));
 
-	obj->cid = cid;
+	/* We support only BNX2X_MULTI_TX_COS Tx CoS at the moment */
+	BUG_ON(BNX2X_MULTI_TX_COS < cid_cnt);
+
+	memcpy(obj->cids, cids, sizeof(obj->cids[0]) * cid_cnt);
+	obj->max_cos = cid_cnt;
 	obj->cl_id = cl_id;
 	obj->func_id = func_id;
 	obj->rdata = rdata;
@@ -4840,6 +5045,13 @@
 	obj->set_pending = bnx2x_queue_set_pending;
 }
 
+void bnx2x_queue_set_cos_cid(struct bnx2x *bp,
+			     struct bnx2x_queue_sp_obj *obj,
+			     u32 cid, u8 index)
+{
+	obj->cids[index] = cid;
+}
+
 /********************** Function state object *********************************/
 
 static int bnx2x_func_wait_comp(struct bnx2x *bp,