Merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git/
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index f79b02e..4d2bdbd 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -420,7 +420,8 @@
 {
 	tw32(TG3PCI_MISC_HOST_CTRL,
 	     (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
-	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+		     (tp->last_tag << 24));
 	tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
 
 	tg3_cond_int(tp);
@@ -455,10 +456,16 @@
 {
 	tw32(TG3PCI_MISC_HOST_CTRL,
 		(tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
-	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+		     tp->last_tag << 24);
 	mmiowb();
 
-	if (tg3_has_work(tp))
+	/* When doing tagged status, this work check is unnecessary.
+	 * The last_tag we write above tells the chip which piece of
+	 * work we've completed.
+	 */
+	if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) &&
+	    tg3_has_work(tp))
 		tw32(HOSTCC_MODE, tp->coalesce_mode |
 		     (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
 }
@@ -2500,7 +2507,7 @@
 	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
 		if (netif_carrier_ok(tp->dev)) {
 			tw32(HOSTCC_STAT_COAL_TICKS,
-			     DEFAULT_STAT_COAL_TICKS);
+			     tp->coal.stats_block_coalesce_usecs);
 		} else {
 			tw32(HOSTCC_STAT_COAL_TICKS, 0);
 		}
@@ -2886,7 +2893,6 @@
 	 * All RX "locking" is done by ensuring outside
 	 * code synchronizes with dev->poll()
 	 */
-	done = 1;
 	if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) {
 		int orig_budget = *budget;
 		int work_done;
@@ -2898,12 +2904,14 @@
 
 		*budget -= work_done;
 		netdev->quota -= work_done;
-
-		if (work_done >= orig_budget)
-			done = 0;
 	}
 
+	if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+		tp->last_tag = sblk->status_tag;
+	rmb();
+
 	/* if no more work, tell net stack and NIC we're done */
+	done = !tg3_has_work(tp);
 	if (done) {
 		spin_lock_irqsave(&tp->lock, flags);
 		__netif_rx_complete(netdev);
@@ -2928,22 +2936,21 @@
 	spin_lock_irqsave(&tp->lock, flags);
 
 	/*
-	 * writing any value to intr-mbox-0 clears PCI INTA# and
+	 * Writing any value to intr-mbox-0 clears PCI INTA# and
 	 * chip-internal interrupt pending events.
-	 * writing non-zero to intr-mbox-0 additional tells the
+	 * Writing non-zero to intr-mbox-0 additional tells the
 	 * NIC to stop sending us irqs, engaging "in-intr-handler"
 	 * event coalescing.
 	 */
 	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
+	tp->last_tag = sblk->status_tag;
 	sblk->status &= ~SD_STATUS_UPDATED;
-
 	if (likely(tg3_has_work(tp)))
 		netif_rx_schedule(dev);		/* schedule NAPI poll */
 	else {
-		/* no work, re-enable interrupts
-		 */
+		/* No work, re-enable interrupts.  */
 		tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
-			     0x00000000);
+			     tp->last_tag << 24);
 	}
 
 	spin_unlock_irqrestore(&tp->lock, flags);
@@ -2969,6 +2976,52 @@
 	if ((sblk->status & SD_STATUS_UPDATED) ||
 	    !(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) {
 		/*
+		 * Writing any value to intr-mbox-0 clears PCI INTA# and
+		 * chip-internal interrupt pending events.
+		 * Writing non-zero to intr-mbox-0 additional tells the
+		 * NIC to stop sending us irqs, engaging "in-intr-handler"
+		 * event coalescing.
+		 */
+		tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+			     0x00000001);
+		sblk->status &= ~SD_STATUS_UPDATED;
+		if (likely(tg3_has_work(tp)))
+			netif_rx_schedule(dev);		/* schedule NAPI poll */
+		else {
+			/* No work, shared interrupt perhaps?  re-enable
+			 * interrupts, and flush that PCI write
+			 */
+			tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+			     	0x00000000);
+			tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+		}
+	} else {	/* shared interrupt */
+		handled = 0;
+	}
+
+	spin_unlock_irqrestore(&tp->lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
+static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct net_device *dev = dev_id;
+	struct tg3 *tp = netdev_priv(dev);
+	struct tg3_hw_status *sblk = tp->hw_status;
+	unsigned long flags;
+	unsigned int handled = 1;
+
+	spin_lock_irqsave(&tp->lock, flags);
+
+	/* In INTx mode, it is possible for the interrupt to arrive at
+	 * the CPU before the status block posted prior to the interrupt.
+	 * Reading the PCI State register will confirm whether the
+	 * interrupt is ours and will flush the status block.
+	 */
+	if ((sblk->status & SD_STATUS_UPDATED) ||
+	    !(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) {
+		/*
 		 * writing any value to intr-mbox-0 clears PCI INTA# and
 		 * chip-internal interrupt pending events.
 		 * writing non-zero to intr-mbox-0 additional tells the
@@ -2977,13 +3030,8 @@
 		 */
 		tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
 			     0x00000001);
-		/*
-		 * Flush PCI write.  This also guarantees that our
-		 * status block has been flushed to host memory.
-		 */
-		tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+		tp->last_tag = sblk->status_tag;
 		sblk->status &= ~SD_STATUS_UPDATED;
-
 		if (likely(tg3_has_work(tp)))
 			netif_rx_schedule(dev);		/* schedule NAPI poll */
 		else {
@@ -2991,7 +3039,7 @@
 			 * interrupts, and flush that PCI write
 			 */
 			tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
-			     	0x00000000);
+				     tp->last_tag << 24);
 			tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
 		}
 	} else {	/* shared interrupt */
@@ -5044,6 +5092,27 @@
 }
 
 static void __tg3_set_rx_mode(struct net_device *);
+static void tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
+{
+	tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs);
+	tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs);
+	tw32(HOSTCC_RXMAX_FRAMES, ec->rx_max_coalesced_frames);
+	tw32(HOSTCC_TXMAX_FRAMES, ec->tx_max_coalesced_frames);
+	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+		tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq);
+		tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq);
+	}
+	tw32(HOSTCC_RXCOAL_MAXF_INT, ec->rx_max_coalesced_frames_irq);
+	tw32(HOSTCC_TXCOAL_MAXF_INT, ec->tx_max_coalesced_frames_irq);
+	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+		u32 val = ec->stats_block_coalesce_usecs;
+
+		if (!netif_carrier_ok(tp->dev))
+			val = 0;
+
+		tw32(HOSTCC_STAT_COAL_TICKS, val);
+	}
+}
 
 /* tp->lock is held. */
 static int tg3_reset_hw(struct tg3 *tp)
@@ -5366,16 +5435,7 @@
 		udelay(10);
 	}
 
-	tw32(HOSTCC_RXCOL_TICKS, 0);
-	tw32(HOSTCC_TXCOL_TICKS, LOW_TXCOL_TICKS);
-	tw32(HOSTCC_RXMAX_FRAMES, 1);
-	tw32(HOSTCC_TXMAX_FRAMES, LOW_RXMAX_FRAMES);
-	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
-		tw32(HOSTCC_RXCOAL_TICK_INT, 0);
-		tw32(HOSTCC_TXCOAL_TICK_INT, 0);
-	}
-	tw32(HOSTCC_RXCOAL_MAXF_INT, 1);
-	tw32(HOSTCC_TXCOAL_MAXF_INT, 0);
+	tg3_set_coalesce(tp, &tp->coal);
 
 	/* set status block DMA address */
 	tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
@@ -5388,8 +5448,6 @@
 		 * the tg3_periodic_fetch_stats call there, and
 		 * tg3_get_stats to see how this works for 5705/5750 chips.
 		 */
-		tw32(HOSTCC_STAT_COAL_TICKS,
-		     DEFAULT_STAT_COAL_TICKS);
 		tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
 		     ((u64) tp->stats_mapping >> 32));
 		tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
@@ -5445,7 +5503,8 @@
 	udelay(100);
 
 	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
-	tr32(MAILBOX_INTERRUPT_0);
+	tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+	tp->last_tag = 0;
 
 	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
 		tw32_f(DMAC_MODE, DMAC_MODE_ENABLE);
@@ -5723,31 +5782,33 @@
 	spin_lock_irqsave(&tp->lock, flags);
 	spin_lock(&tp->tx_lock);
 
-	/* All of this garbage is because when using non-tagged
-	 * IRQ status the mailbox/status_block protocol the chip
-	 * uses with the cpu is race prone.
-	 */
-	if (tp->hw_status->status & SD_STATUS_UPDATED) {
-		tw32(GRC_LOCAL_CTRL,
-		     tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
-	} else {
-		tw32(HOSTCC_MODE, tp->coalesce_mode |
-		     (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
-	}
+	if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) {
+		/* All of this garbage is because when using non-tagged
+		 * IRQ status the mailbox/status_block protocol the chip
+		 * uses with the cpu is race prone.
+		 */
+		if (tp->hw_status->status & SD_STATUS_UPDATED) {
+			tw32(GRC_LOCAL_CTRL,
+			     tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+		} else {
+			tw32(HOSTCC_MODE, tp->coalesce_mode |
+			     (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
+		}
 
-	if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
-		tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER;
-		spin_unlock(&tp->tx_lock);
-		spin_unlock_irqrestore(&tp->lock, flags);
-		schedule_work(&tp->reset_task);
-		return;
+		if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+			tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER;
+			spin_unlock(&tp->tx_lock);
+			spin_unlock_irqrestore(&tp->lock, flags);
+			schedule_work(&tp->reset_task);
+			return;
+		}
 	}
 
-	if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
-		tg3_periodic_fetch_stats(tp);
-
 	/* This part only runs once per second. */
 	if (!--tp->timer_counter) {
+		if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+			tg3_periodic_fetch_stats(tp);
+
 		if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
 			u32 mac_stat;
 			int phy_event;
@@ -5846,9 +5907,13 @@
 	if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
 		err = request_irq(tp->pdev->irq, tg3_msi,
 				  SA_SAMPLE_RANDOM, dev->name, dev);
-	else
-		err = request_irq(tp->pdev->irq, tg3_interrupt,
+	else {
+		irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+		if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+			fn = tg3_interrupt_tagged;
+		err = request_irq(tp->pdev->irq, fn,
 				  SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+	}
 
 	if (err)
 		return err;
@@ -5900,9 +5965,14 @@
 
 	tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
 
-	err = request_irq(tp->pdev->irq, tg3_interrupt,
-			  SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+	{
+		irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+		if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+			fn = tg3_interrupt_tagged;
 
+		err = request_irq(tp->pdev->irq, fn,
+				  SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+	}
 	if (err)
 		return err;
 
@@ -5948,7 +6018,13 @@
 	if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
 	    (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_AX) &&
 	    (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_BX)) {
-		if (pci_enable_msi(tp->pdev) == 0) {
+		/* All MSI supporting chips should support tagged
+		 * status.  Assert that this is the case.
+		 */
+		if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) {
+			printk(KERN_WARNING PFX "%s: MSI without TAGGED? "
+			       "Not using MSI.\n", tp->dev->name);
+		} else if (pci_enable_msi(tp->pdev) == 0) {
 			u32 msi_mode;
 
 			msi_mode = tr32(MSGINT_MODE);
@@ -5959,9 +6035,14 @@
 	if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
 		err = request_irq(tp->pdev->irq, tg3_msi,
 				  SA_SAMPLE_RANDOM, dev->name, dev);
-	else
-		err = request_irq(tp->pdev->irq, tg3_interrupt,
+	else {
+		irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+		if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+			fn = tg3_interrupt_tagged;
+
+		err = request_irq(tp->pdev->irq, fn,
 				  SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+	}
 
 	if (err) {
 		if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
@@ -5980,9 +6061,16 @@
 		tg3_halt(tp, 1);
 		tg3_free_rings(tp);
 	} else {
-		tp->timer_offset = HZ / 10;
-		tp->timer_counter = tp->timer_multiplier = 10;
-		tp->asf_counter = tp->asf_multiplier = (10 * 120);
+		if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+			tp->timer_offset = HZ;
+		else
+			tp->timer_offset = HZ / 10;
+
+		BUG_ON(tp->timer_offset > HZ);
+		tp->timer_counter = tp->timer_multiplier =
+			(HZ / tp->timer_offset);
+		tp->asf_counter = tp->asf_multiplier =
+			((HZ / tp->timer_offset) * 120);
 
 		init_timer(&tp->timer);
 		tp->timer.expires = jiffies + tp->timer_offset;
@@ -6005,6 +6093,7 @@
 
 	if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
 		err = tg3_test_msi(tp);
+
 		if (err) {
 			spin_lock_irq(&tp->lock);
 			spin_lock(&tp->tx_lock);
@@ -7203,6 +7292,14 @@
 }
 #endif
 
+static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	memcpy(ec, &tp->coal, sizeof(*ec));
+	return 0;
+}
+
 static struct ethtool_ops tg3_ethtool_ops = {
 	.get_settings		= tg3_get_settings,
 	.set_settings		= tg3_set_settings,
@@ -7235,6 +7332,7 @@
 	.get_strings		= tg3_get_strings,
 	.get_stats_count	= tg3_get_stats_count,
 	.get_ethtool_stats	= tg3_get_ethtool_stats,
+	.get_coalesce		= tg3_get_coalesce,
 };
 
 static void __devinit tg3_get_eeprom_size(struct tg3 *tp)
@@ -8422,15 +8520,7 @@
 	if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
 		tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
 
-	/* Only 5701 and later support tagged irq status mode.
-	 * Also, 5788 chips cannot use tagged irq status.
-	 *
-	 * However, since we are using NAPI avoid tagged irq status
-	 * because the interrupt condition is more difficult to
-	 * fully clear in that mode.
-	 */
 	tp->coalesce_mode = 0;
-
 	if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX &&
 	    GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX)
 		tp->coalesce_mode |= HOSTCC_MODE_32BYTE;
@@ -8494,6 +8584,18 @@
 	     grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5788M))
 		tp->tg3_flags2 |= TG3_FLG2_IS_5788;
 
+	if (!(tp->tg3_flags2 & TG3_FLG2_IS_5788) &&
+	    (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700))
+		tp->tg3_flags |= TG3_FLAG_TAGGED_STATUS;
+	if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) {
+		tp->coalesce_mode |= (HOSTCC_MODE_CLRTICK_RXBD |
+				      HOSTCC_MODE_CLRTICK_TXBD);
+
+		tp->misc_host_ctrl |= MISC_HOST_CTRL_TAGGED_STATUS;
+		pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+				       tp->misc_host_ctrl);
+	}
+
 	/* these are limited to 10/100 only */
 	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
 	     (grc_misc_cfg == 0x8000 || grc_misc_cfg == 0x4000)) ||
@@ -8671,6 +8773,146 @@
 	return 0;
 }
 
+#define BOUNDARY_SINGLE_CACHELINE	1
+#define BOUNDARY_MULTI_CACHELINE	2
+
+static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val)
+{
+	int cacheline_size;
+	u8 byte;
+	int goal;
+
+	pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	/* On 5703 and later chips, the boundary bits have no
+	 * effect.
+	 */
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701 &&
+	    !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
+		goto out;
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC)
+	goal = BOUNDARY_MULTI_CACHELINE;
+#else
+#if defined(CONFIG_SPARC64) || defined(CONFIG_ALPHA)
+	goal = BOUNDARY_SINGLE_CACHELINE;
+#else
+	goal = 0;
+#endif
+#endif
+
+	if (!goal)
+		goto out;
+
+	/* PCI controllers on most RISC systems tend to disconnect
+	 * when a device tries to burst across a cache-line boundary.
+	 * Therefore, letting tg3 do so just wastes PCI bandwidth.
+	 *
+	 * Unfortunately, for PCI-E there are only limited
+	 * write-side controls for this, and thus for reads
+	 * we will still get the disconnects.  We'll also waste
+	 * these PCI cycles for both read and write for chips
+	 * other than 5700 and 5701 which do not implement the
+	 * boundary bits.
+	 */
+	if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
+	    !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
+		switch (cacheline_size) {
+		case 16:
+		case 32:
+		case 64:
+		case 128:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val |= (DMA_RWCTRL_READ_BNDRY_128_PCIX |
+					DMA_RWCTRL_WRITE_BNDRY_128_PCIX);
+			} else {
+				val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX |
+					DMA_RWCTRL_WRITE_BNDRY_384_PCIX);
+			}
+			break;
+
+		case 256:
+			val |= (DMA_RWCTRL_READ_BNDRY_256_PCIX |
+				DMA_RWCTRL_WRITE_BNDRY_256_PCIX);
+			break;
+
+		default:
+			val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX |
+				DMA_RWCTRL_WRITE_BNDRY_384_PCIX);
+			break;
+		};
+	} else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
+		switch (cacheline_size) {
+		case 16:
+		case 32:
+		case 64:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE;
+				val |= DMA_RWCTRL_WRITE_BNDRY_64_PCIE;
+				break;
+			}
+			/* fallthrough */
+		case 128:
+		default:
+			val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE;
+			val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE;
+			break;
+		};
+	} else {
+		switch (cacheline_size) {
+		case 16:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val |= (DMA_RWCTRL_READ_BNDRY_16 |
+					DMA_RWCTRL_WRITE_BNDRY_16);
+				break;
+			}
+			/* fallthrough */
+		case 32:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val |= (DMA_RWCTRL_READ_BNDRY_32 |
+					DMA_RWCTRL_WRITE_BNDRY_32);
+				break;
+			}
+			/* fallthrough */
+		case 64:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val |= (DMA_RWCTRL_READ_BNDRY_64 |
+					DMA_RWCTRL_WRITE_BNDRY_64);
+				break;
+			}
+			/* fallthrough */
+		case 128:
+			if (goal == BOUNDARY_SINGLE_CACHELINE) {
+				val |= (DMA_RWCTRL_READ_BNDRY_128 |
+					DMA_RWCTRL_WRITE_BNDRY_128);
+				break;
+			}
+			/* fallthrough */
+		case 256:
+			val |= (DMA_RWCTRL_READ_BNDRY_256 |
+				DMA_RWCTRL_WRITE_BNDRY_256);
+			break;
+		case 512:
+			val |= (DMA_RWCTRL_READ_BNDRY_512 |
+				DMA_RWCTRL_WRITE_BNDRY_512);
+			break;
+		case 1024:
+		default:
+			val |= (DMA_RWCTRL_READ_BNDRY_1024 |
+				DMA_RWCTRL_WRITE_BNDRY_1024);
+			break;
+		};
+	}
+
+out:
+	return val;
+}
+
 static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, int size, int to_device)
 {
 	struct tg3_internal_buffer_desc test_desc;
@@ -8757,7 +8999,7 @@
 static int __devinit tg3_test_dma(struct tg3 *tp)
 {
 	dma_addr_t buf_dma;
-	u32 *buf;
+	u32 *buf, saved_dma_rwctrl;
 	int ret;
 
 	buf = pci_alloc_consistent(tp->pdev, TEST_BUFFER_SIZE, &buf_dma);
@@ -8769,46 +9011,7 @@
 	tp->dma_rwctrl = ((0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
 			  (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT));
 
-#ifndef CONFIG_X86
-	{
-		u8 byte;
-		int cacheline_size;
-		pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
-
-		if (byte == 0)
-			cacheline_size = 1024;
-		else
-			cacheline_size = (int) byte * 4;
-
-		switch (cacheline_size) {
-		case 16:
-		case 32:
-		case 64:
-		case 128:
-			if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
-			    !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
-				tp->dma_rwctrl |=
-					DMA_RWCTRL_WRITE_BNDRY_384_PCIX;
-				break;
-			} else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
-				tp->dma_rwctrl &=
-					~(DMA_RWCTRL_PCI_WRITE_CMD);
-				tp->dma_rwctrl |=
-					DMA_RWCTRL_WRITE_BNDRY_128_PCIE;
-				break;
-			}
-			/* fallthrough */
-		case 256:
-			if (!(tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
-			    !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
-				tp->dma_rwctrl |=
-					DMA_RWCTRL_WRITE_BNDRY_256;
-			else if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
-				tp->dma_rwctrl |=
-					DMA_RWCTRL_WRITE_BNDRY_256_PCIX;
-		};
-	}
-#endif
+	tp->dma_rwctrl = tg3_calc_dma_bndry(tp, tp->dma_rwctrl);
 
 	if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
 		/* DMA read watermark not used on PCIE */
@@ -8827,7 +9030,7 @@
 			if (ccval == 0x6 || ccval == 0x7)
 				tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
 
-			/* Set bit 23 to renable PCIX hw bug fix */
+			/* Set bit 23 to enable PCIX hw bug fix */
 			tp->dma_rwctrl |= 0x009f0000;
 		} else {
 			tp->dma_rwctrl |= 0x001b000f;
@@ -8868,6 +9071,13 @@
 	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
 		goto out;
 
+	/* It is best to perform DMA test with maximum write burst size
+	 * to expose the 5700/5701 write DMA bug.
+	 */
+	saved_dma_rwctrl = tp->dma_rwctrl;
+	tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK;
+	tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
 	while (1) {
 		u32 *p = buf, i;
 
@@ -8906,8 +9116,9 @@
 			if (p[i] == i)
 				continue;
 
-			if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) ==
-			    DMA_RWCTRL_WRITE_BNDRY_DISAB) {
+			if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
+			    DMA_RWCTRL_WRITE_BNDRY_16) {
+				tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK;
 				tp->dma_rwctrl |= DMA_RWCTRL_WRITE_BNDRY_16;
 				tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
 				break;
@@ -8924,6 +9135,14 @@
 			break;
 		}
 	}
+	if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
+	    DMA_RWCTRL_WRITE_BNDRY_16) {
+		/* DMA test passed without adjusting DMA boundary,
+		 * just restore the calculated DMA boundary
+		 */
+		tp->dma_rwctrl = saved_dma_rwctrl;
+		tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+	}
 
 out:
 	pci_free_consistent(tp->pdev, TEST_BUFFER_SIZE, buf, buf_dma);
@@ -9011,6 +9230,31 @@
 	return peer;
 }
 
+static void __devinit tg3_init_coal(struct tg3 *tp)
+{
+	struct ethtool_coalesce *ec = &tp->coal;
+
+	memset(ec, 0, sizeof(*ec));
+	ec->cmd = ETHTOOL_GCOALESCE;
+	ec->rx_coalesce_usecs = LOW_RXCOL_TICKS;
+	ec->tx_coalesce_usecs = LOW_TXCOL_TICKS;
+	ec->rx_max_coalesced_frames = LOW_RXMAX_FRAMES;
+	ec->tx_max_coalesced_frames = LOW_TXMAX_FRAMES;
+	ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT;
+	ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT;
+	ec->rx_max_coalesced_frames_irq = DEFAULT_RXCOAL_MAXF_INT;
+	ec->tx_max_coalesced_frames_irq = DEFAULT_TXCOAL_MAXF_INT;
+	ec->stats_block_coalesce_usecs = DEFAULT_STAT_COAL_TICKS;
+
+	if (tp->coalesce_mode & (HOSTCC_MODE_CLRTICK_RXBD |
+				 HOSTCC_MODE_CLRTICK_TXBD)) {
+		ec->rx_coalesce_usecs = LOW_RXCOL_TICKS_CLRTCKS;
+		ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT_CLRTCKS;
+		ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS;
+		ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS;
+	}
+}
+
 static int __devinit tg3_init_one(struct pci_dev *pdev,
 				  const struct pci_device_id *ent)
 {
@@ -9256,6 +9500,8 @@
 	/* flow control autonegotiation is default behavior */
 	tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
 
+	tg3_init_coal(tp);
+
 	err = register_netdev(dev);
 	if (err) {
 		printk(KERN_ERR PFX "Cannot register net device, "
@@ -9298,6 +9544,8 @@
 	       (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) != 0,
 	       (tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED) == 0,
 	       (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) != 0);
+	printk(KERN_INFO "%s: dma_rwctrl[%08x]\n",
+	       dev->name, tp->dma_rwctrl);
 
 	return 0;
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 8de6f21..993f84c 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -876,10 +876,12 @@
 #define  HOSTCC_STATUS_ERROR_ATTN	 0x00000004
 #define HOSTCC_RXCOL_TICKS		0x00003c08
 #define  LOW_RXCOL_TICKS		 0x00000032
+#define  LOW_RXCOL_TICKS_CLRTCKS	 0x00000014
 #define  DEFAULT_RXCOL_TICKS		 0x00000048
 #define  HIGH_RXCOL_TICKS		 0x00000096
 #define HOSTCC_TXCOL_TICKS		0x00003c0c
 #define  LOW_TXCOL_TICKS		 0x00000096
+#define  LOW_TXCOL_TICKS_CLRTCKS	 0x00000048
 #define  DEFAULT_TXCOL_TICKS		 0x0000012c
 #define  HIGH_TXCOL_TICKS		 0x00000145
 #define HOSTCC_RXMAX_FRAMES		0x00003c10
@@ -892,8 +894,10 @@
 #define  HIGH_TXMAX_FRAMES		 0x00000052
 #define HOSTCC_RXCOAL_TICK_INT		0x00003c18
 #define  DEFAULT_RXCOAL_TICK_INT	 0x00000019
+#define  DEFAULT_RXCOAL_TICK_INT_CLRTCKS 0x00000014
 #define HOSTCC_TXCOAL_TICK_INT		0x00003c1c
 #define  DEFAULT_TXCOAL_TICK_INT	 0x00000019
+#define  DEFAULT_TXCOAL_TICK_INT_CLRTCKS 0x00000014
 #define HOSTCC_RXCOAL_MAXF_INT		0x00003c20
 #define  DEFAULT_RXCOAL_MAXF_INT	 0x00000005
 #define HOSTCC_TXCOAL_MAXF_INT		0x00003c24
@@ -2023,6 +2027,7 @@
 
 	struct tg3_hw_status		*hw_status;
 	dma_addr_t			status_mapping;
+	u32				last_tag;
 
 	u32				msg_enable;
 
@@ -2068,6 +2073,7 @@
 
 	u32				rx_offset;
 	u32				tg3_flags;
+#define TG3_FLAG_TAGGED_STATUS		0x00000001
 #define TG3_FLAG_TXD_MBOX_HWBUG		0x00000002
 #define TG3_FLAG_RX_CHECKSUMS		0x00000004
 #define TG3_FLAG_USE_LINKCHG_REG	0x00000008
@@ -2225,7 +2231,7 @@
 
 #define SST_25VF0X0_PAGE_SIZE		4098
 
-
+	struct ethtool_coalesce		coal;
 };
 
 #endif /* !(_T3_H) */
diff --git a/fs/namei.c b/fs/namei.c
index defe678..dd78f01 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1580,6 +1580,7 @@
 fail:
 	return dentry;
 }
+EXPORT_SYMBOL_GPL(lookup_create);
 
 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
diff --git a/include/net/act_generic.h b/include/net/act_generic.h
index 95b1207..c9daa7e 100644
--- a/include/net/act_generic.h
+++ b/include/net/act_generic.h
@@ -2,8 +2,8 @@
  * include/net/act_generic.h
  *
 */
-#ifndef ACT_GENERIC_H
-#define ACT_GENERIC_H
+#ifndef _NET_ACT_GENERIC_H
+#define _NET_ACT_GENERIC_H
 static inline int tcf_defact_release(struct tcf_defact *p, int bind)
 {
 	int ret = 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index daebd93..760dc82 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -490,6 +490,14 @@
 			/* Partially cloned skb? */
 			if (skb_shared(frag))
 				goto slow_path;
+
+			BUG_ON(frag->sk);
+			if (skb->sk) {
+				sock_hold(skb->sk);
+				frag->sk = skb->sk;
+				frag->destructor = sock_wfree;
+				skb->truesize -= frag->truesize;
+			}
 		}
 
 		/* Everything is OK. Generate! */
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index faa6176..de21da0 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -508,7 +508,6 @@
 			rc = NF_ACCEPT;
 		/* do not touch skb anymore */
 		atomic_inc(&cp->in_pkts);
-		__ip_vs_conn_put(cp);
 		goto out;
 	}
 
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 28d9425..09e8246 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -940,37 +940,25 @@
 struct sk_buff *
 ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	struct sock *sk = skb->sk;
 #ifdef CONFIG_NETFILTER_DEBUG
 	unsigned int olddebug = skb->nf_debug;
 #endif
 
-	if (sk) {
-		sock_hold(sk);
-		skb_orphan(skb);
-	}
+	skb_orphan(skb);
 
 	local_bh_disable(); 
 	skb = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (!skb) {
-		if (sk)
-			sock_put(sk);
-		return skb;
-	}
-
-	if (sk) {
-		skb_set_owner_w(skb, sk);
-		sock_put(sk);
-	}
-
-	ip_send_check(skb->nh.iph);
-	skb->nfcache |= NFC_ALTERED;
+	if (skb) {
+		ip_send_check(skb->nh.iph);
+		skb->nfcache |= NFC_ALTERED;
 #ifdef CONFIG_NETFILTER_DEBUG
-	/* Packet path as if nothing had happened. */
-	skb->nf_debug = olddebug;
+		/* Packet path as if nothing had happened. */
+		skb->nf_debug = olddebug;
 #endif
+	}
+
 	return skb;
 }
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f07114..b78a535 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -552,13 +552,17 @@
 			    skb_headroom(frag) < hlen)
 			    goto slow_path;
 
-			/* Correct socket ownership. */
-			if (frag->sk == NULL)
-				goto slow_path;
-
 			/* Partially cloned skb? */
 			if (skb_shared(frag))
 				goto slow_path;
+
+			BUG_ON(frag->sk);
+			if (skb->sk) {
+				sock_hold(skb->sk);
+				frag->sk = skb->sk;
+				frag->destructor = sock_wfree;
+				skb->truesize -= frag->truesize;
+			}
 		}
 
 		err = 0;
@@ -1116,12 +1120,10 @@
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
 		skb->data_len += tmp_skb->len;
-#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
 		skb->truesize += tmp_skb->truesize;
 		__sock_put(tmp_skb->sk);
 		tmp_skb->destructor = NULL;
 		tmp_skb->sk = NULL;
-#endif
 	}
 
 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 733bf52..e41ce45 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -735,11 +735,15 @@
 
 	sock_hold(sk);
 	if (p->skb2 == NULL) {
-		if (atomic_read(&p->skb->users) != 1) {
+		if (skb_shared(p->skb)) {
 			p->skb2 = skb_clone(p->skb, p->allocation);
 		} else {
-			p->skb2 = p->skb;
-			atomic_inc(&p->skb->users);
+			p->skb2 = skb_get(p->skb);
+			/*
+			 * skb ownership may have been set when
+			 * delivered to a previous socket.
+			 */
+			skb_orphan(p->skb2);
 		}
 	}
 	if (p->skb2 == NULL) {
@@ -785,11 +789,12 @@
 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
 		do_one_broadcast(sk, &info);
 
+	kfree_skb(skb);
+
 	netlink_unlock_table();
 
 	if (info.skb2)
 		kfree_skb(info.skb2);
-	kfree_skb(skb);
 
 	if (info.delivered) {
 		if (info.congested && (allocation & __GFP_WAIT))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c478fc8..c420eba 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -770,33 +770,12 @@
 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 		if (err)
 			goto out_mknod_parent;
-		/*
-		 * Yucky last component or no last component at all?
-		 * (foo/., foo/.., /////)
-		 */
-		err = -EEXIST;
-		if (nd.last_type != LAST_NORM)
-			goto out_mknod;
-		/*
-		 * Lock the directory.
-		 */
-		down(&nd.dentry->d_inode->i_sem);
-		/*
-		 * Do the final lookup.
-		 */
-		dentry = lookup_hash(&nd.last, nd.dentry);
+
+		dentry = lookup_create(&nd, 0);
 		err = PTR_ERR(dentry);
 		if (IS_ERR(dentry))
 			goto out_mknod_unlock;
-		err = -ENOENT;
-		/*
-		 * Special case - lookup gave negative, but... we had foo/bar/
-		 * From the vfs_mknod() POV we just have a negative dentry -
-		 * all is fine. Let's be bastards - you had / on the end, you've
-		 * been asking for (non-existent) directory. -ENOENT for you.
-		 */
-		if (nd.last.name[nd.last.len] && !dentry->d_inode)
-			goto out_mknod_dput;
+
 		/*
 		 * All right, let's create it.
 		 */
@@ -845,7 +824,6 @@
 	dput(dentry);
 out_mknod_unlock:
 	up(&nd.dentry->d_inode->i_sem);
-out_mknod:
 	path_release(&nd);
 out_mknod_parent:
 	if (err==-EEXIST)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 080aae2..2f4531f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -698,7 +698,7 @@
 				return -ENOMEM;
 
 			if (skb1->sk)
-				skb_set_owner_w(skb, skb1->sk);
+				skb_set_owner_w(skb2, skb1->sk);
 
 			/* Looking around. Are we still alive?
 			 * OK, link new skb, drop old one */
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5ddda2c..9750901 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -34,14 +34,21 @@
 {
 	struct rtattr *rt = xfrma[type - 1];
 	struct xfrm_algo *algp;
+	int len;
 
 	if (!rt)
 		return 0;
 
-	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+	len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp);
+	if (len < 0)
 		return -EINVAL;
 
 	algp = RTA_DATA(rt);
+
+	len -= (algp->alg_key_len + 7U) / 8; 
+	if (len < 0)
+		return -EINVAL;
+
 	switch (type) {
 	case XFRMA_ALG_AUTH:
 		if (!algp->alg_key_len &&
@@ -162,6 +169,7 @@
 	struct rtattr *rta = u_arg;
 	struct xfrm_algo *p, *ualg;
 	struct xfrm_algo_desc *algo;
+	int len;
 
 	if (!rta)
 		return 0;
@@ -173,11 +181,12 @@
 		return -ENOSYS;
 	*props = algo->desc.sadb_alg_id;
 
-	p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
+	len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;
+	p = kmalloc(len, GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
-	memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
+	memcpy(p, ualg, len);
 	*algpp = p;
 	return 0;
 }