Merge commit 'AU_LINUX_ANDROID_ICS.04.00.04.00.126' into msm-3.4

AU_LINUX_ANDROID_ICS.04.00.04.00.126 from msm-3.0.
First parent is from google/android-3.4.

* commit 'AU_LINUX_ANDROID_ICS.04.00.04.00.126': (8712 commits)
  PRNG: Device tree entry for qrng device.
  vidc:1080p: Set video core timeout value for Thumbnail mode
  msm: sps: improve the debugging support in SPS driver
  board-8064 msm: Overlap secure and non secure video firmware heaps.
  msm: clock: Add handoff ops for 7x30 and copper XO clocks
  msm_fb: display: Wait for external vsync before DTV IOMMU unmap
  msm: Fix ciruclar dependency in debug UART settings
  msm: gdsc: Add GDSC regulator driver for msm-copper
  defconfig: Enable Mobicore Driver.
  mobicore: Add mobicore driver.
  mobicore: rename variable to lower case.
  mobicore: rename folder.
  mobicore: add makefiles
  mobicore: initial import of kernel driver
  ASoC: msm: Add SLIMBUS_2_RX CPU DAI
  board-8064-gpio: Update FUNC for EPM SPI CS
  msm_fb: display: Remove chicken bit config during video playback
  mmc: msm_sdcc: enable the sanitize capability
  msm-fb: display: lm2 writeback support on mpq platfroms
  msm_fb: display: Disable LVDS phy & pll during panel off
  ...

Signed-off-by: Steve Muckle <smuckle@codeaurora.org>
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index cee307e..e17e1f8 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -8,11 +8,6 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
  */
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
@@ -27,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/iommu.h>
 #include <linux/clk.h>
+#include <linux/scatterlist.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sizes.h>
@@ -42,15 +38,34 @@
 #define RCP15_PRRR(reg)		MRC(reg, p15, 0, c10, c2, 0)
 #define RCP15_NMRR(reg)		MRC(reg, p15, 0, c10, c2, 1)
 
+/* Shareability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NON_SH		0x0
+#define MSM_IOMMU_ATTR_SH		0x4
+
+/* Cacheability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NONCACHED	0x0
+#define MSM_IOMMU_ATTR_CACHED_WB_WA	0x1
+#define MSM_IOMMU_ATTR_CACHED_WB_NWA	0x2
+#define MSM_IOMMU_ATTR_CACHED_WT	0x3
+
+
+static inline void clean_pte(unsigned long *start, unsigned long *end,
+			     int redirect)
+{
+	if (!redirect)
+		dmac_flush_range(start, end);
+}
+
 /* bitmap of the page sizes currently supported */
 #define MSM_IOMMU_PGSIZES	(SZ_4K | SZ_64K | SZ_1M | SZ_16M)
 
 static int msm_iommu_tex_class[4];
 
-DEFINE_SPINLOCK(msm_iommu_lock);
+DEFINE_MUTEX(msm_iommu_lock);
 
 struct msm_priv {
 	unsigned long *pgtable;
+	int redirect;
 	struct list_head list_attached;
 };
 
@@ -58,14 +73,14 @@
 {
 	int ret;
 
-	ret = clk_enable(drvdata->pclk);
+	ret = clk_prepare_enable(drvdata->pclk);
 	if (ret)
 		goto fail;
 
 	if (drvdata->clk) {
-		ret = clk_enable(drvdata->clk);
+		ret = clk_prepare_enable(drvdata->clk);
 		if (ret)
-			clk_disable(drvdata->pclk);
+			clk_disable_unprepare(drvdata->pclk);
 	}
 fail:
 	return ret;
@@ -74,8 +89,40 @@
 static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
 {
 	if (drvdata->clk)
-		clk_disable(drvdata->clk);
-	clk_disable(drvdata->pclk);
+		clk_disable_unprepare(drvdata->clk);
+	clk_disable_unprepare(drvdata->pclk);
+}
+
+static int __flush_iotlb_va(struct iommu_domain *domain, unsigned int va)
+{
+	struct msm_priv *priv = domain->priv;
+	struct msm_iommu_drvdata *iommu_drvdata;
+	struct msm_iommu_ctx_drvdata *ctx_drvdata;
+	int ret = 0;
+	int asid;
+
+	list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
+		if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent)
+			BUG();
+
+		iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
+		if (!iommu_drvdata)
+			BUG();
+
+		ret = __enable_clocks(iommu_drvdata);
+		if (ret)
+			goto fail;
+
+		asid = GET_CONTEXTIDR_ASID(iommu_drvdata->base,
+					   ctx_drvdata->num);
+
+		SET_TLBIVA(iommu_drvdata->base, ctx_drvdata->num,
+			   asid | (va & TLBIVA_VA));
+		mb();
+		__disable_clocks(iommu_drvdata);
+	}
+fail:
+	return ret;
 }
 
 static int __flush_iotlb(struct iommu_domain *domain)
@@ -84,34 +131,25 @@
 	struct msm_iommu_drvdata *iommu_drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	int ret = 0;
-#ifndef CONFIG_IOMMU_PGTABLES_L2
-	unsigned long *fl_table = priv->pgtable;
-	int i;
-
-	if (!list_empty(&priv->list_attached)) {
-		dmac_flush_range(fl_table, fl_table + SZ_16K);
-
-		for (i = 0; i < NUM_FL_PTE; i++)
-			if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
-				void *sl_table = __va(fl_table[i] &
-								FL_BASE_MASK);
-				dmac_flush_range(sl_table, sl_table + SZ_4K);
-			}
-	}
-#endif
+	int asid;
 
 	list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
 		if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent)
 			BUG();
 
 		iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
-		BUG_ON(!iommu_drvdata);
+		if (!iommu_drvdata)
+			BUG();
 
 		ret = __enable_clocks(iommu_drvdata);
 		if (ret)
 			goto fail;
 
-		SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
+		asid = GET_CONTEXTIDR_ASID(iommu_drvdata->base,
+					   ctx_drvdata->num);
+
+		SET_TLBIASID(iommu_drvdata->base, ctx_drvdata->num, asid);
+		mb();
 		__disable_clocks(iommu_drvdata);
 	}
 fail:
@@ -134,17 +172,20 @@
 	SET_BFBCR(base, ctx, 0);
 	SET_PAR(base, ctx, 0);
 	SET_FAR(base, ctx, 0);
-	SET_CTX_TLBIALL(base, ctx, 0);
 	SET_TLBFLPTER(base, ctx, 0);
 	SET_TLBSLPTER(base, ctx, 0);
 	SET_TLBLKCR(base, ctx, 0);
 	SET_PRRR(base, ctx, 0);
 	SET_NMRR(base, ctx, 0);
+	mb();
 }
 
-static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
+static void __program_context(void __iomem *base, int ctx, int ncb,
+			      phys_addr_t pgtable, int redirect,
+			      int ttbr_split)
 {
 	unsigned int prrr, nmrr;
+	int i, j, found;
 	__reset_context(base, ctx);
 
 	/* Set up HTW mode */
@@ -154,14 +195,10 @@
 	/* V2P configuration: HTW for access */
 	SET_V2PCFG(base, ctx, 0x3);
 
-	SET_TTBCR(base, ctx, 0);
-	SET_TTBR0_PA(base, ctx, (pgtable >> 14));
-
-	/* Invalidate the TLB for this context */
-	SET_CTX_TLBIALL(base, ctx, 0);
-
-	/* Set interrupt number to "secure" interrupt */
-	SET_IRPTNDX(base, ctx, 0);
+	SET_TTBCR(base, ctx, ttbr_split);
+	SET_TTBR0_PA(base, ctx, (pgtable >> TTBR0_PA_SHIFT));
+	if (ttbr_split)
+		SET_TTBR1_PA(base, ctx, (pgtable >> TTBR1_PA_SHIFT));
 
 	/* Enable context fault interrupt */
 	SET_CFEIE(base, ctx, 1);
@@ -186,31 +223,61 @@
 	/* Turn on BFB prefetch */
 	SET_BFBDFE(base, ctx, 1);
 
-#ifdef CONFIG_IOMMU_PGTABLES_L2
 	/* Configure page tables as inner-cacheable and shareable to reduce
 	 * the TLB miss penalty.
 	 */
-	SET_TTBR0_SH(base, ctx, 1);
-	SET_TTBR1_SH(base, ctx, 1);
+	if (redirect) {
+		SET_TTBR0_SH(base, ctx, 1);
+		SET_TTBR1_SH(base, ctx, 1);
 
-	SET_TTBR0_NOS(base, ctx, 1);
-	SET_TTBR1_NOS(base, ctx, 1);
+		SET_TTBR0_NOS(base, ctx, 1);
+		SET_TTBR1_NOS(base, ctx, 1);
 
-	SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
-	SET_TTBR0_IRGNL(base, ctx, 1);
+		SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
+		SET_TTBR0_IRGNL(base, ctx, 1);
 
-	SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
-	SET_TTBR1_IRGNL(base, ctx, 1);
+		SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
+		SET_TTBR1_IRGNL(base, ctx, 1);
 
-	SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
-	SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
-#endif
+		SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
+		SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
+	}
+
+	/* Find if this page table is used elsewhere, and re-use ASID */
+	found = 0;
+	for (i = 0; i < ncb; i++)
+		if (GET_TTBR0_PA(base, i) == (pgtable >> TTBR0_PA_SHIFT) &&
+		    i != ctx) {
+			SET_CONTEXTIDR_ASID(base, ctx, \
+					    GET_CONTEXTIDR_ASID(base, i));
+			found = 1;
+			break;
+		}
+
+	/* If page table is new, find an unused ASID */
+	if (!found) {
+		for (i = 0; i < ncb; i++) {
+			found = 0;
+			for (j = 0; j < ncb; j++) {
+				if (GET_CONTEXTIDR_ASID(base, j) == i &&
+				    j != ctx)
+					found = 1;
+			}
+
+			if (!found) {
+				SET_CONTEXTIDR_ASID(base, ctx, i);
+				break;
+			}
+		}
+		BUG_ON(found);
+	}
 
 	/* Enable the MMU */
 	SET_M(base, ctx, 1);
+	mb();
 }
 
-static int msm_iommu_domain_init(struct iommu_domain *domain)
+static int msm_iommu_domain_init(struct iommu_domain *domain, int flags)
 {
 	struct msm_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 
@@ -224,8 +291,15 @@
 	if (!priv->pgtable)
 		goto fail_nomem;
 
+#ifdef CONFIG_IOMMU_PGTABLES_L2
+	priv->redirect = flags & MSM_IOMMU_DOMAIN_PT_CACHEABLE;
+#endif
+
 	memset(priv->pgtable, 0, SZ_16K);
 	domain->priv = priv;
+
+	clean_pte(priv->pgtable, priv->pgtable + NUM_FL_PTE, priv->redirect);
+
 	return 0;
 
 fail_nomem:
@@ -236,11 +310,10 @@
 static void msm_iommu_domain_destroy(struct iommu_domain *domain)
 {
 	struct msm_priv *priv;
-	unsigned long flags;
 	unsigned long *fl_table;
 	int i;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
+	mutex_lock(&msm_iommu_lock);
 	priv = domain->priv;
 	domain->priv = NULL;
 
@@ -257,7 +330,7 @@
 	}
 
 	kfree(priv);
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
 }
 
 static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -268,9 +341,8 @@
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	struct msm_iommu_ctx_drvdata *tmp_drvdata;
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
+	mutex_lock(&msm_iommu_lock);
 
 	priv = domain->priv;
 
@@ -303,15 +375,16 @@
 	if (ret)
 		goto fail;
 
-	__program_context(iommu_drvdata->base, ctx_dev->num,
-			  __pa(priv->pgtable));
+	__program_context(iommu_drvdata->base, ctx_dev->num, iommu_drvdata->ncb,
+			  __pa(priv->pgtable), priv->redirect,
+			  iommu_drvdata->ttbr_split);
 
 	__disable_clocks(iommu_drvdata);
 	list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
-	ret = __flush_iotlb(domain);
 
+	ctx_drvdata->attached_domain = domain;
 fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
 	return ret;
 }
 
@@ -322,10 +395,9 @@
 	struct msm_iommu_ctx_dev *ctx_dev;
 	struct msm_iommu_drvdata *iommu_drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
+	mutex_lock(&msm_iommu_lock);
 	priv = domain->priv;
 
 	if (!priv || !dev)
@@ -338,27 +410,67 @@
 	if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
 		goto fail;
 
-	ret = __flush_iotlb(domain);
-	if (ret)
-		goto fail;
-
 	ret = __enable_clocks(iommu_drvdata);
 	if (ret)
 		goto fail;
 
+	SET_TLBIASID(iommu_drvdata->base, ctx_dev->num,
+		     GET_CONTEXTIDR_ASID(iommu_drvdata->base, ctx_dev->num));
+
 	__reset_context(iommu_drvdata->base, ctx_dev->num);
 	__disable_clocks(iommu_drvdata);
 	list_del_init(&ctx_drvdata->attached_elm);
-
+	ctx_drvdata->attached_domain = NULL;
 fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
+}
+
+static int __get_pgprot(int prot, int len)
+{
+	unsigned int pgprot;
+	int tex;
+
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE))) {
+		prot |= IOMMU_READ | IOMMU_WRITE;
+		WARN_ONCE(1, "No attributes in iommu mapping; assuming RW\n");
+	}
+
+	if ((prot & IOMMU_WRITE) && !(prot & IOMMU_READ)) {
+		prot |= IOMMU_READ;
+		WARN_ONCE(1, "Write-only iommu mappings unsupported; falling back to RW\n");
+	}
+
+	if (prot & IOMMU_CACHE)
+		tex = (pgprot_kernel >> 2) & 0x07;
+	else
+		tex = msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED];
+
+	if (tex < 0 || tex > NUM_TEX_CLASS - 1)
+		return 0;
+
+	if (len == SZ_16M || len == SZ_1M) {
+		pgprot = FL_SHARED;
+		pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
+		pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
+		pgprot |= tex & 0x04 ? FL_TEX0 : 0;
+		pgprot |= FL_AP0 | FL_AP1;
+		pgprot |= prot & IOMMU_WRITE ? 0 : FL_AP2;
+	} else	{
+		pgprot = SL_SHARED;
+		pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
+		pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
+		pgprot |= tex & 0x04 ? SL_TEX0 : 0;
+		pgprot |= SL_AP0 | SL_AP1;
+		pgprot |= prot & IOMMU_WRITE ? 0 : SL_AP2;
+	}
+
+	return pgprot;
 }
 
 static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
 			 phys_addr_t pa, size_t len, int prot)
 {
 	struct msm_priv *priv;
-	unsigned long flags;
 	unsigned long *fl_table;
 	unsigned long *fl_pte;
 	unsigned long fl_offset;
@@ -366,17 +478,9 @@
 	unsigned long *sl_pte;
 	unsigned long sl_offset;
 	unsigned int pgprot;
-	int ret = 0, tex, sh;
+	int ret = 0;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
-
-	sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
-	tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
-
-	if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
-		ret = -EINVAL;
-		goto fail;
-	}
+	mutex_lock(&msm_iommu_lock);
 
 	priv = domain->priv;
 	if (!priv) {
@@ -399,16 +503,11 @@
 		goto fail;
 	}
 
-	if (len == SZ_16M || len == SZ_1M) {
-		pgprot = sh ? FL_SHARED : 0;
-		pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
-		pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
-		pgprot |= tex & 0x04 ? FL_TEX0 : 0;
-	} else	{
-		pgprot = sh ? SL_SHARED : 0;
-		pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
-		pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
-		pgprot |= tex & 0x04 ? SL_TEX0 : 0;
+	pgprot = __get_pgprot(prot, len);
+
+	if (!pgprot) {
+		ret = -EINVAL;
+		goto fail;
 	}
 
 	fl_offset = FL_OFFSET(va);	/* Upper 12 bits */
@@ -416,52 +515,92 @@
 
 	if (len == SZ_16M) {
 		int i = 0;
+
 		for (i = 0; i < 16; i++)
-			*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
-				  FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
-				  FL_SHARED | FL_NG | pgprot;
+			if (*(fl_pte+i)) {
+				ret = -EBUSY;
+				goto fail;
+			}
+
+		for (i = 0; i < 16; i++)
+			*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION
+				  | FL_TYPE_SECT | FL_SHARED | FL_NG | pgprot;
+		clean_pte(fl_pte, fl_pte + 16, priv->redirect);
 	}
 
-	if (len == SZ_1M)
-		*fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
-					    FL_TYPE_SECT | FL_SHARED | pgprot;
-
-	/* Need a 2nd level table */
-	if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
-		unsigned long *sl;
-		sl = (unsigned long *) __get_free_pages(GFP_ATOMIC,
-							get_order(SZ_4K));
-
-		if (!sl) {
-			pr_debug("Could not allocate second level table\n");
-			ret = -ENOMEM;
+	if (len == SZ_1M) {
+		if (*fl_pte) {
+			ret = -EBUSY;
 			goto fail;
 		}
 
-		memset(sl, 0, SZ_4K);
-		*fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE);
+		*fl_pte = (pa & 0xFFF00000) | FL_NG | FL_TYPE_SECT | FL_SHARED
+					    | pgprot;
+		clean_pte(fl_pte, fl_pte + 1, priv->redirect);
+	}
+
+	/* Need a 2nd level table */
+	if (len == SZ_4K || len == SZ_64K) {
+
+		if (*fl_pte == 0) {
+			unsigned long *sl;
+			sl = (unsigned long *) __get_free_pages(GFP_KERNEL,
+							get_order(SZ_4K));
+
+			if (!sl) {
+				pr_debug("Could not allocate second level table\n");
+				ret = -ENOMEM;
+				goto fail;
+			}
+			memset(sl, 0, SZ_4K);
+			clean_pte(sl, sl + NUM_SL_PTE, priv->redirect);
+
+			*fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | \
+						      FL_TYPE_TABLE);
+
+			clean_pte(fl_pte, fl_pte + 1, priv->redirect);
+		}
+
+		if (!(*fl_pte & FL_TYPE_TABLE)) {
+			ret = -EBUSY;
+			goto fail;
+		}
 	}
 
 	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
 	sl_offset = SL_OFFSET(va);
 	sl_pte = sl_table + sl_offset;
 
+	if (len == SZ_4K) {
+		if (*sl_pte) {
+			ret = -EBUSY;
+			goto fail;
+		}
 
-	if (len == SZ_4K)
-		*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
-					  SL_SHARED | SL_TYPE_SMALL | pgprot;
+		*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_NG | SL_SHARED
+						    | SL_TYPE_SMALL | pgprot;
+		clean_pte(sl_pte, sl_pte + 1, priv->redirect);
+	}
 
 	if (len == SZ_64K) {
 		int i;
 
 		for (i = 0; i < 16; i++)
-			*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
-			    SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
+			if (*(sl_pte+i)) {
+				ret = -EBUSY;
+				goto fail;
+			}
+
+		for (i = 0; i < 16; i++)
+			*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_NG
+					  | SL_SHARED | SL_TYPE_LARGE | pgprot;
+
+		clean_pte(sl_pte, sl_pte + 16, priv->redirect);
 	}
 
-	ret = __flush_iotlb(domain);
+	ret = __flush_iotlb_va(domain, va);
 fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
 	return ret;
 }
 
@@ -469,7 +608,6 @@
 			    size_t len)
 {
 	struct msm_priv *priv;
-	unsigned long flags;
 	unsigned long *fl_table;
 	unsigned long *fl_pte;
 	unsigned long fl_offset;
@@ -478,7 +616,7 @@
 	unsigned long sl_offset;
 	int i, ret = 0;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
+	mutex_lock(&msm_iommu_lock);
 
 	priv = domain->priv;
 
@@ -507,13 +645,19 @@
 	}
 
 	/* Unmap supersection */
-	if (len == SZ_16M)
+	if (len == SZ_16M) {
 		for (i = 0; i < 16; i++)
 			*(fl_pte+i) = 0;
 
-	if (len == SZ_1M)
+		clean_pte(fl_pte, fl_pte + 16, priv->redirect);
+	}
+
+	if (len == SZ_1M) {
 		*fl_pte = 0;
 
+		clean_pte(fl_pte, fl_pte + 1, priv->redirect);
+	}
+
 	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
 	sl_offset = SL_OFFSET(va);
 	sl_pte = sl_table + sl_offset;
@@ -521,11 +665,16 @@
 	if (len == SZ_64K) {
 		for (i = 0; i < 16; i++)
 			*(sl_pte+i) = 0;
+
+		clean_pte(sl_pte, sl_pte + 16, priv->redirect);
 	}
 
-	if (len == SZ_4K)
+	if (len == SZ_4K) {
 		*sl_pte = 0;
 
+		clean_pte(sl_pte, sl_pte + 1, priv->redirect);
+	}
+
 	if (len == SZ_4K || len == SZ_64K) {
 		int used = 0;
 
@@ -535,19 +684,211 @@
 		if (!used) {
 			free_page((unsigned long)sl_table);
 			*fl_pte = 0;
+
+			clean_pte(fl_pte, fl_pte + 1, priv->redirect);
 		}
 	}
 
-	ret = __flush_iotlb(domain);
+	ret = __flush_iotlb_va(domain, va);
 
 fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
 
 	/* the IOMMU API requires us to return how many bytes were unmapped */
 	len = ret ? 0 : len;
 	return len;
 }
 
+static unsigned int get_phys_addr(struct scatterlist *sg)
+{
+	/*
+	 * Try sg_dma_address first so that we can
+	 * map carveout regions that do not have a
+	 * struct page associated with them.
+	 */
+	unsigned int pa = sg_dma_address(sg);
+	if (pa == 0)
+		pa = sg_phys(sg);
+	return pa;
+}
+
+static int msm_iommu_map_range(struct iommu_domain *domain, unsigned int va,
+			       struct scatterlist *sg, unsigned int len,
+			       int prot)
+{
+	unsigned int pa;
+	unsigned int offset = 0;
+	unsigned int pgprot;
+	unsigned long *fl_table;
+	unsigned long *fl_pte;
+	unsigned long fl_offset;
+	unsigned long *sl_table;
+	unsigned long sl_offset, sl_start;
+	unsigned int chunk_offset = 0;
+	unsigned int chunk_pa;
+	int ret = 0;
+	struct msm_priv *priv;
+
+	mutex_lock(&msm_iommu_lock);
+
+	BUG_ON(len & (SZ_4K - 1));
+
+	priv = domain->priv;
+	fl_table = priv->pgtable;
+
+	pgprot = __get_pgprot(prot, SZ_4K);
+
+	if (!pgprot) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	fl_offset = FL_OFFSET(va);	/* Upper 12 bits */
+	fl_pte = fl_table + fl_offset;	/* int pointers, 4 bytes */
+
+	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
+	sl_offset = SL_OFFSET(va);
+
+	chunk_pa = get_phys_addr(sg);
+	if (chunk_pa == 0) {
+		pr_debug("No dma address for sg %p\n", sg);
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	while (offset < len) {
+		/* Set up a 2nd level page table if one doesn't exist */
+		if (*fl_pte == 0) {
+			sl_table = (unsigned long *)
+				 __get_free_pages(GFP_KERNEL, get_order(SZ_4K));
+
+			if (!sl_table) {
+				pr_debug("Could not allocate second level table\n");
+				ret = -ENOMEM;
+				goto fail;
+			}
+
+			memset(sl_table, 0, SZ_4K);
+			clean_pte(sl_table, sl_table + NUM_SL_PTE,
+				  priv->redirect);
+
+			*fl_pte = ((((int)__pa(sl_table)) & FL_BASE_MASK) |
+							    FL_TYPE_TABLE);
+			clean_pte(fl_pte, fl_pte + 1, priv->redirect);
+		} else
+			sl_table = (unsigned long *)
+					       __va(((*fl_pte) & FL_BASE_MASK));
+
+		/* Keep track of initial position so we
+		 * don't clean more than we have to
+		 */
+		sl_start = sl_offset;
+
+		/* Build the 2nd level page table */
+		while (offset < len && sl_offset < NUM_SL_PTE) {
+			pa = chunk_pa + chunk_offset;
+			sl_table[sl_offset] = (pa & SL_BASE_MASK_SMALL) |
+				     pgprot | SL_NG | SL_SHARED | SL_TYPE_SMALL;
+			sl_offset++;
+			offset += SZ_4K;
+
+			chunk_offset += SZ_4K;
+
+			if (chunk_offset >= sg->length && offset < len) {
+				chunk_offset = 0;
+				sg = sg_next(sg);
+				chunk_pa = get_phys_addr(sg);
+				if (chunk_pa == 0) {
+					pr_debug("No dma address for sg %p\n",
+						 sg);
+					ret = -EINVAL;
+					goto fail;
+				}
+			}
+		}
+
+		clean_pte(sl_table + sl_start, sl_table + sl_offset,
+			  priv->redirect);
+
+		fl_pte++;
+		sl_offset = 0;
+	}
+	__flush_iotlb(domain);
+fail:
+	mutex_unlock(&msm_iommu_lock);
+	return ret;
+}
+
+
+static int msm_iommu_unmap_range(struct iommu_domain *domain, unsigned int va,
+				 unsigned int len)
+{
+	unsigned int offset = 0;
+	unsigned long *fl_table;
+	unsigned long *fl_pte;
+	unsigned long fl_offset;
+	unsigned long *sl_table;
+	unsigned long sl_start, sl_end;
+	int used, i;
+	struct msm_priv *priv;
+
+	mutex_lock(&msm_iommu_lock);
+
+	BUG_ON(len & (SZ_4K - 1));
+
+	priv = domain->priv;
+	fl_table = priv->pgtable;
+
+	fl_offset = FL_OFFSET(va);	/* Upper 12 bits */
+	fl_pte = fl_table + fl_offset;	/* int pointers, 4 bytes */
+
+	sl_start = SL_OFFSET(va);
+
+	while (offset < len) {
+		sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
+		sl_end = ((len - offset) / SZ_4K) + sl_start;
+
+		if (sl_end > NUM_SL_PTE)
+			sl_end = NUM_SL_PTE;
+
+		memset(sl_table + sl_start, 0, (sl_end - sl_start) * 4);
+		clean_pte(sl_table + sl_start, sl_table + sl_end,
+			  priv->redirect);
+
+		offset += (sl_end - sl_start) * SZ_4K;
+
+		/* Unmap and free the 2nd level table if all mappings in it
+		 * were removed. This saves memory, but the table will need
+		 * to be re-allocated the next time someone tries to map these
+		 * VAs.
+		 */
+		used = 0;
+
+		/* If we just unmapped the whole table, don't bother
+		 * seeing if there are still used entries left.
+		 */
+		if (sl_end - sl_start != NUM_SL_PTE)
+			for (i = 0; i < NUM_SL_PTE; i++)
+				if (sl_table[i]) {
+					used = 1;
+					break;
+				}
+		if (!used) {
+			free_page((unsigned long)sl_table);
+			*fl_pte = 0;
+
+			clean_pte(fl_pte, fl_pte + 1, priv->redirect);
+		}
+
+		sl_start = 0;
+		fl_pte++;
+	}
+
+	__flush_iotlb(domain);
+	mutex_unlock(&msm_iommu_lock);
+	return 0;
+}
+
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
 					  unsigned long va)
 {
@@ -555,12 +896,11 @@
 	struct msm_iommu_drvdata *iommu_drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	unsigned int par;
-	unsigned long flags;
 	void __iomem *base;
 	phys_addr_t ret = 0;
 	int ctx;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
+	mutex_lock(&msm_iommu_lock);
 
 	priv = domain->priv;
 	if (list_empty(&priv->list_attached))
@@ -577,10 +917,9 @@
 	if (ret)
 		goto fail;
 
-	/* Invalidate context TLB */
-	SET_CTX_TLBIALL(base, ctx, 0);
 	SET_V2PPR(base, ctx, va & V2Pxx_VA);
 
+	mb();
 	par = GET_PAR(base, ctx);
 
 	/* We are dealing with a supersection */
@@ -594,7 +933,7 @@
 
 	__disable_clocks(iommu_drvdata);
 fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	mutex_unlock(&msm_iommu_lock);
 	return ret;
 }
 
@@ -633,40 +972,61 @@
 
 irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
 {
-	struct msm_iommu_drvdata *drvdata = dev_id;
+	struct msm_iommu_ctx_drvdata *ctx_drvdata = dev_id;
+	struct msm_iommu_drvdata *drvdata;
 	void __iomem *base;
-	unsigned int fsr;
-	int i, ret;
+	unsigned int fsr, num;
+	int ret;
 
-	spin_lock(&msm_iommu_lock);
+	mutex_lock(&msm_iommu_lock);
+	BUG_ON(!ctx_drvdata);
 
-	if (!drvdata) {
-		pr_err("Invalid device ID in context interrupt handler\n");
-		goto fail;
-	}
+	drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
+	BUG_ON(!drvdata);
 
 	base = drvdata->base;
-
-	pr_err("Unexpected IOMMU page fault!\n");
-	pr_err("base = %08x\n", (unsigned int) base);
+	num = ctx_drvdata->num;
 
 	ret = __enable_clocks(drvdata);
 	if (ret)
 		goto fail;
 
-	for (i = 0; i < drvdata->ncb; i++) {
-		fsr = GET_FSR(base, i);
-		if (fsr) {
-			pr_err("Fault occurred in context %d.\n", i);
+	fsr = GET_FSR(base, num);
+
+	if (fsr) {
+		if (!ctx_drvdata->attached_domain) {
+			pr_err("Bad domain in interrupt handler\n");
+			ret = -ENOSYS;
+		} else
+			ret = report_iommu_fault(ctx_drvdata->attached_domain,
+						&ctx_drvdata->pdev->dev,
+						GET_FAR(base, num), 0);
+
+		if (ret == -ENOSYS) {
+			pr_err("Unexpected IOMMU page fault!\n");
+			pr_err("name    = %s\n", drvdata->name);
+			pr_err("context = %s (%d)\n", ctx_drvdata->name, num);
 			pr_err("Interesting registers:\n");
-			print_ctx_regs(base, i);
-			SET_FSR(base, i, 0x4000000F);
+			print_ctx_regs(base, num);
 		}
-	}
+
+		SET_FSR(base, num, fsr);
+		SET_RESUME(base, num, 1);
+
+		ret = IRQ_HANDLED;
+	} else
+		ret = IRQ_NONE;
+
 	__disable_clocks(drvdata);
 fail:
-	spin_unlock(&msm_iommu_lock);
-	return 0;
+	mutex_unlock(&msm_iommu_lock);
+	return ret;
+}
+
+static phys_addr_t msm_iommu_get_pt_base_addr(struct iommu_domain *domain)
+{
+	struct msm_priv *priv = domain->priv;
+	return __pa(priv->pgtable);
 }
 
 static struct iommu_ops msm_iommu_ops = {
@@ -676,8 +1036,11 @@
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
+	.map_range = msm_iommu_map_range,
+	.unmap_range = msm_iommu_unmap_range,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.domain_has_cap = msm_iommu_domain_has_cap,
+	.get_pt_base_addr = msm_iommu_get_pt_base_addr,
 	.pgsize_bitmap = MSM_IOMMU_PGSIZES,
 };
 
@@ -721,6 +1084,9 @@
 
 static int __init msm_iommu_init(void)
 {
+	if (!msm_soc_version_supports_iommu_v1())
+		return -ENODEV;
+
 	setup_iommu_tex_classes();
 	bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
 	return 0;