msm: iommu: Selectively clean PTEs in the cache

When modifying the page tables, only clean the modified
entries in the cache, rather than cleaning the entire
first-level page table and all second-level tables.

Signed-off-by: Stepan Moskovchenko <stepanm@codeaurora.org>
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 4de6bfd..e6420f0 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -37,6 +37,15 @@
 #define RCP15_PRRR(reg)		MRC(reg, p15, 0, c10, c2, 0)
 #define RCP15_NMRR(reg)		MRC(reg, p15, 0, c10, c2, 1)
 
+#ifndef CONFIG_IOMMU_PGTABLES_L2
+static inline void clean_pte(unsigned long *start, unsigned long *end)
+{
+	dmac_flush_range(start, end);
+}
+#else
+static inline void clean_pte(unsigned long *start, unsigned long *end) { }
+#endif
+
 static int msm_iommu_tex_class[4];
 
 DEFINE_SPINLOCK(msm_iommu_lock);
@@ -76,23 +85,7 @@
 	struct msm_iommu_drvdata *iommu_drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	int ret = 0;
-#ifndef CONFIG_IOMMU_PGTABLES_L2
-	unsigned long *fl_table = priv->pgtable;
-	int i;
 
-	if (!list_empty(&priv->list_attached)) {
-		dmac_flush_range(fl_table, fl_table + SZ_16K);
-
-		for (i = 0; i < NUM_FL_PTE; i++)
-			if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
-				void *sl_table = __va(fl_table[i] &
-								FL_BASE_MASK);
-				dmac_flush_range(sl_table, sl_table + SZ_4K);
-			}
-	}
-#endif
-
-	mb();
 	list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
 		if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent)
 			BUG();
@@ -105,6 +98,7 @@
 			goto fail;
 
 		SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
+		mb();
 		__disable_clocks(iommu_drvdata);
 	}
 fail:
@@ -454,6 +448,8 @@
 			*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
 				  FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
 				  FL_SHARED | FL_NG | pgprot;
+
+		clean_pte(fl_pte, fl_pte + 16);
 	}
 
 	if (len == SZ_1M) {
@@ -464,6 +460,8 @@
 
 		*fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
 					    FL_TYPE_SECT | FL_SHARED | pgprot;
+
+		clean_pte(fl_pte, fl_pte + 1);
 	}
 
 	/* Need a 2nd level table */
@@ -483,6 +481,8 @@
 
 			*fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | \
 						      FL_TYPE_TABLE);
+
+			clean_pte(fl_pte, fl_pte + 1);
 		}
 
 		if (!(*fl_pte & FL_TYPE_TABLE)) {
@@ -503,6 +503,7 @@
 
 		*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
 					  SL_SHARED | SL_TYPE_SMALL | pgprot;
+		clean_pte(sl_pte, sl_pte + 1);
 	}
 
 	if (len == SZ_64K) {
@@ -517,6 +518,8 @@
 		for (i = 0; i < 16; i++)
 			*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
 			    SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
+
+		clean_pte(sl_pte, sl_pte + 16);
 	}
 
 	ret = __flush_iotlb(domain);
@@ -573,13 +576,19 @@
 	}
 
 	/* Unmap supersection */
-	if (len == SZ_16M)
+	if (len == SZ_16M) {
 		for (i = 0; i < 16; i++)
 			*(fl_pte+i) = 0;
 
-	if (len == SZ_1M)
+		clean_pte(fl_pte, fl_pte + 16);
+	}
+
+	if (len == SZ_1M) {
 		*fl_pte = 0;
 
+		clean_pte(fl_pte, fl_pte + 1);
+	}
+
 	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
 	sl_offset = SL_OFFSET(va);
 	sl_pte = sl_table + sl_offset;
@@ -587,11 +596,16 @@
 	if (len == SZ_64K) {
 		for (i = 0; i < 16; i++)
 			*(sl_pte+i) = 0;
+
+		clean_pte(sl_pte, sl_pte + 16);
 	}
 
-	if (len == SZ_4K)
+	if (len == SZ_4K) {
 		*sl_pte = 0;
 
+		clean_pte(sl_pte, sl_pte + 1);
+	}
+
 	if (len == SZ_4K || len == SZ_64K) {
 		int used = 0;
 
@@ -601,6 +615,8 @@
 		if (!used) {
 			free_page((unsigned long)sl_table);
 			*fl_pte = 0;
+
+			clean_pte(fl_pte, fl_pte + 1);
 		}
 	}