Blackfin arch: Faster C implementation of  no-MPU CPLB handler

This is a mixture ofcMichael McTernan's patch and the existing cplb-mpu code.

We ditch the old cplb-nompu implementation, which is a good example of
why a good algorithm in a HLL is preferrable to a bad algorithm written in
assembly.  Rather than try to construct a table of all posible CPLBs and
search it, we just create a (smaller) table of memory regions and
their attributes.  Some of the data structures are now unified for both
the mpu and nompu cases.  A lot of needless complexity in cplbinit.c is
removed.

Further optimizations:
  * compile cplbmgr.c with a lot of -ffixed-reg options, and omit saving
    these registers on the stack when entering a CPLB exception.
  * lose cli/nop/nop/sti sequences for some workarounds - these don't
  * make
    sense in an exception context

Additional code unification should be possible after this.

[Mike Frysinger <vapier.adi@gmail.com>:
 - convert CPP if statements to C if statements
 - remove redundant statements
 - use a do...while loop rather than a for loop to get slightly better
   optimization and to avoid gcc "may be used uninitialized" warnings ...
   we know that the [id]cplb_nr_bounds variables will never be 0, so this
   is OK
 - the no-mpu code was the last user of MAX_MEM_SIZE and with that rewritten,
   we can punt it
 - add some BUG_ON() checks to make sure we dont overflow the small
   cplb_bounds array
 - add i/d cplb entries for the bootrom because there is functions/data in
   there we want to access
 - we do not need a NULL trailing entry as any time we access the bounds
   arrays, we use the nr_bounds variable
]

Signed-off-by: Michael McTernan <mmcternan@airvana.com>
Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bernd Schmidt <bernds_cb1@t-online.de>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/include/asm/cplbinit.h b/arch/blackfin/include/asm/cplbinit.h
index 2aeec87..05b14a6 100644
--- a/arch/blackfin/include/asm/cplbinit.h
+++ b/arch/blackfin/include/asm/cplbinit.h
@@ -32,96 +32,56 @@
 
 #include <asm/blackfin.h>
 #include <asm/cplb.h>
+#include <linux/threads.h>
+
+#ifdef CONFIG_CPLB_SWITCH_TAB_L1
+# define PDT_ATTR __attribute__((l1_data))
+#else
+# define PDT_ATTR
+#endif
+
+struct cplb_entry {
+	unsigned long data, addr;
+};
+
+struct cplb_boundary {
+	unsigned long eaddr; /* End of this region.  */
+	unsigned long data; /* CPLB data value.  */
+};
+
+extern struct cplb_boundary dcplb_bounds[];
+extern struct cplb_boundary icplb_bounds[];
+extern int dcplb_nr_bounds, icplb_nr_bounds;
+
+extern struct cplb_entry dcplb_tbl[NR_CPUS][MAX_CPLBS];
+extern struct cplb_entry icplb_tbl[NR_CPUS][MAX_CPLBS];
+extern int first_switched_icplb;
+extern int first_switched_dcplb;
+
+extern int nr_dcplb_miss[], nr_icplb_miss[], nr_icplb_supv_miss[];
+extern int nr_dcplb_prot[], nr_cplb_flush[];
 
 #ifdef CONFIG_MPU
 
-#include <asm/cplb-mpu.h>
-extern void bfin_icache_init(struct cplb_entry *icplb_tbl);
-extern void bfin_dcache_init(struct cplb_entry *icplb_tbl);
+extern int first_mask_dcplb;
 
-#else
+extern int page_mask_order;
+extern int page_mask_nelts;
 
-#define INITIAL_T 0x1
-#define SWITCH_T  0x2
-#define I_CPLB    0x4
-#define D_CPLB    0x8
+extern unsigned long *current_rwx_mask[NR_CPUS];
 
-#define ASYNC_MEMORY_CPLB_COVERAGE  ((ASYNC_BANK0_SIZE + ASYNC_BANK1_SIZE + \
-				ASYNC_BANK2_SIZE + ASYNC_BANK3_SIZE) / SIZE_4M)
+extern void flush_switched_cplbs(unsigned int);
+extern void set_mask_dcplbs(unsigned long *, unsigned int);
 
-#define CPLB_MEM CONFIG_MAX_MEM_SIZE
-
-/*
-* Number of required data CPLB switchtable entries
-* MEMSIZE / 4 (we mostly install 4M page size CPLBs
-* approx 16 for smaller 1MB page size CPLBs for allignment purposes
-* 1 for L1 Data Memory
-* possibly 1 for L2 Data Memory
-* 1 for CONFIG_DEBUG_HUNT_FOR_ZERO
-* 1 for ASYNC Memory
-*/
-#define MAX_SWITCH_D_CPLBS (((CPLB_MEM / 4) + 16 + 1 + 1 + 1 \
-				 + ASYNC_MEMORY_CPLB_COVERAGE) * 2)
-
-/*
-* Number of required instruction CPLB switchtable entries
-* MEMSIZE / 4 (we mostly install 4M page size CPLBs
-* approx 12 for smaller 1MB page size CPLBs for allignment purposes
-* 1 for L1 Instruction Memory
-* possibly 1 for L2 Instruction Memory
-* 1 for CONFIG_DEBUG_HUNT_FOR_ZERO
-*/
-#define MAX_SWITCH_I_CPLBS (((CPLB_MEM / 4) + 12 + 1 + 1 + 1) * 2)
-
-/* Number of CPLB table entries, used for cplb-nompu. */
-#define CPLB_TBL_ENTRIES (16 * 4)
-
-enum {
-	ZERO_P, L1I_MEM, L1D_MEM, L2_MEM, SDRAM_KERN, SDRAM_RAM_MTD, SDRAM_DMAZ,
-	RES_MEM, ASYNC_MEM, OCB_ROM
-};
-
-struct cplb_desc {
-	u32 start; /* start address */
-	u32 end; /* end address */
-	u32 psize; /* prefered size if any otherwise 1MB or 4MB*/
-	u16 attr;/* attributes */
-	u16 i_conf;/* I-CPLB DATA */
-	u16 d_conf;/* D-CPLB DATA */
-	u16 valid;/* valid */
-	const s8 name[30];/* name */
-};
-
-struct cplb_tab {
-  u_long *tab;
-	u16 pos;
-	u16 size;
-};
-
-extern u_long icplb_tables[NR_CPUS][CPLB_TBL_ENTRIES+1];
-extern u_long dcplb_tables[NR_CPUS][CPLB_TBL_ENTRIES+1];
-
-/* Till here we are discussing about the static memory management model.
- * However, the operating envoronments commonly define more CPLB
- * descriptors to cover the entire addressable memory than will fit into
- * the available on-chip 16 CPLB MMRs. When this happens, the below table
- * will be used which will hold all the potentially required CPLB descriptors
- *
- * This is how Page descriptor Table is implemented in uClinux/Blackfin.
- */
-
-extern u_long ipdt_tables[NR_CPUS][MAX_SWITCH_I_CPLBS+1];
-extern u_long dpdt_tables[NR_CPUS][MAX_SWITCH_D_CPLBS+1];
-#ifdef CONFIG_CPLB_INFO
-extern u_long ipdt_swapcount_tables[NR_CPUS][MAX_SWITCH_I_CPLBS];
-extern u_long dpdt_swapcount_tables[NR_CPUS][MAX_SWITCH_D_CPLBS];
-#endif
-extern void bfin_icache_init(u_long icplbs[]);
-extern void bfin_dcache_init(u_long dcplbs[]);
+extern void __noreturn panic_cplb_error(int seqstat, struct pt_regs *);
 
 #endif /* CONFIG_MPU */
 
+extern void bfin_icache_init(struct cplb_entry *icplb_tbl);
+extern void bfin_dcache_init(struct cplb_entry *icplb_tbl);
+
 #if defined(CONFIG_BFIN_DCACHE) || defined(CONFIG_BFIN_ICACHE)
+extern void generate_cplb_tables_all(void);
 extern void generate_cplb_tables_cpu(unsigned int cpu);
 #endif
 #endif