/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 * Copyright (C) 2005 Thiemo Seufer
 *
 * Written by Justin Carlson of SiByte, Inc.
 *         and Kip Walker of Broadcom Corp.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 25 | #include <linux/module.h> | 
|  | 26 | #include <linux/sched.h> | 
|  | 27 | #include <linux/smp.h> | 
|  | 28 |  | 
|  | 29 | #include <asm/io.h> | 
|  | 30 | #include <asm/sibyte/sb1250.h> | 
|  | 31 | #include <asm/sibyte/sb1250_regs.h> | 
|  | 32 | #include <asm/sibyte/sb1250_dma.h> | 
|  | 33 |  | 
/*
 * Hint operands for the "pref" instruction, kept as string literals so they
 * can be pasted directly into the inline assembly of clear_page_cpu() and
 * copy_page_cpu().  Builds with CONFIG_SB1_PASS_1_WORKAROUNDS use hints 0/1
 * in place of 4/5.
 *
 * NOTE(review): in the MIPS pref hint encoding 0/1 are the plain load/store
 * hints and 4/5 the streamed variants, so the workaround build presumably
 * avoids the streamed hints on pass-1 parts -- confirm against the errata.
 */
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif
|  | 41 |  | 
|  | 42 | static inline void clear_page_cpu(void *page) | 
|  | 43 | { | 
|  | 44 | unsigned char *addr = (unsigned char *) page; | 
|  | 45 | unsigned char *end = addr + PAGE_SIZE; | 
|  | 46 |  | 
|  | 47 | /* | 
|  | 48 | * JDCXXX - This should be bottlenecked by the write buffer, but these | 
|  | 49 | * things tend to be mildly unpredictable...should check this on the | 
|  | 50 | * performance model | 
|  | 51 | * | 
|  | 52 | * We prefetch 4 lines ahead.  We're also "cheating" slightly here... | 
|  | 53 | * since we know we're on an SB1, we force the assembler to take | 
|  | 54 | * 64-bit operands to speed things up | 
|  | 55 | */ | 
|  | 56 | __asm__ __volatile__( | 
|  | 57 | "	.set	push		\n" | 
|  | 58 | "	.set	mips4		\n" | 
|  | 59 | "	.set	noreorder	\n" | 
|  | 60 | #ifdef CONFIG_CPU_HAS_PREFETCH | 
|  | 61 | "	daddiu	%0, %0, 128	\n" | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 62 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%0)  \n" | 
|  | 63 | /* Prefetch the first 4 lines */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 64 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%0)  \n" | 
|  | 65 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%0)  \n" | 
|  | 66 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n" | 
|  | 67 | "1:	sd	$0, -128(%0)	\n"  /* Throw out a cacheline of 0's */ | 
|  | 68 | "	sd	$0, -120(%0)	\n" | 
|  | 69 | "	sd	$0, -112(%0)	\n" | 
|  | 70 | "	sd	$0, -104(%0)	\n" | 
|  | 71 | "	daddiu	%0, %0, 32	\n" | 
|  | 72 | "	bnel	%0, %1, 1b	\n" | 
|  | 73 | "	 pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n" | 
|  | 74 | "	daddiu	%0, %0, -128	\n" | 
|  | 75 | #endif | 
|  | 76 | "	sd	$0, 0(%0)	\n"  /* Throw out a cacheline of 0's */ | 
|  | 77 | "1:	sd	$0, 8(%0)	\n" | 
|  | 78 | "	sd	$0, 16(%0)	\n" | 
|  | 79 | "	sd	$0, 24(%0)	\n" | 
|  | 80 | "	daddiu	%0, %0, 32	\n" | 
|  | 81 | "	bnel	%0, %1, 1b	\n" | 
|  | 82 | "	 sd	$0, 0(%0)	\n" | 
|  | 83 | "	.set	pop		\n" | 
|  | 84 | : "+r" (addr) | 
|  | 85 | : "r" (end) | 
|  | 86 | : "memory"); | 
|  | 87 | } | 
|  | 88 |  | 
|  | 89 | static inline void copy_page_cpu(void *to, void *from) | 
|  | 90 | { | 
|  | 91 | unsigned char *src = (unsigned char *)from; | 
|  | 92 | unsigned char *dst = (unsigned char *)to; | 
|  | 93 | unsigned char *end = src + PAGE_SIZE; | 
|  | 94 |  | 
|  | 95 | /* | 
|  | 96 | * The pref's used here are using "streaming" hints, which cause the | 
|  | 97 | * copied data to be kicked out of the cache sooner.  A page copy often | 
|  | 98 | * ends up copying a lot more data than is commonly used, so this seems | 
|  | 99 | * to make sense in terms of reducing cache pollution, but I've no real | 
|  | 100 | * performance data to back this up | 
|  | 101 | */ | 
|  | 102 | __asm__ __volatile__( | 
|  | 103 | "	.set	push		\n" | 
|  | 104 | "	.set	mips4		\n" | 
|  | 105 | "	.set	noreorder	\n" | 
|  | 106 | #ifdef CONFIG_CPU_HAS_PREFETCH | 
|  | 107 | "	daddiu	%0, %0, 128	\n" | 
|  | 108 | "	daddiu	%1, %1, 128	\n" | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 109 | "	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n" | 
|  | 110 | /* Prefetch the first 4 lines */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 111 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n" | 
|  | 112 | "	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -96(%0)\n" | 
|  | 113 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%1)\n" | 
|  | 114 | "	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -64(%0)\n" | 
|  | 115 | "	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%1)\n" | 
|  | 116 | "	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n" | 
|  | 117 | "1:	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%1)\n" | 
| Ralf Baechle | 875d43e | 2005-09-03 15:56:16 -0700 | [diff] [blame] | 118 | # ifdef CONFIG_64BIT | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 119 | "	ld	$8, -128(%0)	\n"  /* Block copy a cacheline */ | 
|  | 120 | "	ld	$9, -120(%0)	\n" | 
|  | 121 | "	ld	$10, -112(%0)	\n" | 
|  | 122 | "	ld	$11, -104(%0)	\n" | 
|  | 123 | "	sd	$8, -128(%1)	\n" | 
|  | 124 | "	sd	$9, -120(%1)	\n" | 
|  | 125 | "	sd	$10, -112(%1)	\n" | 
|  | 126 | "	sd	$11, -104(%1)	\n" | 
|  | 127 | # else | 
|  | 128 | "	lw	$2, -128(%0)	\n"  /* Block copy a cacheline */ | 
|  | 129 | "	lw	$3, -124(%0)	\n" | 
|  | 130 | "	lw	$6, -120(%0)	\n" | 
|  | 131 | "	lw	$7, -116(%0)	\n" | 
|  | 132 | "	lw	$8, -112(%0)	\n" | 
|  | 133 | "	lw	$9, -108(%0)	\n" | 
|  | 134 | "	lw	$10, -104(%0)	\n" | 
|  | 135 | "	lw	$11, -100(%0)	\n" | 
|  | 136 | "	sw	$2, -128(%1)	\n" | 
|  | 137 | "	sw	$3, -124(%1)	\n" | 
|  | 138 | "	sw	$6, -120(%1)	\n" | 
|  | 139 | "	sw	$7, -116(%1)	\n" | 
|  | 140 | "	sw	$8, -112(%1)	\n" | 
|  | 141 | "	sw	$9, -108(%1)	\n" | 
|  | 142 | "	sw	$10, -104(%1)	\n" | 
|  | 143 | "	sw	$11, -100(%1)	\n" | 
|  | 144 | # endif | 
|  | 145 | "	daddiu	%0, %0, 32	\n" | 
|  | 146 | "	daddiu	%1, %1, 32	\n" | 
|  | 147 | "	bnel	%0, %2, 1b	\n" | 
|  | 148 | "	 pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n" | 
|  | 149 | "	daddiu	%0, %0, -128	\n" | 
|  | 150 | "	daddiu	%1, %1, -128	\n" | 
|  | 151 | #endif | 
| Ralf Baechle | 875d43e | 2005-09-03 15:56:16 -0700 | [diff] [blame] | 152 | #ifdef CONFIG_64BIT | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 153 | "	ld	$8, 0(%0)	\n"  /* Block copy a cacheline */ | 
|  | 154 | "1:	ld	$9, 8(%0)	\n" | 
|  | 155 | "	ld	$10, 16(%0)	\n" | 
|  | 156 | "	ld	$11, 24(%0)	\n" | 
|  | 157 | "	sd	$8, 0(%1)	\n" | 
|  | 158 | "	sd	$9, 8(%1)	\n" | 
|  | 159 | "	sd	$10, 16(%1)	\n" | 
|  | 160 | "	sd	$11, 24(%1)	\n" | 
|  | 161 | #else | 
|  | 162 | "	lw	$2, 0(%0)	\n"  /* Block copy a cacheline */ | 
|  | 163 | "1:	lw	$3, 4(%0)	\n" | 
|  | 164 | "	lw	$6, 8(%0)	\n" | 
|  | 165 | "	lw	$7, 12(%0)	\n" | 
|  | 166 | "	lw	$8, 16(%0)	\n" | 
|  | 167 | "	lw	$9, 20(%0)	\n" | 
|  | 168 | "	lw	$10, 24(%0)	\n" | 
|  | 169 | "	lw	$11, 28(%0)	\n" | 
|  | 170 | "	sw	$2, 0(%1)	\n" | 
|  | 171 | "	sw	$3, 4(%1)	\n" | 
|  | 172 | "	sw	$6, 8(%1)	\n" | 
|  | 173 | "	sw	$7, 12(%1)	\n" | 
|  | 174 | "	sw	$8, 16(%1)	\n" | 
|  | 175 | "	sw	$9, 20(%1)	\n" | 
|  | 176 | "	sw	$10, 24(%1)	\n" | 
|  | 177 | "	sw	$11, 28(%1)	\n" | 
|  | 178 | #endif | 
|  | 179 | "	daddiu	%0, %0, 32	\n" | 
|  | 180 | "	daddiu	%1, %1, 32	\n" | 
|  | 181 | "	bnel	%0, %2, 1b	\n" | 
| Ralf Baechle | 875d43e | 2005-09-03 15:56:16 -0700 | [diff] [blame] | 182 | #ifdef CONFIG_64BIT | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | "	 ld	$8, 0(%0)	\n" | 
|  | 184 | #else | 
|  | 185 | "	 lw	$2, 0(%0)	\n" | 
|  | 186 | #endif | 
|  | 187 | "	.set	pop		\n" | 
|  | 188 | : "+r" (src), "+r" (dst) | 
|  | 189 | : "r" (end) | 
| Ralf Baechle | 875d43e | 2005-09-03 15:56:16 -0700 | [diff] [blame] | 190 | #ifdef CONFIG_64BIT | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 191 | : "$8","$9","$10","$11","memory"); | 
|  | 192 | #else | 
|  | 193 | : "$2","$3","$6","$7","$8","$9","$10","$11","memory"); | 
|  | 194 | #endif | 
|  | 195 | } | 
|  | 196 |  | 
|  | 197 |  | 
|  | 198 | #ifdef CONFIG_SIBYTE_DMA_PAGEOPS | 
|  | 199 |  | 
|  | 200 | /* | 
|  | 201 | * Pad descriptors to cacheline, since each is exclusively owned by a | 
| Ralf Baechle | 42a3b4f | 2005-09-03 15:56:17 -0700 | [diff] [blame] | 202 | * particular CPU. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 203 | */ | 
|  | 204 | typedef struct dmadscr_s { | 
|  | 205 | u64 dscr_a; | 
|  | 206 | u64 dscr_b; | 
|  | 207 | u64 pad_a; | 
|  | 208 | u64 pad_b; | 
|  | 209 | } dmadscr_t; | 
|  | 210 |  | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 211 | static dmadscr_t page_descr[DM_NUM_CHANNELS] | 
|  | 212 | __attribute__((aligned(SMP_CACHE_BYTES))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 213 |  | 
|  | 214 | void sb1_dma_init(void) | 
|  | 215 | { | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 216 | int i; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 217 |  | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 218 | for (i = 0; i < DM_NUM_CHANNELS; i++) { | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 219 | const u64 base_val = CPHYSADDR(&page_descr[i]) | | 
|  | 220 | V_DM_DSCR_BASE_RINGSZ(1); | 
|  | 221 | volatile void *base_reg = | 
|  | 222 | IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 223 |  | 
|  | 224 | __raw_writeq(base_val, base_reg); | 
|  | 225 | __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); | 
|  | 226 | __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); | 
|  | 227 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 228 | } | 
|  | 229 |  | 
|  | 230 | void clear_page(void *page) | 
|  | 231 | { | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 232 | u64 to_phys = CPHYSADDR(page); | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 233 | unsigned int cpu = smp_processor_id(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 234 |  | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 235 | /* if the page is not in KSEG0, use old way */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 236 | if ((long)KSEGX(page) != (long)CKSEG0) | 
|  | 237 | return clear_page_cpu(page); | 
|  | 238 |  | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 239 | page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | | 
|  | 240 | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 241 | page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); | 
| Maciej W. Rozycki | 65bda1a | 2005-02-22 21:51:30 +0000 | [diff] [blame] | 242 | __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 243 |  | 
|  | 244 | /* | 
|  | 245 | * Don't really want to do it this way, but there's no | 
|  | 246 | * reliable way to delay completion detection. | 
|  | 247 | */ | 
| Maciej W. Rozycki | 65bda1a | 2005-02-22 21:51:30 +0000 | [diff] [blame] | 248 | while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 249 | & M_DM_DSCR_BASE_INTERRUPT)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 250 | ; | 
| Maciej W. Rozycki | 65bda1a | 2005-02-22 21:51:30 +0000 | [diff] [blame] | 251 | __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 252 | } | 
|  | 253 |  | 
|  | 254 | void copy_page(void *to, void *from) | 
|  | 255 | { | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 256 | u64 from_phys = CPHYSADDR(from); | 
|  | 257 | u64 to_phys = CPHYSADDR(to); | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 258 | unsigned int cpu = smp_processor_id(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 259 |  | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 260 | /* if any page is not in KSEG0, use old way */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 261 | if ((long)KSEGX(to) != (long)CKSEG0 | 
|  | 262 | || (long)KSEGX(from) != (long)CKSEG0) | 
|  | 263 | return copy_page_cpu(to, from); | 
|  | 264 |  | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 265 | page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | | 
|  | 266 | M_DM_DSCRA_INTERRUPT; | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 267 | page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 268 | __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 269 |  | 
|  | 270 | /* | 
|  | 271 | * Don't really want to do it this way, but there's no | 
|  | 272 | * reliable way to delay completion detection. | 
|  | 273 | */ | 
| Maciej W. Rozycki | 202d038 | 2005-04-01 17:53:33 +0000 | [diff] [blame] | 274 | while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) | 
| Thiemo Seufer | 685f779 | 2005-02-25 13:11:18 +0000 | [diff] [blame] | 275 | & M_DM_DSCR_BASE_INTERRUPT)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 276 | ; | 
| Maciej W. Rozycki | 65bda1a | 2005-02-22 21:51:30 +0000 | [diff] [blame] | 277 | __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 278 | } | 
|  | 279 |  | 
|  | 280 | #else /* !CONFIG_SIBYTE_DMA_PAGEOPS */ | 
|  | 281 |  | 
/* Zero one page; without the DMA page ops this is always done by the CPU. */
void clear_page(void *page)
{
	clear_page_cpu(page);
}
|  | 286 |  | 
/* Copy one page; without the DMA page ops this is always done by the CPU. */
void copy_page(void *to, void *from)
{
	copy_page_cpu(to, from);
}
|  | 291 |  | 
|  | 292 | #endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */ | 
|  | 293 |  | 
/* Both page operations are exported, whichever implementation was built. */
EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);