| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright 2010 Tilera Corporation. All Rights Reserved. | 
 | 3 |  * | 
 | 4 |  *   This program is free software; you can redistribute it and/or | 
 | 5 |  *   modify it under the terms of the GNU General Public License | 
 | 6 |  *   as published by the Free Software Foundation, version 2. | 
 | 7 |  * | 
 | 8 |  *   This program is distributed in the hope that it will be useful, but | 
 | 9 |  *   WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 10 |  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | 
 | 11 |  *   NON INFRINGEMENT.  See the GNU General Public License for | 
 | 12 |  *   more details. | 
 | 13 |  */ | 
 | 14 |  | 
 | 15 | #include <linux/string.h> | 
 | 16 | #include <linux/smp.h> | 
 | 17 | #include <linux/module.h> | 
 | 18 | #include <linux/uaccess.h> | 
 | 19 | #include <asm/fixmap.h> | 
 | 20 | #include <asm/kmap_types.h> | 
 | 21 | #include <asm/tlbflush.h> | 
 | 22 | #include <hv/hypervisor.h> | 
 | 23 | #include <arch/chip.h> | 
 | 24 |  | 
 | 25 |  | 
 | 26 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | 
 | 27 |  | 
 | 28 | /* Defined in memcpy.S */ | 
 | 29 | extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); | 
 | 30 | extern unsigned long __copy_to_user_inatomic_asm( | 
 | 31 | 	void __user *to, const void *from, unsigned long n); | 
 | 32 | extern unsigned long __copy_from_user_inatomic_asm( | 
 | 33 | 	void *to, const void __user *from, unsigned long n); | 
 | 34 | extern unsigned long __copy_from_user_zeroing_asm( | 
 | 35 | 	void *to, const void __user *from, unsigned long n); | 
 | 36 |  | 
 | 37 | typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); | 
 | 38 |  | 
 | 39 | /* Size above which to consider TLB games for performance */ | 
 | 40 | #define LARGE_COPY_CUTOFF 2048 | 
 | 41 |  | 
 | 42 | /* Communicate to the simulator what we are trying to do. */ | 
 | 43 | #define sim_allow_multiple_caching(b) \ | 
 | 44 |   __insn_mtspr(SPR_SIM_CONTROL, \ | 
 | 45 |    SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) | 
 | 46 |  | 
 | 47 | /* | 
 | 48 |  * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. | 
 | 49 |  * | 
 | 50 |  * We set up our own source and destination PTEs that we fully control. | 
 | 51 |  * This is the only way to guarantee that we don't race with another | 
 | 52 |  * thread that is modifying the PTE; we can't afford to try the | 
 | 53 |  * copy_{to,from}_user() technique of catching the interrupt, since | 
 | 54 |  * we must run with interrupts disabled to avoid the risk of some | 
 | 55 |  * other code seeing the incoherent data in our cache.  (Recall that | 
 | 56 |  * our cache is indexed by PA, so even if the other code doesn't use | 
| Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 57 |  * our kmap_atomic virtual addresses, they'll still hit in cache using | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 58 |  * the normal VAs that aren't supposed to hit in cache.) | 
 | 59 |  */ | 
 | 60 | static void memcpy_multicache(void *dest, const void *source, | 
 | 61 | 			      pte_t dst_pte, pte_t src_pte, int len) | 
 | 62 | { | 
| Chris Metcalf | 0707ad3 | 2010-06-25 17:04:17 -0400 | [diff] [blame] | 63 | 	int idx; | 
 | 64 | 	unsigned long flags, newsrc, newdst; | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 65 | 	pmd_t *pmdp; | 
 | 66 | 	pte_t *ptep; | 
| Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 67 | 	int type0, type1; | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 68 | 	int cpu = get_cpu(); | 
 | 69 |  | 
 | 70 | 	/* | 
 | 71 | 	 * Disable interrupts so that we don't recurse into memcpy() | 
 | 72 | 	 * in an interrupt handler, nor accidentally reference | 
 | 73 | 	 * the PA of the source from an interrupt routine.  Also | 
 | 74 | 	 * notify the simulator that we're playing games so we don't | 
 | 75 | 	 * generate spurious coherency warnings. | 
 | 76 | 	 */ | 
 | 77 | 	local_irq_save(flags); | 
 | 78 | 	sim_allow_multiple_caching(1); | 
 | 79 |  | 
 | 80 | 	/* Set up the new dest mapping */ | 
| Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 81 | 	type0 = kmap_atomic_idx_push(); | 
 | 82 | 	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 83 | 	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); | 
 | 84 | 	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); | 
 | 85 | 	ptep = pte_offset_kernel(pmdp, newdst); | 
 | 86 | 	if (pte_val(*ptep) != pte_val(dst_pte)) { | 
 | 87 | 		set_pte(ptep, dst_pte); | 
 | 88 | 		local_flush_tlb_page(NULL, newdst, PAGE_SIZE); | 
 | 89 | 	} | 
 | 90 |  | 
 | 91 | 	/* Set up the new source mapping */ | 
| Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 92 | 	type1 = kmap_atomic_idx_push(); | 
 | 93 | 	idx += (type0 - type1); | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 94 | 	src_pte = hv_pte_set_nc(src_pte); | 
 | 95 | 	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */ | 
 | 96 | 	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | 
 | 97 | 	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | 
 | 98 | 	ptep = pte_offset_kernel(pmdp, newsrc); | 
| Chris Metcalf | 76c567f | 2011-02-28 16:37:34 -0500 | [diff] [blame] | 99 | 	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */ | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 100 | 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 
 | 101 |  | 
 | 102 | 	/* Actually move the data. */ | 
 | 103 | 	__memcpy_asm((void *)newdst, (const void *)newsrc, len); | 
 | 104 |  | 
 | 105 | 	/* | 
 | 106 | 	 * Remap the source as locally-cached and not OLOC'ed so that | 
 | 107 | 	 * we can inval without also invaling the remote cpu's cache. | 
 | 108 | 	 * This also avoids known errata with inv'ing cacheable oloc data. | 
 | 109 | 	 */ | 
 | 110 | 	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | 
 | 111 | 	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | 
| Chris Metcalf | 76c567f | 2011-02-28 16:37:34 -0500 | [diff] [blame] | 112 | 	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */ | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 113 | 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 
 | 114 |  | 
 | 115 | 	/* | 
 | 116 | 	 * Do the actual invalidation, covering the full L2 cache line | 
 | 117 | 	 * at the end since __memcpy_asm() is somewhat aggressive. | 
 | 118 | 	 */ | 
 | 119 | 	__inv_buffer((void *)newsrc, len); | 
 | 120 |  | 
 | 121 | 	/* | 
 | 122 | 	 * We're done: notify the simulator that all is back to normal, | 
 | 123 | 	 * and re-enable interrupts and pre-emption. | 
 | 124 | 	 */ | 
| Chris Metcalf | 38a6f42 | 2010-11-01 15:21:35 -0400 | [diff] [blame] | 125 | 	kmap_atomic_idx_pop(); | 
 | 126 | 	kmap_atomic_idx_pop(); | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 127 | 	sim_allow_multiple_caching(0); | 
 | 128 | 	local_irq_restore(flags); | 
| Chris Metcalf | 0707ad3 | 2010-06-25 17:04:17 -0400 | [diff] [blame] | 129 | 	put_cpu(); | 
| Chris Metcalf | 867e359 | 2010-05-28 23:09:12 -0400 | [diff] [blame] | 130 | } | 
 | 131 |  | 
 | 132 | /* | 
 | 133 |  * Identify large copies from remotely-cached memory, and copy them | 
 | 134 |  * via memcpy_multicache() if they look good, otherwise fall back | 
 | 135 |  * to the particular kind of copying passed as the memcpy_t function. | 
 | 136 |  */ | 
 | 137 | static unsigned long fast_copy(void *dest, const void *source, int len, | 
 | 138 | 			       memcpy_t func) | 
 | 139 | { | 
 | 140 | 	/* | 
 | 141 | 	 * Check if it's big enough to bother with.  We may end up doing a | 
 | 142 | 	 * small copy via TLB manipulation if we're near a page boundary, | 
 | 143 | 	 * but presumably we'll make it up when we hit the second page. | 
 | 144 | 	 */ | 
 | 145 | 	while (len >= LARGE_COPY_CUTOFF) { | 
 | 146 | 		int copy_size, bytes_left_on_page; | 
 | 147 | 		pte_t *src_ptep, *dst_ptep; | 
 | 148 | 		pte_t src_pte, dst_pte; | 
 | 149 | 		struct page *src_page, *dst_page; | 
 | 150 |  | 
 | 151 | 		/* Is the source page oloc'ed to a remote cpu? */ | 
 | 152 | retry_source: | 
 | 153 | 		src_ptep = virt_to_pte(current->mm, (unsigned long)source); | 
 | 154 | 		if (src_ptep == NULL) | 
 | 155 | 			break; | 
 | 156 | 		src_pte = *src_ptep; | 
 | 157 | 		if (!hv_pte_get_present(src_pte) || | 
 | 158 | 		    !hv_pte_get_readable(src_pte) || | 
 | 159 | 		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) | 
 | 160 | 			break; | 
 | 161 | 		if (get_remote_cache_cpu(src_pte) == smp_processor_id()) | 
 | 162 | 			break; | 
 | 163 | 		src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); | 
 | 164 | 		get_page(src_page); | 
 | 165 | 		if (pte_val(src_pte) != pte_val(*src_ptep)) { | 
 | 166 | 			put_page(src_page); | 
 | 167 | 			goto retry_source; | 
 | 168 | 		} | 
 | 169 | 		if (pte_huge(src_pte)) { | 
 | 170 | 			/* Adjust the PTE to correspond to a small page */ | 
 | 171 | 			int pfn = hv_pte_get_pfn(src_pte); | 
 | 172 | 			pfn += (((unsigned long)source & (HPAGE_SIZE-1)) | 
 | 173 | 				>> PAGE_SHIFT); | 
 | 174 | 			src_pte = pfn_pte(pfn, src_pte); | 
 | 175 | 			src_pte = pte_mksmall(src_pte); | 
 | 176 | 		} | 
 | 177 |  | 
 | 178 | 		/* Is the destination page writable? */ | 
 | 179 | retry_dest: | 
 | 180 | 		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); | 
 | 181 | 		if (dst_ptep == NULL) { | 
 | 182 | 			put_page(src_page); | 
 | 183 | 			break; | 
 | 184 | 		} | 
 | 185 | 		dst_pte = *dst_ptep; | 
 | 186 | 		if (!hv_pte_get_present(dst_pte) || | 
 | 187 | 		    !hv_pte_get_writable(dst_pte)) { | 
 | 188 | 			put_page(src_page); | 
 | 189 | 			break; | 
 | 190 | 		} | 
 | 191 | 		dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); | 
 | 192 | 		if (dst_page == src_page) { | 
 | 193 | 			/* | 
 | 194 | 			 * Source and dest are on the same page; this | 
 | 195 | 			 * potentially exposes us to incoherence if any | 
 | 196 | 			 * part of src and dest overlap on a cache line. | 
 | 197 | 			 * Just give up rather than trying to be precise. | 
 | 198 | 			 */ | 
 | 199 | 			put_page(src_page); | 
 | 200 | 			break; | 
 | 201 | 		} | 
 | 202 | 		get_page(dst_page); | 
 | 203 | 		if (pte_val(dst_pte) != pte_val(*dst_ptep)) { | 
 | 204 | 			put_page(dst_page); | 
 | 205 | 			goto retry_dest; | 
 | 206 | 		} | 
 | 207 | 		if (pte_huge(dst_pte)) { | 
 | 208 | 			/* Adjust the PTE to correspond to a small page */ | 
 | 209 | 			int pfn = hv_pte_get_pfn(dst_pte); | 
 | 210 | 			pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) | 
 | 211 | 				>> PAGE_SHIFT); | 
 | 212 | 			dst_pte = pfn_pte(pfn, dst_pte); | 
 | 213 | 			dst_pte = pte_mksmall(dst_pte); | 
 | 214 | 		} | 
 | 215 |  | 
 | 216 | 		/* All looks good: create a cachable PTE and copy from it */ | 
 | 217 | 		copy_size = len; | 
 | 218 | 		bytes_left_on_page = | 
 | 219 | 			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); | 
 | 220 | 		if (copy_size > bytes_left_on_page) | 
 | 221 | 			copy_size = bytes_left_on_page; | 
 | 222 | 		bytes_left_on_page = | 
 | 223 | 			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); | 
 | 224 | 		if (copy_size > bytes_left_on_page) | 
 | 225 | 			copy_size = bytes_left_on_page; | 
 | 226 | 		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); | 
 | 227 |  | 
 | 228 | 		/* Release the pages */ | 
 | 229 | 		put_page(dst_page); | 
 | 230 | 		put_page(src_page); | 
 | 231 |  | 
 | 232 | 		/* Continue on the next page */ | 
 | 233 | 		dest += copy_size; | 
 | 234 | 		source += copy_size; | 
 | 235 | 		len -= copy_size; | 
 | 236 | 	} | 
 | 237 |  | 
 | 238 | 	return func(dest, source, len); | 
 | 239 | } | 
 | 240 |  | 
 | 241 | void *memcpy(void *to, const void *from, __kernel_size_t n) | 
 | 242 | { | 
 | 243 | 	if (n < LARGE_COPY_CUTOFF) | 
 | 244 | 		return (void *)__memcpy_asm(to, from, n); | 
 | 245 | 	else | 
 | 246 | 		return (void *)fast_copy(to, from, n, __memcpy_asm); | 
 | 247 | } | 
 | 248 |  | 
 | 249 | unsigned long __copy_to_user_inatomic(void __user *to, const void *from, | 
 | 250 | 				      unsigned long n) | 
 | 251 | { | 
 | 252 | 	if (n < LARGE_COPY_CUTOFF) | 
 | 253 | 		return __copy_to_user_inatomic_asm(to, from, n); | 
 | 254 | 	else | 
 | 255 | 		return fast_copy(to, from, n, __copy_to_user_inatomic_asm); | 
 | 256 | } | 
 | 257 |  | 
 | 258 | unsigned long __copy_from_user_inatomic(void *to, const void __user *from, | 
 | 259 | 					unsigned long n) | 
 | 260 | { | 
 | 261 | 	if (n < LARGE_COPY_CUTOFF) | 
 | 262 | 		return __copy_from_user_inatomic_asm(to, from, n); | 
 | 263 | 	else | 
 | 264 | 		return fast_copy(to, from, n, __copy_from_user_inatomic_asm); | 
 | 265 | } | 
 | 266 |  | 
 | 267 | unsigned long __copy_from_user_zeroing(void *to, const void __user *from, | 
 | 268 | 				       unsigned long n) | 
 | 269 | { | 
 | 270 | 	if (n < LARGE_COPY_CUTOFF) | 
 | 271 | 		return __copy_from_user_zeroing_asm(to, from, n); | 
 | 272 | 	else | 
 | 273 | 		return fast_copy(to, from, n, __copy_from_user_zeroing_asm); | 
 | 274 | } | 
 | 275 |  | 
 | 276 | #endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ |