/*
 * include/asm-generic/xor.h
 *
 * Generic optimized RAID-5 checksumming functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
|  | 15 |  | 
|  | 16 | #include <asm/processor.h> | 
|  | 17 |  | 
/*
 * xor_8regs_2 - XOR one source buffer into the destination, line by line.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i]
 * @p2:    source, only read
 *
 * A "line" is 8 unsigned longs and the loop is unrolled to cover exactly one
 * line per pass.  Only whole lines are processed: a trailing partial line is
 * left untouched, and a length shorter than one line is a no-op.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
|  | 36 |  | 
/*
 * xor_8regs_3 - XOR two source buffers into the destination, line by line.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i] ^ p3[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 *
 * A "line" is 8 unsigned longs and the loop is unrolled to cover exactly one
 * line per pass.  Only whole lines are processed: a trailing partial line is
 * left untouched, and a length shorter than one line is a no-op.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
|  | 57 |  | 
/*
 * xor_8regs_4 - XOR three source buffers into the destination, line by line.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 *
 * A "line" is 8 unsigned longs and the loop is unrolled to cover exactly one
 * line per pass.  Only whole lines are processed: a trailing partial line is
 * left untouched, and a length shorter than one line is a no-op.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
|  | 79 |  | 
/*
 * xor_8regs_5 - XOR four source buffers into the destination, line by line.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i] ^ p5[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 * @p5:    fourth source, only read
 *
 * A "line" is 8 unsigned longs and the loop is unrolled to cover exactly one
 * line per pass.  Only whole lines are processed: a trailing partial line is
 * left untouched, and a length shorter than one line is a no-op.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
|  | 102 |  | 
/*
 * xor_32regs_2 - XOR one source buffer into the destination via temporaries.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i]
 * @p2:    source, only read
 *
 * Each pass loads one line (8 unsigned longs) of @p1 into local temporaries,
 * folds the matching line of @p2 into them and stores the line back.  The
 * temporaries are unsigned long, matching the buffer element type, so no
 * value is converted through a signed type on the way.
 *
 * Only whole lines are processed: a trailing partial line is left untouched,
 * and a length shorter than one line is a no-op.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
|  | 138 |  | 
/*
 * xor_32regs_3 - XOR two source buffers into the destination via temporaries.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i] ^ p3[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 *
 * Each pass loads one line (8 unsigned longs) of @p1 into local temporaries,
 * folds the matching line of every source into them and stores the line
 * back.  The temporaries are unsigned long, matching the buffer element
 * type, so no value is converted through a signed type on the way.
 *
 * Only whole lines are processed: a trailing partial line is left untouched,
 * and a length shorter than one line is a no-op.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
|  | 184 |  | 
/*
 * xor_32regs_4 - XOR three source buffers into the destination via
 *                temporaries.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 *
 * Each pass loads one line (8 unsigned longs) of @p1 into local temporaries,
 * folds the matching line of every source into them and stores the line
 * back.  The temporaries are unsigned long, matching the buffer element
 * type, so no value is converted through a signed type on the way.
 *
 * Only whole lines are processed: a trailing partial line is left untouched,
 * and a length shorter than one line is a no-op.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
|  | 239 |  | 
/*
 * xor_32regs_5 - XOR four source buffers into the destination via
 *                temporaries.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i] ^ p5[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 * @p5:    fourth source, only read
 *
 * Each pass loads one line (8 unsigned longs) of @p1 into local temporaries,
 * folds the matching line of every source into them and stores the line
 * back.  The temporaries are unsigned long, matching the buffer element
 * type, so no value is converted through a signed type on the way.
 *
 * Only whole lines are processed: a trailing partial line is left untouched,
 * and a length shorter than one line is a no-op.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always runs once; without this check a short
	 * (or zero) length would touch a full line past the buffers.
	 */
	if (lines <= 0)
		return;

	do {
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
|  | 303 |  | 
/*
 * xor_8regs_p_2 - XOR @p2 into @p1 one line (8 unsigned longs) per pass,
 * issuing prefetchw()/prefetch() for the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i]
 * @p2:    source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}
		first_pass = 0;

		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];

		p1 += 8;
		p2 += 8;
	} while (--ahead >= 0);
}
|  | 329 |  | 
/*
 * xor_8regs_p_3 - XOR @p2 and @p3 into @p1 one line (8 unsigned longs) per
 * pass, issuing prefetchw()/prefetch() for the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i] ^ p3[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}
		first_pass = 0;

		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--ahead >= 0);
}
|  | 359 |  | 
/*
 * xor_8regs_p_4 - XOR @p2, @p3 and @p4 into @p1 one line (8 unsigned longs)
 * per pass, issuing prefetchw()/prefetch() for the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}
		first_pass = 0;

		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--ahead >= 0);
}
|  | 393 |  | 
/*
 * xor_8regs_p_5 - XOR @p2 .. @p5 into @p1 one line (8 unsigned longs) per
 * pass, issuing prefetchw()/prefetch() for the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i] ^ p5[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 * @p5:    fourth source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}
		first_pass = 0;

		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--ahead >= 0);
}
|  | 430 |  | 
/*
 * xor_32regs_p_2 - XOR @p2 into @p1 through eight local temporaries, one
 * line (8 unsigned longs) per pass, issuing prefetchw()/prefetch() for the
 * next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i]
 * @p2:    source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		long r0, r1, r2, r3, r4, r5, r6, r7;

		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}
		first_pass = 0;

		/* Load the destination line. */
		r0 = p1[0];
		r1 = p1[1];
		r2 = p1[2];
		r3 = p1[3];
		r4 = p1[4];
		r5 = p1[5];
		r6 = p1[6];
		r7 = p1[7];

		/* Fold in the source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Write the combined line back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
	} while (--ahead >= 0);
}
|  | 475 |  | 
/*
 * xor_32regs_p_3 - XOR @p2 and @p3 into @p1 through eight local temporaries,
 * one line (8 unsigned longs) per pass, issuing prefetchw()/prefetch() for
 * the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes p1[i] ^ p2[i] ^ p3[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		long r0, r1, r2, r3, r4, r5, r6, r7;

		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}
		first_pass = 0;

		/* Load the destination line. */
		r0 = p1[0];
		r1 = p1[1];
		r2 = p1[2];
		r3 = p1[3];
		r4 = p1[4];
		r5 = p1[5];
		r6 = p1[6];
		r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Write the combined line back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--ahead >= 0);
}
|  | 532 |  | 
/*
 * xor_32regs_p_4 - XOR @p2, @p3 and @p4 into @p1 through eight local
 * temporaries, one line (8 unsigned longs) per pass, issuing
 * prefetchw()/prefetch() for the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		long r0, r1, r2, r3, r4, r5, r6, r7;

		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}
		first_pass = 0;

		/* Load the destination line. */
		r0 = p1[0];
		r1 = p1[1];
		r2 = p1[2];
		r3 = p1[3];
		r4 = p1[4];
		r5 = p1[5];
		r6 = p1[6];
		r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Fold in the third source line. */
		r0 ^= p4[0];
		r1 ^= p4[1];
		r2 ^= p4[2];
		r3 ^= p4[3];
		r4 ^= p4[4];
		r5 ^= p4[5];
		r6 ^= p4[6];
		r7 ^= p4[7];

		/* Write the combined line back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--ahead >= 0);
}
|  | 600 |  | 
/*
 * xor_32regs_p_5 - XOR @p2 .. @p5 into @p1 through eight local temporaries,
 * one line (8 unsigned longs) per pass, issuing prefetchw()/prefetch() for
 * the next line of each buffer.
 * @bytes: length of each buffer in bytes
 * @p1:    destination; each processed word becomes
 *         p1[i] ^ p2[i] ^ p3[i] ^ p4[i] ^ p5[i]
 * @p2:    first source, only read
 * @p3:    second source, only read
 * @p4:    third source, only read
 * @p5:    fourth source, only read
 *
 * The first pass always issues the look-ahead prefetch; later passes do so
 * only while a further line remains, so the last line of a multi-line buffer
 * is processed without prefetching beyond it.  The loop body runs at least
 * once, even when @bytes is shorter than one line.
 *
 * NOTE(review): prefetch()/prefetchw() are defined outside this file;
 * presumably read/write cache hints - confirm.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* Whole lines still to come after the one currently being handled. */
	long ahead = bytes / (sizeof (long)) / 8 - 1;
	int first_pass = 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		long r0, r1, r2, r3, r4, r5, r6, r7;

		if (first_pass || ahead > 0) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}
		first_pass = 0;

		/* Load the destination line. */
		r0 = p1[0];
		r1 = p1[1];
		r2 = p1[2];
		r3 = p1[3];
		r4 = p1[4];
		r5 = p1[5];
		r6 = p1[6];
		r7 = p1[7];

		/* Fold in the first source line. */
		r0 ^= p2[0];
		r1 ^= p2[1];
		r2 ^= p2[2];
		r3 ^= p2[3];
		r4 ^= p2[4];
		r5 ^= p2[5];
		r6 ^= p2[6];
		r7 ^= p2[7];

		/* Fold in the second source line. */
		r0 ^= p3[0];
		r1 ^= p3[1];
		r2 ^= p3[2];
		r3 ^= p3[3];
		r4 ^= p3[4];
		r5 ^= p3[5];
		r6 ^= p3[6];
		r7 ^= p3[7];

		/* Fold in the third source line. */
		r0 ^= p4[0];
		r1 ^= p4[1];
		r2 ^= p4[2];
		r3 ^= p4[3];
		r4 ^= p4[4];
		r5 ^= p4[5];
		r6 ^= p4[6];
		r7 ^= p4[7];

		/* Fold in the fourth source line. */
		r0 ^= p5[0];
		r1 ^= p5[1];
		r2 ^= p5[2];
		r3 ^= p5[3];
		r4 ^= p5[4];
		r5 ^= p5[5];
		r6 ^= p5[6];
		r7 ^= p5[7];

		/* Write the combined line back. */
		p1[0] = r0;
		p1[1] = r1;
		p1[2] = r2;
		p1[3] = r3;
		p1[4] = r4;
		p1[5] = r5;
		p1[6] = r6;
		p1[7] = r7;

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--ahead >= 0);
}
|  | 679 |  | 
|  | 680 | static struct xor_block_template xor_block_8regs = { | 
|  | 681 | .name = "8regs", | 
|  | 682 | .do_2 = xor_8regs_2, | 
|  | 683 | .do_3 = xor_8regs_3, | 
|  | 684 | .do_4 = xor_8regs_4, | 
|  | 685 | .do_5 = xor_8regs_5, | 
|  | 686 | }; | 
|  | 687 |  | 
|  | 688 | static struct xor_block_template xor_block_32regs = { | 
|  | 689 | .name = "32regs", | 
|  | 690 | .do_2 = xor_32regs_2, | 
|  | 691 | .do_3 = xor_32regs_3, | 
|  | 692 | .do_4 = xor_32regs_4, | 
|  | 693 | .do_5 = xor_32regs_5, | 
|  | 694 | }; | 
|  | 695 |  | 
|  | 696 | static struct xor_block_template xor_block_8regs_p = { | 
|  | 697 | .name = "8regs_prefetch", | 
|  | 698 | .do_2 = xor_8regs_p_2, | 
|  | 699 | .do_3 = xor_8regs_p_3, | 
|  | 700 | .do_4 = xor_8regs_p_4, | 
|  | 701 | .do_5 = xor_8regs_p_5, | 
|  | 702 | }; | 
|  | 703 |  | 
|  | 704 | static struct xor_block_template xor_block_32regs_p = { | 
|  | 705 | .name = "32regs_prefetch", | 
|  | 706 | .do_2 = xor_32regs_p_2, | 
|  | 707 | .do_3 = xor_32regs_p_3, | 
|  | 708 | .do_4 = xor_32regs_p_4, | 
|  | 709 | .do_5 = xor_32regs_p_5, | 
|  | 710 | }; | 
|  | 711 |  | 
/*
 * Pass each of the four templates defined in this file to xor_speed(), in
 * the order 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.
 * NOTE(review): xor_speed() is defined elsewhere; presumably it benchmarks
 * the template - confirm against the caller.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)