| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * arch/ia64/lib/xor.S | 
|  | 3 | * | 
|  | 4 | * Optimized RAID-5 checksumming functions for IA-64. | 
|  | 5 | * | 
|  | 6 | * This program is free software; you can redistribute it and/or modify | 
|  | 7 | * it under the terms of the GNU General Public License as published by | 
|  | 8 | * the Free Software Foundation; either version 2, or (at your option) | 
|  | 9 | * any later version. | 
|  | 10 | * | 
|  | 11 | * You should have received a copy of the GNU General Public License | 
|  | 12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | 
|  | 13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
|  | 14 | */ | 
|  | 15 |  | 
|  | 16 | #include <asm/asmmacro.h> | 
|  | 17 |  | 
/*
 * xor_ia64_2 - XOR the buffer at in2 into the buffer at in1.
 *
 * Inputs, as used by the code below:
 *   in0 - length; shifted right by 3 to give the number of 8-byte words
 *   in1 - first source and also the destination (copied to r16 and r8)
 *   in2 - second source (copied to r17)
 *
 * The loop is software-pipelined with rotating registers/predicates and
 * br.ctop: stage 0 loads, stage 6 XORs, stage 7 stores, hence
 * ar.ec = 6 + 2.  ar.lc and pr are saved in r30/r29 and restored on exit.
 *
 * NOTE(review): nothing here checks for in0 < 8 (ar.lc would be loaded
 * with -1) or for buffer alignment; presumably callers always pass a
 * non-zero multiple of 8 bytes and 8-byte-aligned buffers - confirm.
 */
|  | 18 | GLOBAL_ENTRY(xor_ia64_2) | 
|  | 19 | .prologue | 
|  | 20 | .fframe 0 | 
|  | 21 | .save ar.pfs, r31 | 
/* 3 inputs, 0 locals, 13 outputs (16-register frame), 16 of them rotating */
|  | 22 | alloc r31 = ar.pfs, 3, 0, 13, 16 | 
|  | 23 | .save ar.lc, r30 | 
|  | 24 | mov r30 = ar.lc | 
|  | 25 | .save pr, r29 | 
|  | 26 | mov r29 = pr | 
|  | 27 | ;; | 
|  | 28 | .body | 
/* r8 = store pointer: results are written back over the in1 buffer */
|  | 29 | mov r8 = in1 | 
/* epilogue count = number of pipeline stages, p[0] .. p[6+1] */
|  | 30 | mov ar.ec = 6 + 2 | 
/* in0 = number of 8-byte words to process */
|  | 31 | shr in0 = in0, 3 | 
|  | 32 | ;; | 
/* ar.lc takes the iteration count minus one */
|  | 33 | adds in0 = -1, in0 | 
/*
 * r16/r17 = load pointers.  The input registers lie inside the rotating
 * region requested by alloc, which is presumably why the pointers are
 * copied to static registers before the loop starts.
 */
|  | 34 | mov r16 = in1 | 
|  | 35 | mov r17 = in2 | 
|  | 36 | ;; | 
|  | 37 | mov ar.lc = in0 | 
/* only p16 (= p[0]) is set, so only stage 0 is active at first */
|  | 38 | mov pr.rot = 1 << 16 | 
|  | 39 | ;; | 
/* 7 + 7 + 2 = 16 rotating registers, matching the alloc above */
|  | 40 | .rotr s1[6+1], s2[6+1], d[2] | 
|  | 41 | .rotp p[6+2] | 
|  | 42 | 0: | 
/* stage 0: load one word from each source, post-incrementing by 8 */
|  | 43 | (p[0])	ld8.nta s1[0] = [r16], 8 | 
|  | 44 | (p[0])	ld8.nta s2[0] = [r17], 8 | 
/* stage 6: XOR the two words that were loaded six rotations earlier */
|  | 45 | (p[6])	xor d[0] = s1[6], s2[6] | 
/* stage 7: store the result of the previous rotation (now named d[1]) */
|  | 46 | (p[6+1])st8.nta [r8] = d[1], 8 | 
/* NOTE(review): nop.f presumably only pads the bundle - confirm */
|  | 47 | nop.f 0 | 
/* counted loop branch; also rotates the registers and predicates */
|  | 48 | br.ctop.dptk.few 0b | 
|  | 49 | ;; | 
/* restore the loop counter and all predicates saved in the prologue */
|  | 50 | mov ar.lc = r30 | 
|  | 51 | mov pr = r29, -1 | 
|  | 52 | br.ret.sptk.few rp | 
|  | 53 | END(xor_ia64_2) | 
|  | 54 |  | 
/*
 * xor_ia64_3 - XOR the buffers at in2 and in3 into the buffer at in1.
 *
 * Inputs, as used by the code below:
 *   in0 - length; shifted right by 3 to give the number of 8-byte words
 *   in1 - first source and also the destination (copied to r16 and r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *
 * The loop is software-pipelined with rotating registers/predicates and
 * br.ctop: stage 0 loads, stage 6 XORs, stage 7 stores, hence
 * ar.ec = 6 + 2.  Each iteration is split into two instruction groups so
 * that the second XOR can consume the d[0] produced by the first.
 * ar.lc and pr are saved in r30/r29 and restored on exit.
 *
 * NOTE(review): nothing here checks for in0 < 8 (ar.lc would be loaded
 * with -1) or for buffer alignment; presumably callers always pass a
 * non-zero multiple of 8 bytes and 8-byte-aligned buffers - confirm.
 */
|  | 55 | GLOBAL_ENTRY(xor_ia64_3) | 
|  | 56 | .prologue | 
|  | 57 | .fframe 0 | 
|  | 58 | .save ar.pfs, r31 | 
/* 4 inputs, 0 locals, 20 outputs (24-register frame), 24 of them rotating */
|  | 59 | alloc r31 = ar.pfs, 4, 0, 20, 24 | 
|  | 60 | .save ar.lc, r30 | 
|  | 61 | mov r30 = ar.lc | 
|  | 62 | .save pr, r29 | 
|  | 63 | mov r29 = pr | 
|  | 64 | ;; | 
|  | 65 | .body | 
/* r8 = store pointer: results are written back over the in1 buffer */
|  | 66 | mov r8 = in1 | 
/* epilogue count = number of pipeline stages, p[0] .. p[6+1] */
|  | 67 | mov ar.ec = 6 + 2 | 
/* in0 = number of 8-byte words to process */
|  | 68 | shr in0 = in0, 3 | 
|  | 69 | ;; | 
/* ar.lc takes the iteration count minus one */
|  | 70 | adds in0 = -1, in0 | 
/*
 * r16..r18 = load pointers.  The input registers lie inside the rotating
 * region requested by alloc, which is presumably why the pointers are
 * copied to static registers before the loop starts.
 */
|  | 71 | mov r16 = in1 | 
|  | 72 | mov r17 = in2 | 
|  | 73 | ;; | 
|  | 74 | mov r18 = in3 | 
|  | 75 | mov ar.lc = in0 | 
/* only p16 (= p[0]) is set, so only stage 0 is active at first */
|  | 76 | mov pr.rot = 1 << 16 | 
|  | 77 | ;; | 
/* 3 * 7 + 2 = 23 rotating registers, within the 24 requested by alloc */
|  | 78 | .rotr s1[6+1], s2[6+1], s3[6+1], d[2] | 
|  | 79 | .rotp p[6+2] | 
|  | 80 | 0: | 
/* group 1: stage-0 loads of sources 1 and 2, stage-6 XOR of that pair */
|  | 81 | (p[0])	ld8.nta s1[0] = [r16], 8 | 
|  | 82 | (p[0])	ld8.nta s2[0] = [r17], 8 | 
|  | 83 | (p[6])	xor d[0] = s1[6], s2[6] | 
/* stop: the XOR below reads the d[0] written just above */
|  | 84 | ;; | 
/* group 2: stage-0 load of source 3 */
|  | 85 | (p[0])	ld8.nta s3[0] = [r18], 8 | 
/* stage 7: store the result of the previous rotation (now named d[1]) */
|  | 86 | (p[6+1])st8.nta [r8] = d[1], 8 | 
/* stage 6: fold the third source word into the partial result */
|  | 87 | (p[6])	xor d[0] = d[0], s3[6] | 
/* counted loop branch; also rotates the registers and predicates */
|  | 88 | br.ctop.dptk.few 0b | 
|  | 89 | ;; | 
/* restore the loop counter and all predicates saved in the prologue */
|  | 90 | mov ar.lc = r30 | 
|  | 91 | mov pr = r29, -1 | 
|  | 92 | br.ret.sptk.few rp | 
|  | 93 | END(xor_ia64_3) | 
|  | 94 |  | 
/*
 * xor_ia64_4 - XOR the buffers at in2, in3 and in4 into the buffer at in1.
 *
 * Inputs, as used by the code below:
 *   in0 - length; shifted right by 3 to give the number of 8-byte words
 *   in1 - first source and also the destination (copied to r16 and r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *   in4 - fourth source (copied to r19)
 *
 * The loop is software-pipelined with rotating registers/predicates and
 * br.ctop: stage 0 loads, stage 6 XORs, stage 7 stores, hence
 * ar.ec = 6 + 2.  The first instruction group XORs the sources pairwise
 * (into d[0] and the static scratch register r20); the second group
 * combines the two partial results.
 * ar.lc and pr are saved in r30/r29 and restored on exit.
 *
 * NOTE(review): nothing here checks for in0 < 8 (ar.lc would be loaded
 * with -1) or for buffer alignment; presumably callers always pass a
 * non-zero multiple of 8 bytes and 8-byte-aligned buffers - confirm.
 */
|  | 95 | GLOBAL_ENTRY(xor_ia64_4) | 
|  | 96 | .prologue | 
|  | 97 | .fframe 0 | 
|  | 98 | .save ar.pfs, r31 | 
/* 5 inputs, 0 locals, 27 outputs (32-register frame), 32 of them rotating */
|  | 99 | alloc r31 = ar.pfs, 5, 0, 27, 32 | 
|  | 100 | .save ar.lc, r30 | 
|  | 101 | mov r30 = ar.lc | 
|  | 102 | .save pr, r29 | 
|  | 103 | mov r29 = pr | 
|  | 104 | ;; | 
|  | 105 | .body | 
/* r8 = store pointer: results are written back over the in1 buffer */
|  | 106 | mov r8 = in1 | 
/* epilogue count = number of pipeline stages, p[0] .. p[6+1] */
|  | 107 | mov ar.ec = 6 + 2 | 
/* in0 = number of 8-byte words to process */
|  | 108 | shr in0 = in0, 3 | 
|  | 109 | ;; | 
/* ar.lc takes the iteration count minus one */
|  | 110 | adds in0 = -1, in0 | 
/*
 * r16..r19 = load pointers.  The input registers lie inside the rotating
 * region requested by alloc, which is presumably why the pointers are
 * copied to static registers before the loop starts.
 */
|  | 111 | mov r16 = in1 | 
|  | 112 | mov r17 = in2 | 
|  | 113 | ;; | 
|  | 114 | mov r18 = in3 | 
|  | 115 | mov ar.lc = in0 | 
/* only p16 (= p[0]) is set, so only stage 0 is active at first */
|  | 116 | mov pr.rot = 1 << 16 | 
|  | 117 | mov r19 = in4 | 
|  | 118 | ;; | 
/* 4 * 7 + 2 = 30 rotating registers, within the 32 requested by alloc */
|  | 119 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] | 
|  | 120 | .rotp p[6+2] | 
|  | 121 | 0: | 
/* group 1: stage-0 loads of all four sources, stage-6 pairwise XORs */
|  | 122 | (p[0])	ld8.nta s1[0] = [r16], 8 | 
|  | 123 | (p[0])	ld8.nta s2[0] = [r17], 8 | 
|  | 124 | (p[6])	xor d[0] = s1[6], s2[6] | 
|  | 125 | (p[0])	ld8.nta s3[0] = [r18], 8 | 
|  | 126 | (p[0])	ld8.nta s4[0] = [r19], 8 | 
/* r20 is a static scratch register holding the s3 ^ s4 partial result */
|  | 127 | (p[6])	xor r20 = s3[6], s4[6] | 
/* stop: the XOR below reads the d[0] and r20 written just above */
|  | 128 | ;; | 
/* stage 7: store the result of the previous rotation (now named d[1]) */
|  | 129 | (p[6+1])st8.nta [r8] = d[1], 8 | 
/* stage 6: combine the two partial results into d[0] */
|  | 130 | (p[6])	xor d[0] = d[0], r20 | 
/* counted loop branch; also rotates the registers and predicates */
|  | 131 | br.ctop.dptk.few 0b | 
|  | 132 | ;; | 
/* restore the loop counter and all predicates saved in the prologue */
|  | 133 | mov ar.lc = r30 | 
|  | 134 | mov pr = r29, -1 | 
|  | 135 | br.ret.sptk.few rp | 
|  | 136 | END(xor_ia64_4) | 
|  | 137 |  | 
/*
 * xor_ia64_5 - XOR the buffers at in2 .. in5 into the buffer at in1.
 *
 * Inputs, as used by the code below:
 *   in0 - length; shifted right by 3 to give the number of 8-byte words
 *   in1 - first source and also the destination (copied to r16 and r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *   in4 - fourth source (copied to r19)
 *   in5 - fifth source (copied to r20)
 *
 * The loop is software-pipelined with rotating registers/predicates and
 * br.ctop: stage 0 loads, stage 6 XORs, stage 7 stores, hence
 * ar.ec = 6 + 2.  Each iteration uses three instruction groups: pairwise
 * XORs (into d[0] and the static scratch register r21), then the
 * combination of those two, then the fold-in of the fifth source word.
 * ar.lc and pr are saved in r30/r29 and restored on exit.
 *
 * NOTE(review): nothing here checks for in0 < 8 (ar.lc would be loaded
 * with -1) or for buffer alignment; presumably callers always pass a
 * non-zero multiple of 8 bytes and 8-byte-aligned buffers - confirm.
 */
|  | 138 | GLOBAL_ENTRY(xor_ia64_5) | 
|  | 139 | .prologue | 
|  | 140 | .fframe 0 | 
|  | 141 | .save ar.pfs, r31 | 
/* 6 inputs, 0 locals, 34 outputs (40-register frame), 40 of them rotating */
|  | 142 | alloc r31 = ar.pfs, 6, 0, 34, 40 | 
|  | 143 | .save ar.lc, r30 | 
|  | 144 | mov r30 = ar.lc | 
|  | 145 | .save pr, r29 | 
|  | 146 | mov r29 = pr | 
|  | 147 | ;; | 
|  | 148 | .body | 
/* r8 = store pointer: results are written back over the in1 buffer */
|  | 149 | mov r8 = in1 | 
/* epilogue count = number of pipeline stages, p[0] .. p[6+1] */
|  | 150 | mov ar.ec = 6 + 2 | 
/* in0 = number of 8-byte words to process */
|  | 151 | shr in0 = in0, 3 | 
|  | 152 | ;; | 
/* ar.lc takes the iteration count minus one */
|  | 153 | adds in0 = -1, in0 | 
/*
 * r16..r20 = load pointers.  The input registers lie inside the rotating
 * region requested by alloc, which is presumably why the pointers are
 * copied to static registers before the loop starts.
 */
|  | 154 | mov r16 = in1 | 
|  | 155 | mov r17 = in2 | 
|  | 156 | ;; | 
|  | 157 | mov r18 = in3 | 
|  | 158 | mov ar.lc = in0 | 
/* only p16 (= p[0]) is set, so only stage 0 is active at first */
|  | 159 | mov pr.rot = 1 << 16 | 
|  | 160 | mov r19 = in4 | 
|  | 161 | mov r20 = in5 | 
|  | 162 | ;; | 
/* 5 * 7 + 2 = 37 rotating registers, within the 40 requested by alloc */
|  | 163 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] | 
|  | 164 | .rotp p[6+2] | 
|  | 165 | 0: | 
/* group 1: stage-0 loads of sources 1-4, stage-6 pairwise XORs */
|  | 166 | (p[0])	ld8.nta s1[0] = [r16], 8 | 
|  | 167 | (p[0])	ld8.nta s2[0] = [r17], 8 | 
|  | 168 | (p[6])	xor d[0] = s1[6], s2[6] | 
|  | 169 | (p[0])	ld8.nta s3[0] = [r18], 8 | 
|  | 170 | (p[0])	ld8.nta s4[0] = [r19], 8 | 
/* r21 is a static scratch register holding the s3 ^ s4 partial result */
|  | 171 | (p[6])	xor r21 = s3[6], s4[6] | 
/* stop: the XOR in the next group reads the d[0] and r21 written above */
|  | 172 | ;; | 
/* group 2: stage-0 load of source 5 */
|  | 173 | (p[0])	ld8.nta s5[0] = [r20], 8 | 
/* stage 7: store the result of the previous rotation (now named d[1]) */
|  | 174 | (p[6+1])st8.nta [r8] = d[1], 8 | 
/* stage 6: combine the two pairwise partial results */
|  | 175 | (p[6])	xor d[0] = d[0], r21 | 
/* stop: the final XOR reads the d[0] written just above */
|  | 176 | ;; | 
/* group 3, stage 6: fold the fifth source word into the result */
|  | 177 | (p[6])	  xor d[0] = d[0], s5[6] | 
/* NOTE(review): nop.f presumably only pads the bundle - confirm */
|  | 178 | nop.f 0 | 
/* counted loop branch; also rotates the registers and predicates */
|  | 179 | br.ctop.dptk.few 0b | 
|  | 180 | ;; | 
/* restore the loop counter and all predicates saved in the prologue */
|  | 181 | mov ar.lc = r30 | 
|  | 182 | mov pr = r29, -1 | 
|  | 183 | br.ret.sptk.few rp | 
|  | 184 | END(xor_ia64_5) | 