| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | // ------------------------------------------------------------------------- | 
|  | 2 | // Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK. | 
|  | 3 | // All rights reserved. | 
|  | 4 | // | 
|  | 5 | // LICENSE TERMS | 
|  | 6 | // | 
|  | 7 | // The free distribution and use of this software in both source and binary | 
|  | 8 | // form is allowed (with or without changes) provided that: | 
|  | 9 | // | 
|  | 10 | //   1. distributions of this source code include the above copyright | 
|  | 11 | //      notice, this list of conditions and the following disclaimer; | 
|  | 12 | // | 
|  | 13 | //   2. distributions in binary form include the above copyright | 
|  | 14 | //      notice, this list of conditions and the following disclaimer | 
|  | 15 | //      in the documentation and/or other associated materials; | 
|  | 16 | // | 
|  | 17 | //   3. the copyright holder's name is not used to endorse products | 
|  | 18 | //      built using this software without specific written permission. | 
|  | 19 | // | 
|  | 20 | // | 
|  | 21 | // ALTERNATIVELY, provided that this notice is retained in full, this product | 
|  | 22 | // may be distributed under the terms of the GNU General Public License (GPL), | 
|  | 23 | // in which case the provisions of the GPL apply INSTEAD OF those given above. | 
|  | 24 | // | 
|  | 25 | // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> | 
|  | 26 | // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | 
|  | 27 |  | 
|  | 28 | // DISCLAIMER | 
|  | 29 | // | 
|  | 30 | // This software is provided 'as is' with no explicit or implied warranties | 
|  | 31 | // in respect of its properties including, but not limited to, correctness | 
|  | 32 | // and fitness for purpose. | 
|  | 33 | // ------------------------------------------------------------------------- | 
|  | 34 | // Issue Date: 29/07/2002 | 
|  | 35 |  | 
.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>

/* Size in bytes of one lookup table: 256 entries of 32 bits each.  The
 * column macros address four consecutive tables as table + n*tlen for
 * n = 0..3. */
#define tlen 1024

/* Displacements from %esp to the three C arguments.  They are valid while
 * exactly one register has been pushed on top of the return address; code
 * that has pushed more adds 4 per extra push. */
#define tfm     8
#define out_blk 12
#define in_blk  16

/* Displacements from the start of struct crypto_tfm to the fields read by
 * the routines in this file.  All are relative to crypto_tfm_ctx_offset
 * (presumably supplied by <asm/asm-offsets.h> -- confirm):
 *   ekey  key area walked by aes_enc_blk
 *   nrnd  number of rounds
 *   dkey  key area walked by aes_dec_blk
 */
#define ekey (crypto_tfm_ctx_offset + 0)
#define nrnd (crypto_tfm_ctx_offset + 256)
#define dkey (crypto_tfm_ctx_offset + 260)
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 52 |  | 
// Symbolic register names shared by the encrypt and decrypt routines.

#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

// Byte registers of the four mapped registers that have them:
// <reg>l is bits 0-7 and <reg>h is bits 8-15.  There are no entries for
// esi and edi, so only r0-r3 can be handed to l() and h().

#define eaxl  al
#define ebxl  bl
#define ecxl  cl
#define edxl  dl

#define eaxh  ah
#define ebxh  bh
#define ecxh  ch
#define edxh  dh

// l(reg) and h(reg) give the low and high byte register of reg.  The
// extra level of macro call makes the preprocessor expand an alias such
// as r0 to eax before the suffix is pasted on.

#define _paste_l(name) name##l
#define l(reg) _paste_l(reg)

#define _paste_h(name) name##h
#define h(reg) _paste_h(reg)
|  | 76 |  | 
// do_col: fold one input column into four output columns.
//
// The 32-bit word in col is consumed one byte at a time, least significant
// byte first.  Byte n selects a 32-bit entry from the n-th of four lookup
// tables that lie tlen bytes apart starting at tab, and that entry is
// xored into the n-th output operand.
//
// Parameters:
//   tab          symbol of the first lookup table
//   o0,o1,o2,o3  registers xored with the entries selected by bytes
//                0, 1, 2 and 3 of col respectively
//   col          input column; its byte registers are addressed through
//                l() and h(), so it has to be one of r0-r3; destroyed
//   scr          scratch register; destroyed

#define do_col(tab, o0,o1,o2,o3, col, scr)			\
	movzx   %l(col),%scr;		/* byte 0 */		\
	xor     tab(,%scr,4),%o0;				\
	movzx   %h(col),%scr;		/* byte 1 */		\
	shr     $16,%col;		/* expose bytes 2, 3 */	\
	xor     tab+tlen(,%scr,4),%o1;				\
	movzx   %l(col),%scr;		/* byte 2 */		\
	movzx   %h(col),%col;		/* byte 3 */		\
	xor     tab+2*tlen(,%scr,4),%o2;			\
	xor     tab+3*tlen(,%col,4),%o3;
|  | 102 |  | 
// do_fcol: first column step of a forward round.
//
// Loads the four words of the round key at key into the output operands
// and xors in the table entries selected by the four bytes of col, in the
// same byte-to-table order as do_col.
//
// Parameters:
//   tab          symbol of the first lookup table
//   o0,o1,o2,o3  output operands; they receive the key words at byte
//                offsets 0, 12, 8 and 4 respectively
//   col          input column, one of r0-r3; on exit it holds the value
//                that o2 had on entry
//   scr          scratch register; destroyed
//   key          memory operand of the round key.  It is written directly
//                after a displacement ("12 key"), which relies on the
//                assembler reading e.g. "12 +16(%ebp)" as 28(%ebp).
//
// The entry values of o0, o1 and o3 are not read.

#define do_fcol(tab, o0,o1,o2,o3, col, scr, key)		\
	mov     0 key,%o0;		/* key word 0 */	\
	movzx   %l(col),%scr;		/* byte 0 */		\
	mov     12 key,%o1;		/* key word 3 */	\
	xor     tab(,%scr,4),%o0;				\
	mov     4 key,%o3;		/* key word 1 */	\
	movzx   %h(col),%scr;		/* byte 1 */		\
	shr     $16,%col;					\
	xor     tab+tlen(,%scr,4),%o1;				\
	movzx   %l(col),%scr;		/* byte 2 */		\
	movzx   %h(col),%col;		/* byte 3 */		\
	xor     tab+3*tlen(,%col,4),%o3;			\
	mov     %o2,%col;		/* keep the old o2 */	\
	mov     8 key,%o2;		/* key word 2 */	\
	xor     tab+2*tlen(,%scr,4),%o2;
|  | 121 |  | 
// do_icol: first column step of an inverse round.
//
// Same instruction pattern as the forward variant, but the round key words
// are paired with the output operands in a different order.
//
// Parameters:
//   tab          symbol of the first lookup table
//   o0,o1,o2,o3  output operands; they receive the key words at byte
//                offsets 0, 4, 8 and 12 respectively, each xored with the
//                table entry selected by byte 0, 1, 2, 3 of col
//   col          input column, one of r0-r3; on exit it holds the value
//                that o2 had on entry
//   scr          scratch register; destroyed
//   key          memory operand of the round key.  It is written directly
//                after a displacement ("12 key"), which relies on the
//                assembler reading e.g. "12 -16(%ebp)" as -4(%ebp).
//
// The entry values of o0, o1 and o3 are not read.

#define do_icol(tab, o0,o1,o2,o3, col, scr, key)		\
	mov     0 key,%o0;		/* key word 0 */	\
	movzx   %l(col),%scr;		/* byte 0 */		\
	mov     4 key,%o1;		/* key word 1 */	\
	xor     tab(,%scr,4),%o0;				\
	mov     12 key,%o3;		/* key word 3 */	\
	movzx   %h(col),%scr;		/* byte 1 */		\
	shr     $16,%col;					\
	xor     tab+tlen(,%scr,4),%o1;				\
	movzx   %l(col),%scr;		/* byte 2 */		\
	movzx   %h(col),%col;		/* byte 3 */		\
	xor     tab+3*tlen(,%col,4),%o3;			\
	mov     %o2,%col;		/* keep the old o2 */	\
	mov     8 key,%o2;		/* key word 2 */	\
	xor     tab+2*tlen(,%scr,4),%o2;
|  | 140 |  | 
|  | 141 |  | 
// Spill helpers.  Slot n is the 32-bit word at 4*n(%esp); the caller is
// responsible for having reserved the slots.  Note the argument order:
// save takes (slot, register), restore takes (register, slot).
// (The original Gladman code conditionally saved to MMX registers.)
#define save(slot, reg)		\
	mov     %reg,4*slot(%esp)

#define restore(reg, slot)	\
	mov     4*slot(%esp),%reg
|  | 148 |  | 
// Forward round macros.  Each expands to one table-driven encryption
// round: the four input columns are replaced by four output columns built
// from the round key at key and lookups in tab.  r3 is used as scratch
// and the two words at 0(%esp) and 4(%esp) serve as spill slots, so the
// caller must have reserved them.
//
// The two variants differ only in which of r0 and r2 carries the first
// column, so using them alternately returns the columns to r0,r1,r4,r5.

// fwd_rnd1
//   columns on entry: r0,r1,r4,r5
//   columns on exit:  r2,r1,r4,r5
#define fwd_rnd1(key, tab)						\
	save   (0,r1);			/* slot 0 = column 1 */		\
	save   (1,r5);			/* slot 1 = column 3 */		\
									\
	/* column 0 (in r0) and the round key seed the outputs */	\
	do_fcol(tab, r2,r5,r4,r1, r0,r3, key);				\
	/* r0 now carries column 2, the former content of r4 */	\
	do_col (tab, r4,r1,r2,r5, r0,r3);				\
	restore(r0,0);			/* r0 = column 1 */		\
	do_col (tab, r1,r2,r5,r4, r0,r3);				\
	restore(r0,1);			/* r0 = column 3 */		\
	do_col (tab, r5,r4,r1,r2, r0,r3);
|  | 168 |  | 
// fwd_rnd2: one forward round with the first column arriving in r2.
// r3 is scratch; 0(%esp) and 4(%esp) are used as spill slots.
//   columns on entry: r2,r1,r4,r5
//   columns on exit:  r0,r1,r4,r5
#define fwd_rnd2(key, tab)						\
	save   (0,r1);			/* slot 0 = column 1 */		\
	save   (1,r5);			/* slot 1 = column 3 */		\
									\
	/* column 0 (in r2) and the round key seed the outputs */	\
	do_fcol(tab, r0,r5,r4,r1, r2,r3, key);				\
	/* r2 now carries column 2, the former content of r4 */	\
	do_col (tab, r4,r1,r0,r5, r2,r3);				\
	restore(r2,0);			/* r2 = column 1 */		\
	do_col (tab, r1,r0,r5,r4, r2,r3);				\
	restore(r2,1);			/* r2 = column 3 */		\
	do_col (tab, r5,r4,r1,r0, r2,r3);
|  | 183 |  | 
// Inverse round macros.  Each expands to one table-driven decryption
// round: the four input columns are replaced by four output columns built
// from the round key at key and lookups in tab.  r3 is used as scratch
// and the two words at 0(%esp) and 4(%esp) serve as spill slots, so the
// caller must have reserved them.
//
// The two variants differ only in which of r0 and r2 carries the first
// column, so using them alternately returns the columns to r0,r1,r4,r5.

// inv_rnd1
//   columns on entry: r0,r1,r4,r5
//   columns on exit:  r2,r1,r4,r5
#define inv_rnd1(key, tab)						\
	save    (0,r1);			/* slot 0 = column 1 */		\
	save    (1,r5);			/* slot 1 = column 3 */		\
									\
	/* column 0 (in r0) and the round key seed the outputs */	\
	do_icol(tab, r2,r1,r4,r5, r0,r3, key);				\
	/* r0 now carries column 2, the former content of r4 */	\
	do_col (tab, r4,r5,r2,r1, r0,r3);				\
	restore(r0,0);			/* r0 = column 1 */		\
	do_col (tab, r1,r4,r5,r2, r0,r3);				\
	restore(r0,1);			/* r0 = column 3 */		\
	do_col (tab, r5,r2,r1,r4, r0,r3);
|  | 203 |  | 
// inv_rnd2: one inverse round with the first column arriving in r2.
// r3 is scratch; 0(%esp) and 4(%esp) are used as spill slots.
//   columns on entry: r2,r1,r4,r5
//   columns on exit:  r0,r1,r4,r5
#define inv_rnd2(key, tab)						\
	save    (0,r1);			/* slot 0 = column 1 */		\
	save    (1,r5);			/* slot 1 = column 3 */		\
									\
	/* column 0 (in r2) and the round key seed the outputs */	\
	do_icol(tab, r0,r1,r4,r5, r2,r3, key);				\
	/* r2 now carries column 2, the former content of r4 */	\
	do_col (tab, r4,r5,r0,r1, r2,r3);				\
	restore(r2,0);			/* r2 = column 1 */		\
	do_col (tab, r1,r4,r5,r0, r2,r3);				\
	restore(r2,1);			/* r2 = column 3 */		\
	do_col (tab, r5,r0,r1,r4, r2,r3);
|  | 218 |  | 
// AES (Rijndael) encryption of one 16-byte block.
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// In:    the three arguments are read from the stack
// Out:   16 bytes stored at out_blk; eax = 1 on return
// Saved: ebp, ebx, esi and edi are pushed on entry and popped on exit
// Stack: four pushes plus 8 bytes of spill space for the round macros
//
// The round count read from the tfm selects the entry point into the
// unrolled rounds: a value below 12 runs 10 rounds, exactly 12 runs
// 12 rounds, anything else runs 14 rounds.

.global  aes_enc_blk

.extern  ft_tab
.extern  fl_tab

.align 4

aes_enc_blk:
	push    %ebp
	mov     tfm(%esp),%ebp		// ebp = tfm

	// CAUTION: the stack displacements below depend on how many
	// registers have been pushed so far, and the r0-r5 names depend
	// on the register mapping macros.

	push    %ebx
	mov     in_blk+4(%esp),%r2	// r2 = in_blk (two registers pushed)
	push    %esi
	mov     nrnd(%ebp),%r3		// r3 = number of rounds
	push    %edi
#if ekey != 0
	lea     ekey(%ebp),%ebp		// ebp = start of the key area
#endif

	// load the four input columns and xor in the first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp			// two spill slots for save/restore
	add     $16,%ebp		// advance to the next round key
	cmp     $12,%r3
	jb      .Lenc_10rounds		// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp		// lea keeps the flags of the cmp
	je      .Lenc_12rounds		// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp) ,ft_tab)
.Lenc_12rounds:
	fwd_rnd1( -32(%ebp) ,ft_tab)
	fwd_rnd2( -16(%ebp) ,ft_tab)
.Lenc_10rounds:
	fwd_rnd1(    (%ebp) ,ft_tab)
	fwd_rnd2( +16(%ebp) ,ft_tab)
	fwd_rnd1( +32(%ebp) ,ft_tab)
	fwd_rnd2( +48(%ebp) ,ft_tab)
	fwd_rnd1( +64(%ebp) ,ft_tab)
	fwd_rnd2( +80(%ebp) ,ft_tab)
	fwd_rnd1( +96(%ebp) ,ft_tab)
	fwd_rnd2(+112(%ebp) ,ft_tab)
	fwd_rnd1(+128(%ebp) ,ft_tab)
	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table

	// Store the result.  CAUTION: each state register is written out
	// before the pop that overwrites it (r5 is edi, r4 is esi and
	// r1 is ebx under the current register mapping).

	add     $8,%esp			// drop the spill slots
	mov     out_blk+12(%esp),%ebp	// ebp = out_blk (four registers pushed)
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	// NOTE(review): the prototype above is void, so this constant looks
	// like a leftover; it is kept until all callers are confirmed.
	mov     $1,%eax
	ret
|  | 294 |  | 
// AES (Rijndael) decryption of one 16-byte block.
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// In:    the three arguments are read from the stack
// Out:   16 bytes stored at out_blk; eax = 1 on return
// Saved: ebp, ebx, esi and edi are pushed on entry and popped on exit
// Stack: four pushes plus 8 bytes of spill space for the round macros
//
// The key area at dkey is walked backwards: the first round key used is
// at byte offset 16 * (number of rounds) and the last one at offset 0.
// The round count selects the entry point into the unrolled rounds: a
// value below 12 runs 10 rounds, exactly 12 runs 12 rounds, anything
// else runs 14 rounds.

.global  aes_dec_blk

.extern  it_tab
.extern  il_tab

.align 4

aes_dec_blk:
	push    %ebp
	mov     tfm(%esp),%ebp		// ebp = tfm

	// CAUTION: the stack displacements below depend on how many
	// registers have been pushed so far, and the r0-r5 names depend
	// on the register mapping macros.

	push    %ebx
	mov     in_blk+4(%esp),%r2	// r2 = in_blk (two registers pushed)
	push    %esi
	mov     nrnd(%ebp),%r3		// r3 = number of rounds
	push    %edi
#if dkey != 0
	lea     dkey(%ebp),%ebp		// ebp = start of the key area
#endif
	mov     %r3,%r0			// r0 is free until the input is loaded
	shl     $4,%r0			// 16 bytes per round key
	add     %r0,%ebp		// ebp = round key that is applied first

	// load the four input columns and xor in the first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp			// two spill slots for save/restore
	sub     $16,%ebp		// step back to the next round key
	cmp     $12,%r3
	jb      .Ldec_10rounds		// 10 rounds for 128-bit key
	lea     -32(%ebp),%ebp		// lea keeps the flags of the cmp
	je      .Ldec_12rounds		// 12 rounds for 192-bit key
	lea     -32(%ebp),%ebp

	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( +48(%ebp), it_tab)
.Ldec_12rounds:
	inv_rnd1( +32(%ebp), it_tab)
	inv_rnd2( +16(%ebp), it_tab)
.Ldec_10rounds:
	inv_rnd1(    (%ebp), it_tab)
	inv_rnd2( -16(%ebp), it_tab)
	inv_rnd1( -32(%ebp), it_tab)
	inv_rnd2( -48(%ebp), it_tab)
	inv_rnd1( -64(%ebp), it_tab)
	inv_rnd2( -80(%ebp), it_tab)
	inv_rnd1( -96(%ebp), it_tab)
	inv_rnd2(-112(%ebp), it_tab)
	inv_rnd1(-128(%ebp), it_tab)
	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table

	// Store the result.  CAUTION: each state register is written out
	// before the pop that overwrites it (r5 is edi, r4 is esi and
	// r1 is ebx under the current register mapping).

	add     $8,%esp			// drop the spill slots
	mov     out_blk+12(%esp),%ebp	// ebp = out_blk (four registers pushed)
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	// NOTE(review): the prototype above is void, so this constant looks
	// like a leftover; it is kept until all callers are confirmed.
	mov     $1,%eax
	ret
|  | 373 |  |