|  | /* | 
|  | * Implement AES algorithm in Intel AES-NI instructions. | 
|  | * | 
|  | * The white paper of AES-NI instructions can be downloaded from: | 
|  | *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf | 
|  | * | 
|  | * Copyright (C) 2008, Intel Corp. | 
|  | *    Author: Huang Ying <ying.huang@intel.com> | 
|  | *            Vinodh Gopal <vinodh.gopal@intel.com> | 
|  | *            Kahraman Akdemir | 
|  | * | 
|  | * This program is free software; you can redistribute it and/or modify | 
|  | * it under the terms of the GNU General Public License as published by | 
|  | * the Free Software Foundation; either version 2 of the License, or | 
|  | * (at your option) any later version. | 
|  | */ | 
|  |  | 
|  | #include <linux/linkage.h> | 
|  |  | 
|  | .text | 
|  |  | 
|  | /* | 
|  | * SSE register aliases.  STATE1-STATE4 carry the block(s) being | 
|  | * transformed, IN1-IN4 carry loaded input blocks, KEY carries the round | 
|  | * key being applied and IV carries the CBC chaining value.  STATE and IN | 
|  | * are the single-block shorthands for STATE1 and IN1. | 
|  | */ | 
|  | #define STATE1	%xmm0 | 
|  | #define STATE2	%xmm4 | 
|  | #define STATE3	%xmm5 | 
|  | #define STATE4	%xmm6 | 
|  | #define STATE	STATE1 | 
|  | #define IN1	%xmm1 | 
|  | #define IN2	%xmm7 | 
|  | #define IN3	%xmm8 | 
|  | #define IN4	%xmm9 | 
|  | #define IN	IN1 | 
|  | #define KEY	%xmm2 | 
|  | #define IV	%xmm3 | 
|  |  | 
|  | /* | 
|  | * General purpose register aliases.  KEYP, OUTP, INP, LEN and IVP are the | 
|  | * first five integer argument registers of the System V AMD64 calling | 
|  | * convention.  KLEN receives the key length read from the context, and | 
|  | * T1/TKEYP is the scratch pointer used to walk the round keys.  T2 is not | 
|  | * referenced by the code in this part of the file. | 
|  | */ | 
|  | #define KEYP	%rdi | 
|  | #define OUTP	%rsi | 
|  | #define INP	%rdx | 
|  | #define LEN	%rcx | 
|  | #define IVP	%r8 | 
|  | #define KLEN	%r9d | 
|  | #define T1	%r10 | 
|  | #define TKEYP	T1 | 
|  | #define T2	%r11 | 
|  |  | 
|  | /* | 
|  | * _key_expansion_128 / _key_expansion_256a:	internal ABI | 
|  | * input: | 
|  | *	%xmm0:		round key to expand | 
|  | *	%xmm1:		aeskeygenassist result (dword 3 is used) | 
|  | *	%xmm4:		scratch, zeroed by aesni_set_key before the first call | 
|  | *	%rcx:		where to store the new round key (16-byte aligned) | 
|  | * output: | 
|  | *	%xmm0:		new round key, also stored at (%rcx) | 
|  | *	%rcx:		advanced by 0x10 | 
|  | * changed: | 
|  | *	%xmm1, %xmm4 | 
|  | */ | 
|  | _key_expansion_128: | 
|  | _key_expansion_256a: | 
|  | pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of xmm1 | 
|  | shufps $0b00010000, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0 | 
|  | shufps $0b10001100, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0		# with xmm4 dword 0 == 0, each dword is now XORed with all lower dwords | 
|  | pxor %xmm1, %xmm0		# mix in the broadcast keygenassist word | 
|  | movaps %xmm0, (%rcx)		# aligned store of the new round key | 
|  | add $0x10, %rcx | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _key_expansion_192a:	internal ABI | 
|  | * input: | 
|  | *	%xmm0:		first four dwords of the running 192-bit key | 
|  | *	%xmm2:		last two dwords of the running key (in dwords 0-1) | 
|  | *	%xmm1:		aeskeygenassist result (dword 1 is used) | 
|  | *	%xmm4:		scratch, zeroed by aesni_set_key before the first call | 
|  | *	%rcx:		where to store the output (16-byte aligned) | 
|  | * output: | 
|  | *	%xmm0, %xmm2:	updated running key | 
|  | *	(%rcx):		old xmm2 dwords 0-1 followed by new xmm0 dwords 0-1 | 
|  | *	16(%rcx):	new xmm0 dwords 2-3 followed by new xmm2 dwords 0-1 | 
|  | *	%rcx:		advanced by 0x20 | 
|  | * changed: | 
|  | *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6 | 
|  | */ | 
|  | _key_expansion_192a: | 
|  | pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of xmm1 | 
|  | shufps $0b00010000, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0 | 
|  | shufps $0b10001100, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0 | 
|  | pxor %xmm1, %xmm0		# xmm0 = updated first four dwords | 
|  |  | 
|  | movaps %xmm2, %xmm5 | 
|  | movaps %xmm2, %xmm6		# keep the old xmm2 for the first store | 
|  | pslldq $4, %xmm5		# shift left by one dword | 
|  | pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0 | 
|  | pxor %xmm3, %xmm2 | 
|  | pxor %xmm5, %xmm2		# xmm2 = updated last two dwords | 
|  |  | 
|  | movaps %xmm0, %xmm1 | 
|  | shufps $0b01000100, %xmm0, %xmm6	# xmm6 = old xmm2[0..1], new xmm0[0..1] | 
|  | movaps %xmm6, (%rcx) | 
|  | shufps $0b01001110, %xmm2, %xmm1	# xmm1 = new xmm0[2..3], new xmm2[0..1] | 
|  | movaps %xmm1, 16(%rcx) | 
|  | add $0x20, %rcx | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _key_expansion_192b:	internal ABI | 
|  | * | 
|  | * Same update of the running key in %xmm0/%xmm2 as _key_expansion_192a, | 
|  | * but only the new %xmm0 is stored. | 
|  | * input: | 
|  | *	%xmm0, %xmm2:	running 192-bit key | 
|  | *	%xmm1:		aeskeygenassist result (dword 1 is used) | 
|  | *	%xmm4:		scratch, zeroed by aesni_set_key before the first call | 
|  | *	%rcx:		where to store the output (16-byte aligned) | 
|  | * output: | 
|  | *	%xmm0, %xmm2:	updated running key | 
|  | *	(%rcx):		new xmm0 | 
|  | *	%rcx:		advanced by 0x10 | 
|  | * changed: | 
|  | *	%xmm1, %xmm3, %xmm4, %xmm5 | 
|  | */ | 
|  | _key_expansion_192b: | 
|  | pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of xmm1 | 
|  | shufps $0b00010000, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0 | 
|  | shufps $0b10001100, %xmm0, %xmm4 | 
|  | pxor %xmm4, %xmm0 | 
|  | pxor %xmm1, %xmm0		# xmm0 = updated first four dwords | 
|  |  | 
|  | movaps %xmm2, %xmm5 | 
|  | pslldq $4, %xmm5		# shift left by one dword | 
|  | pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0 | 
|  | pxor %xmm3, %xmm2 | 
|  | pxor %xmm5, %xmm2		# xmm2 = updated last two dwords | 
|  |  | 
|  | movaps %xmm0, (%rcx) | 
|  | add $0x10, %rcx | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _key_expansion_256b:	internal ABI | 
|  | * | 
|  | * Counterpart of _key_expansion_256a that updates %xmm2 instead of %xmm0 | 
|  | * and uses dword 2 of the aeskeygenassist result. | 
|  | * input: | 
|  | *	%xmm2:		round key to expand | 
|  | *	%xmm1:		aeskeygenassist result (dword 2 is used) | 
|  | *	%xmm4:		scratch, zeroed by aesni_set_key before the first call | 
|  | *	%rcx:		where to store the new round key (16-byte aligned) | 
|  | * output: | 
|  | *	%xmm2:		new round key, also stored at (%rcx) | 
|  | *	%rcx:		advanced by 0x10 | 
|  | * changed: | 
|  | *	%xmm1, %xmm4 | 
|  | */ | 
|  | _key_expansion_256b: | 
|  | pshufd $0b10101010, %xmm1, %xmm1	# broadcast dword 2 of xmm1 | 
|  | shufps $0b00010000, %xmm2, %xmm4 | 
|  | pxor %xmm4, %xmm2 | 
|  | shufps $0b10001100, %xmm2, %xmm4 | 
|  | pxor %xmm4, %xmm2 | 
|  | pxor %xmm1, %xmm2		# mix in the broadcast keygenassist word | 
|  | movaps %xmm2, (%rcx)		# aligned store of the new round key | 
|  | add $0x10, %rcx | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 
|  | *                   unsigned int key_len) | 
|  | * | 
|  | * Expand the user key into the context: | 
|  | *	ctx + 0:	encryption round keys, in order of use | 
|  | *	ctx + 240:	decryption round keys (encryption keys in reverse | 
|  | *			order, the inner ones passed through aesimc) | 
|  | *	ctx + 480:	key_len as passed in | 
|  | * The schedule is chosen by comparing the low byte of key_len with 24: | 
|  | * below selects the 128-bit path, equal the 192-bit path, above the | 
|  | * 256-bit path.  key_len is not validated here.  ctx must be 16-byte | 
|  | * aligned because the round keys are stored with movaps.  Always returns 0. | 
|  | * | 
|  | * The AES-NI instructions are emitted as raw .byte sequences; the comment | 
|  | * above each one gives the intended mnemonic. | 
|  | */ | 
|  | ENTRY(aesni_set_key) | 
|  | movups (%rsi), %xmm0		# user key (first 16 bytes) | 
|  | movaps %xmm0, (%rdi) | 
|  | lea 0x10(%rdi), %rcx		# key addr | 
|  | movl %edx, 480(%rdi) | 
|  | pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x | 
|  | cmp $24, %dl | 
|  | jb .Lenc_key128 | 
|  | je .Lenc_key192 | 
|  | /* 256-bit key: second 16 bytes form the next round key as is */ | 
|  | movups 0x10(%rsi), %xmm2	# other user key | 
|  | movaps %xmm2, (%rcx) | 
|  | add $0x10, %rcx | 
|  | # aeskeygenassist $0x1, %xmm2, %xmm1	# round 1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x1, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x2, %xmm2, %xmm1	# round 2 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x2, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x4, %xmm2, %xmm1	# round 3 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x4, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x8, %xmm2, %xmm1	# round 4 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x8, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x10, %xmm2, %xmm1	# round 5 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x10, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x20, %xmm2, %xmm1	# round 6 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | 
|  | call _key_expansion_256a | 
|  | # aeskeygenassist $0x20, %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | 
|  | call _key_expansion_256b | 
|  | # aeskeygenassist $0x40, %xmm2, %xmm1	# round 7 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | 
|  | call _key_expansion_256a | 
|  | jmp .Ldec_key | 
|  | .Lenc_key192: | 
|  | /* 192-bit key: the remaining 8 bytes go to the low half of xmm2 */ | 
|  | movq 0x10(%rsi), %xmm2		# other user key | 
|  | # aeskeygenassist $0x1, %xmm2, %xmm1	# round 1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | 
|  | call _key_expansion_192a | 
|  | # aeskeygenassist $0x2, %xmm2, %xmm1	# round 2 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | 
|  | call _key_expansion_192b | 
|  | # aeskeygenassist $0x4, %xmm2, %xmm1	# round 3 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | 
|  | call _key_expansion_192a | 
|  | # aeskeygenassist $0x8, %xmm2, %xmm1	# round 4 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | 
|  | call _key_expansion_192b | 
|  | # aeskeygenassist $0x10, %xmm2, %xmm1	# round 5 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | 
|  | call _key_expansion_192a | 
|  | # aeskeygenassist $0x20, %xmm2, %xmm1	# round 6 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | 
|  | call _key_expansion_192b | 
|  | # aeskeygenassist $0x40, %xmm2, %xmm1	# round 7 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | 
|  | call _key_expansion_192a | 
|  | # aeskeygenassist $0x80, %xmm2, %xmm1	# round 8 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 | 
|  | call _key_expansion_192b | 
|  | jmp .Ldec_key | 
|  | .Lenc_key128: | 
|  | # aeskeygenassist $0x1, %xmm0, %xmm1	# round 1 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x2, %xmm0, %xmm1	# round 2 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x4, %xmm0, %xmm1	# round 3 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x8, %xmm0, %xmm1	# round 4 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x10, %xmm0, %xmm1	# round 5 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x20, %xmm0, %xmm1	# round 6 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x40, %xmm0, %xmm1	# round 7 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x80, %xmm0, %xmm1	# round 8 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b | 
|  | call _key_expansion_128 | 
|  | # aeskeygenassist $0x36, %xmm0, %xmm1	# round 10 | 
|  | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 | 
|  | call _key_expansion_128 | 
|  | .Ldec_key: | 
|  | /* | 
|  | * Build the decryption schedule at ctx + 240.  %rcx is stepped back to | 
|  | * the last encryption round key; the first and last encryption keys are | 
|  | * copied unchanged to the opposite ends of the decryption schedule. | 
|  | */ | 
|  | sub $0x10, %rcx | 
|  | movaps (%rdi), %xmm0 | 
|  | movaps (%rcx), %xmm1 | 
|  | movaps %xmm0, 240(%rcx) | 
|  | movaps %xmm1, 240(%rdi) | 
|  | add $0x10, %rdi		# rdi walks the inner encryption keys upwards | 
|  | lea 240-16(%rcx), %rsi		# rsi walks the decryption slots downwards | 
|  | .align 4 | 
|  | .Ldec_key_loop: | 
|  | movaps (%rdi), %xmm0 | 
|  | # aesimc %xmm0, %xmm1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 | 
|  | movaps %xmm1, (%rsi) | 
|  | add $0x10, %rdi | 
|  | sub $0x10, %rsi | 
|  | cmp %rcx, %rdi | 
|  | jb .Ldec_key_loop		# stop once rdi reaches the last encryption key | 
|  | xor %rax, %rax			# return 0 | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 
|  | * | 
|  | * Encrypt the single 16-byte block at src and store the result at dst. | 
|  | * Both buffers are accessed with movups, so they need not be aligned. | 
|  | */ | 
|  | ENTRY(aesni_enc) | 
|  | movups (INP), STATE		# fetch the block to encrypt | 
|  | mov 480(KEYP), KLEN		# key length is kept at offset 480 of the ctx | 
|  | call _aesni_enc1		# STATE is transformed in place | 
|  | movups STATE, (OUTP)		# write the result | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _aesni_enc1:		internal ABI | 
|  | * input: | 
|  | *	KEYP:		key struct pointer | 
|  | *	KLEN:		key length (compared with 24 to pick the round count) | 
|  | *	STATE:		initial state (input) | 
|  | * output: | 
|  | *	STATE:		final state (output) | 
|  | * changed: | 
|  | *	KEY | 
|  | *	TKEYP (T1) | 
|  | * | 
|  | * The three key sizes share one tail: TKEYP is biased so that the last ten | 
|  | * round keys sit at -0x20(TKEYP)..0x70(TKEYP) in every case, and the longer | 
|  | * keys run two (192-bit) or four (256-bit) extra rounds before falling | 
|  | * through into that tail. | 
|  | */ | 
|  | _aesni_enc1: | 
|  | movaps (KEYP), KEY		# key | 
|  | mov KEYP, TKEYP | 
|  | pxor KEY, STATE		# round 0 | 
|  | add $0x30, TKEYP | 
|  | cmp $24, KLEN | 
|  | jb .Lenc128 | 
|  | lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je below | 
|  | je .Lenc192 | 
|  | add $0x20, TKEYP | 
|  | movaps -0x60(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps -0x50(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | .align 4 | 
|  | .Lenc192: | 
|  | movaps -0x40(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps -0x30(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | .align 4 | 
|  | .Lenc128: | 
|  | movaps -0x20(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps -0x10(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps (TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x10(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x20(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x30(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x40(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x50(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x60(TKEYP), KEY | 
|  | # aesenc KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | movaps 0x70(TKEYP), KEY | 
|  | # aesenclast KEY, STATE	# last round | 
|  | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _aesni_enc4:	internal ABI | 
|  | * input: | 
|  | *	KEYP:		key struct pointer | 
|  | *	KLEN:		key length (compared with 24 to pick the round count) | 
|  | *	STATE1:		initial state (input) | 
|  | *	STATE2 | 
|  | *	STATE3 | 
|  | *	STATE4 | 
|  | * output: | 
|  | *	STATE1:		final state (output) | 
|  | *	STATE2 | 
|  | *	STATE3 | 
|  | *	STATE4 | 
|  | * changed: | 
|  | *	KEY | 
|  | *	TKEYP (T1) | 
|  | * | 
|  | * Four-block variant of _aesni_enc1: each round key is loaded once and | 
|  | * applied to STATE1..STATE4 in turn.  The .byte sequences differ only in | 
|  | * their last byte, which selects the destination state register. | 
|  | */ | 
|  | _aesni_enc4: | 
|  | movaps (KEYP), KEY		# key | 
|  | mov KEYP, TKEYP | 
|  | pxor KEY, STATE1		# round 0 | 
|  | pxor KEY, STATE2 | 
|  | pxor KEY, STATE3 | 
|  | pxor KEY, STATE4 | 
|  | add $0x30, TKEYP | 
|  | cmp $24, KLEN | 
|  | jb .L4enc128 | 
|  | lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je below | 
|  | je .L4enc192 | 
|  | add $0x20, TKEYP | 
|  | movaps -0x60(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps -0x50(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | #.align 4 | 
|  | .L4enc192: | 
|  | movaps -0x40(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps -0x30(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | #.align 4 | 
|  | .L4enc128: | 
|  | movaps -0x20(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps -0x10(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps (TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x10(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x20(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x30(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x40(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x50(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x60(TKEYP), KEY | 
|  | # aesenc KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 
|  | # aesenc KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 
|  | # aesenc KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | 
|  | # aesenc KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | 
|  | movaps 0x70(TKEYP), KEY | 
|  | # aesenclast KEY, STATE1	# last round | 
|  | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | 
|  | # aesenclast KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 | 
|  | # aesenclast KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdd, 0xea | 
|  | # aesenclast KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 
|  | * | 
|  | * Decrypt the single 16-byte block at src and store the result at dst. | 
|  | * Both buffers are accessed with movups, so they need not be aligned. | 
|  | */ | 
|  | ENTRY(aesni_dec) | 
|  | movups (INP), STATE		# fetch the block to decrypt | 
|  | movl 480(KEYP), KLEN		# key length, read before KEYP is moved | 
|  | add $240, KEYP			# decryption round keys start at ctx + 240 | 
|  | call _aesni_dec1		# STATE is transformed in place | 
|  | movups STATE, (OUTP)		# write the result | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _aesni_dec1:		internal ABI | 
|  | * input: | 
|  | *	KEYP:		pointer to the first decryption round key | 
|  | *			(callers pass ctx + 240) | 
|  | *	KLEN:		key length | 
|  | *	STATE:		initial state (input) | 
|  | * output: | 
|  | *	STATE:		final state (output) | 
|  | * changed: | 
|  | *	KEY | 
|  | *	TKEYP (T1) | 
|  | * | 
|  | * The three key sizes share one tail: TKEYP is biased so that the last ten | 
|  | * round keys sit at -0x20(TKEYP)..0x70(TKEYP) in every case, and the longer | 
|  | * keys run two (192-bit) or four (256-bit) extra rounds before falling | 
|  | * through into that tail. | 
|  | */ | 
|  | _aesni_dec1: | 
|  | movaps (KEYP), KEY		# key | 
|  | mov KEYP, TKEYP | 
|  | pxor KEY, STATE		# round 0 | 
|  | add $0x30, TKEYP | 
|  | cmp $24, KLEN | 
|  | jb .Ldec128 | 
|  | lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je below | 
|  | je .Ldec192 | 
|  | add $0x20, TKEYP | 
|  | movaps -0x60(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps -0x50(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | .align 4 | 
|  | .Ldec192: | 
|  | movaps -0x40(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps -0x30(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | .align 4 | 
|  | .Ldec128: | 
|  | movaps -0x20(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps -0x10(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps (TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x10(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x20(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x30(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x40(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x50(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x60(TKEYP), KEY | 
|  | # aesdec KEY, STATE | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | movaps 0x70(TKEYP), KEY | 
|  | # aesdeclast KEY, STATE		# last round | 
|  | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * _aesni_dec4:	internal ABI | 
|  | * input: | 
|  | *	KEYP:		pointer to the first decryption round key | 
|  | *			(callers pass ctx + 240) | 
|  | *	KLEN:		key length | 
|  | *	STATE1:		initial state (input) | 
|  | *	STATE2 | 
|  | *	STATE3 | 
|  | *	STATE4 | 
|  | * output: | 
|  | *	STATE1:		final state (output) | 
|  | *	STATE2 | 
|  | *	STATE3 | 
|  | *	STATE4 | 
|  | * changed: | 
|  | *	KEY | 
|  | *	TKEYP (T1) | 
|  | * | 
|  | * Four-block variant of _aesni_dec1: each round key is loaded once and | 
|  | * applied to STATE1..STATE4 in turn.  The .byte sequences differ only in | 
|  | * their last byte, which selects the destination state register. | 
|  | */ | 
|  | _aesni_dec4: | 
|  | movaps (KEYP), KEY		# key | 
|  | mov KEYP, TKEYP | 
|  | pxor KEY, STATE1		# round 0 | 
|  | pxor KEY, STATE2 | 
|  | pxor KEY, STATE3 | 
|  | pxor KEY, STATE4 | 
|  | add $0x30, TKEYP | 
|  | cmp $24, KLEN | 
|  | jb .L4dec128 | 
|  | lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je below | 
|  | je .L4dec192 | 
|  | add $0x20, TKEYP | 
|  | movaps -0x60(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps -0x50(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | .align 4 | 
|  | .L4dec192: | 
|  | movaps -0x40(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps -0x30(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | .align 4 | 
|  | .L4dec128: | 
|  | movaps -0x20(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps -0x10(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps (TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x10(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x20(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x30(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x40(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x50(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x60(TKEYP), KEY | 
|  | # aesdec KEY, STATE1 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 
|  | # aesdec KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 
|  | # aesdec KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | 
|  | # aesdec KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | 
|  | movaps 0x70(TKEYP), KEY | 
|  | # aesdeclast KEY, STATE1	# last round | 
|  | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | 
|  | # aesdeclast KEY, STATE2 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 | 
|  | # aesdeclast KEY, STATE3 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdf, 0xea | 
|  | # aesdeclast KEY, STATE4 | 
|  | .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | 
|  | *		      size_t len) | 
|  | * | 
|  | * ECB-encrypt every complete 16-byte block of src into dst.  Blocks are | 
|  | * processed four at a time while at least 64 bytes remain, then one at a | 
|  | * time.  A trailing partial block (len % 16 bytes) is left untouched. | 
|  | * len is a size_t, so all length comparisons use unsigned conditions. | 
|  | */ | 
|  | ENTRY(aesni_ecb_enc) | 
|  | test LEN, LEN		# check length | 
|  | jz .Lecb_enc_ret | 
|  | mov 480(KEYP), KLEN | 
|  | cmp $16, LEN | 
|  | jb .Lecb_enc_ret | 
|  | cmp $64, LEN | 
|  | jb .Lecb_enc_loop1 | 
|  | .align 4 | 
|  | .Lecb_enc_loop4: | 
|  | movups (INP), STATE1 | 
|  | movups 0x10(INP), STATE2 | 
|  | movups 0x20(INP), STATE3 | 
|  | movups 0x30(INP), STATE4 | 
|  | call _aesni_enc4 | 
|  | movups STATE1, (OUTP) | 
|  | movups STATE2, 0x10(OUTP) | 
|  | movups STATE3, 0x20(OUTP) | 
|  | movups STATE4, 0x30(OUTP) | 
|  | sub $64, LEN | 
|  | add $64, INP | 
|  | add $64, OUTP | 
|  | cmp $64, LEN | 
|  | jae .Lecb_enc_loop4		# unsigned: LEN is a size_t | 
|  | cmp $16, LEN | 
|  | jb .Lecb_enc_ret | 
|  | .align 4 | 
|  | .Lecb_enc_loop1: | 
|  | movups (INP), STATE1 | 
|  | call _aesni_enc1 | 
|  | movups STATE1, (OUTP) | 
|  | sub $16, LEN | 
|  | add $16, INP | 
|  | add $16, OUTP | 
|  | cmp $16, LEN | 
|  | jae .Lecb_enc_loop1		# unsigned: LEN is a size_t | 
|  | .Lecb_enc_ret: | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | 
|  | *		      size_t len); | 
|  | * | 
|  | * ECB-decrypt every complete 16-byte block of src into dst, using the | 
|  | * decryption round keys at ctx + 240.  Blocks are processed four at a | 
|  | * time while at least 64 bytes remain, then one at a time.  A trailing | 
|  | * partial block (len % 16 bytes) is left untouched. | 
|  | * len is a size_t, so all length comparisons use unsigned conditions. | 
|  | */ | 
|  | ENTRY(aesni_ecb_dec) | 
|  | test LEN, LEN | 
|  | jz .Lecb_dec_ret | 
|  | mov 480(KEYP), KLEN | 
|  | add $240, KEYP			# switch to the decryption round keys | 
|  | cmp $16, LEN | 
|  | jb .Lecb_dec_ret | 
|  | cmp $64, LEN | 
|  | jb .Lecb_dec_loop1 | 
|  | .align 4 | 
|  | .Lecb_dec_loop4: | 
|  | movups (INP), STATE1 | 
|  | movups 0x10(INP), STATE2 | 
|  | movups 0x20(INP), STATE3 | 
|  | movups 0x30(INP), STATE4 | 
|  | call _aesni_dec4 | 
|  | movups STATE1, (OUTP) | 
|  | movups STATE2, 0x10(OUTP) | 
|  | movups STATE3, 0x20(OUTP) | 
|  | movups STATE4, 0x30(OUTP) | 
|  | sub $64, LEN | 
|  | add $64, INP | 
|  | add $64, OUTP | 
|  | cmp $64, LEN | 
|  | jae .Lecb_dec_loop4		# unsigned: LEN is a size_t | 
|  | cmp $16, LEN | 
|  | jb .Lecb_dec_ret | 
|  | .align 4 | 
|  | .Lecb_dec_loop1: | 
|  | movups (INP), STATE1 | 
|  | call _aesni_dec1 | 
|  | movups STATE1, (OUTP) | 
|  | sub $16, LEN | 
|  | add $16, INP | 
|  | add $16, OUTP | 
|  | cmp $16, LEN | 
|  | jae .Lecb_dec_loop1		# unsigned: LEN is a size_t | 
|  | .Lecb_dec_ret: | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | 
|  | *		      size_t len, u8 *iv) | 
|  | * | 
|  | * CBC-encrypt every complete 16-byte block of src into dst.  STATE holds | 
|  | * the chaining value: it starts as *iv, is XORed with each input block | 
|  | * before encryption, and the last output block is written back to *iv. | 
|  | * If len < 16 nothing is done and *iv is not touched.  A trailing partial | 
|  | * block (len % 16 bytes) is ignored. | 
|  | * len is a size_t, so all length comparisons use unsigned conditions. | 
|  | */ | 
|  | ENTRY(aesni_cbc_enc) | 
|  | cmp $16, LEN | 
|  | jb .Lcbc_enc_ret | 
|  | mov 480(KEYP), KLEN | 
|  | movups (IVP), STATE	# load iv as initial state | 
|  | .align 4 | 
|  | .Lcbc_enc_loop: | 
|  | movups (INP), IN	# load input | 
|  | pxor IN, STATE | 
|  | call _aesni_enc1 | 
|  | movups STATE, (OUTP)	# store output | 
|  | sub $16, LEN | 
|  | add $16, INP | 
|  | add $16, OUTP | 
|  | cmp $16, LEN | 
|  | jae .Lcbc_enc_loop	# unsigned: LEN is a size_t | 
|  | movups STATE, (IVP)	# last output block becomes the next iv | 
|  | .Lcbc_enc_ret: | 
|  | ret | 
|  |  | 
|  | /* | 
|  | * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | 
|  | *		      size_t len, u8 *iv) | 
|  | * | 
|  | * CBC-decrypt every complete 16-byte block of src into dst, using the | 
|  | * decryption round keys at ctx + 240.  IV holds the chaining value: it | 
|  | * starts as *iv and is replaced by each input block after that block | 
|  | * has been decrypted.  Blocks are processed four at a time while at least | 
|  | * 64 bytes remain, then one at a time. | 
|  | * | 
|  | * Whenever at least one block was processed, the last input block is | 
|  | * written back to *iv so that a following call can continue the chain. | 
|  | * This must also happen when the four-block loop consumes all the input | 
|  | * (len a multiple of 64), hence the two separate exit labels.  If | 
|  | * len < 16 nothing is done and *iv is not touched.  A trailing partial | 
|  | * block (len % 16 bytes) is ignored. | 
|  | * len is a size_t, so all length comparisons use unsigned conditions. | 
|  | */ | 
|  | ENTRY(aesni_cbc_dec) | 
|  | cmp $16, LEN | 
|  | jb .Lcbc_dec_just_ret		# nothing processed: leave *iv alone | 
|  | mov 480(KEYP), KLEN | 
|  | add $240, KEYP			# switch to the decryption round keys | 
|  | movups (IVP), IV | 
|  | cmp $64, LEN | 
|  | jb .Lcbc_dec_loop1 | 
|  | .align 4 | 
|  | .Lcbc_dec_loop4: | 
|  | movups (INP), IN1		# keep each input block: it is the | 
|  | movaps IN1, STATE1		# chaining value for the next block | 
|  | movups 0x10(INP), IN2 | 
|  | movaps IN2, STATE2 | 
|  | movups 0x20(INP), IN3 | 
|  | movaps IN3, STATE3 | 
|  | movups 0x30(INP), IN4 | 
|  | movaps IN4, STATE4 | 
|  | call _aesni_dec4 | 
|  | pxor IV, STATE1 | 
|  | pxor IN1, STATE2 | 
|  | pxor IN2, STATE3 | 
|  | pxor IN3, STATE4 | 
|  | movaps IN4, IV | 
|  | movups STATE1, (OUTP) | 
|  | movups STATE2, 0x10(OUTP) | 
|  | movups STATE3, 0x20(OUTP) | 
|  | movups STATE4, 0x30(OUTP) | 
|  | sub $64, LEN | 
|  | add $64, INP | 
|  | add $64, OUTP | 
|  | cmp $64, LEN | 
|  | jae .Lcbc_dec_loop4		# unsigned: LEN is a size_t | 
|  | cmp $16, LEN | 
|  | jb .Lcbc_dec_ret		# done, but IV must still be saved | 
|  | .align 4 | 
|  | .Lcbc_dec_loop1: | 
|  | movups (INP), IN | 
|  | movaps IN, STATE | 
|  | call _aesni_dec1 | 
|  | pxor IV, STATE | 
|  | movups STATE, (OUTP) | 
|  | movaps IN, IV | 
|  | sub $16, LEN | 
|  | add $16, INP | 
|  | add $16, OUTP | 
|  | cmp $16, LEN | 
|  | jae .Lcbc_dec_loop1		# unsigned: LEN is a size_t | 
|  | .Lcbc_dec_ret: | 
|  | movups IV, (IVP)		# save the chaining value for the next call | 
|  | .Lcbc_dec_just_ret: | 
|  | ret | 