| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Cryptographic API. | 
|  | 3 | * | 
|  | 4 | * Support for VIA PadLock hardware crypto engine. | 
|  | 5 | * | 
|  | 6 | * Copyright (c) 2004  Michal Ludvig <michal@logix.cz> | 
|  | 7 | * | 
|  | 8 | * Key expansion routine taken from crypto/aes.c | 
|  | 9 | * | 
|  | 10 | * This program is free software; you can redistribute it and/or modify | 
|  | 11 | * it under the terms of the GNU General Public License as published by | 
|  | 12 | * the Free Software Foundation; either version 2 of the License, or | 
|  | 13 | * (at your option) any later version. | 
|  | 14 | * | 
|  | 15 | * --------------------------------------------------------------------------- | 
|  | 16 | * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. | 
|  | 17 | * All rights reserved. | 
|  | 18 | * | 
|  | 19 | * LICENSE TERMS | 
|  | 20 | * | 
|  | 21 | * The free distribution and use of this software in both source and binary | 
|  | 22 | * form is allowed (with or without changes) provided that: | 
|  | 23 | * | 
|  | 24 | *   1. distributions of this source code include the above copyright | 
|  | 25 | *      notice, this list of conditions and the following disclaimer; | 
|  | 26 | * | 
|  | 27 | *   2. distributions in binary form include the above copyright | 
|  | 28 | *      notice, this list of conditions and the following disclaimer | 
|  | 29 | *      in the documentation and/or other associated materials; | 
|  | 30 | * | 
|  | 31 | *   3. the copyright holder's name is not used to endorse products | 
|  | 32 | *      built using this software without specific written permission. | 
|  | 33 | * | 
|  | 34 | * ALTERNATIVELY, provided that this notice is retained in full, this product | 
|  | 35 | * may be distributed under the terms of the GNU General Public License (GPL), | 
|  | 36 | * in which case the provisions of the GPL apply INSTEAD OF those given above. | 
|  | 37 | * | 
|  | 38 | * DISCLAIMER | 
|  | 39 | * | 
|  | 40 | * This software is provided 'as is' with no explicit or implied warranties | 
|  | 41 | * in respect of its properties, including, but not limited to, correctness | 
|  | 42 | * and/or fitness for purpose. | 
|  | 43 | * --------------------------------------------------------------------------- | 
|  | 44 | */ | 
|  | 45 |  | 
|  | 46 | #include <linux/module.h> | 
|  | 47 | #include <linux/init.h> | 
|  | 48 | #include <linux/types.h> | 
|  | 49 | #include <linux/errno.h> | 
|  | 50 | #include <linux/crypto.h> | 
|  | 51 | #include <linux/interrupt.h> | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 52 | #include <linux/kernel.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 53 | #include <asm/byteorder.h> | 
|  | 54 | #include "padlock.h" | 
|  | 55 |  | 
/* AES key, block and expanded-key dimensions. */
#define AES_MIN_KEY_SIZE	16	/* bytes */
#define AES_MAX_KEY_SIZE	32	/* bytes */
#define AES_BLOCK_SIZE		16	/* bytes */
#define AES_EXTENDED_KEY_SIZE	64	/* 32-bit words */
#define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
|  | 61 |  | 
| Michal Ludvig | cc08632 | 2006-07-15 11:08:50 +1000 | [diff] [blame] | 62 | /* Whenever making any changes to the following | 
|  | 63 | * structure *make sure* you keep E, d_data | 
|  | 64 | * and cword aligned on 16 Bytes boundaries!!! */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 65 | struct aes_ctx { | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 66 | struct { | 
|  | 67 | struct cword encrypt; | 
|  | 68 | struct cword decrypt; | 
|  | 69 | } cword; | 
| Herbert Xu | 82062c7 | 2006-05-16 22:20:34 +1000 | [diff] [blame] | 70 | u32 *D; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 71 | int key_length; | 
| Michal Ludvig | cc08632 | 2006-07-15 11:08:50 +1000 | [diff] [blame] | 72 | u32 E[AES_EXTENDED_KEY_SIZE] | 
|  | 73 | __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); | 
|  | 74 | u32 d_data[AES_EXTENDED_KEY_SIZE] | 
|  | 75 | __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 76 | }; | 
|  | 77 |  | 
|  | 78 | /* ====== Key management routines ====== */ | 
|  | 79 |  | 
/*
 * Rotate the 32-bit value x right by bits positions.
 *
 * The count is reduced modulo 32.  The complementary left-shift count is
 * reduced as well, so a rotation by 0 (or any multiple of 32) never shifts
 * by the full width of the type, which is undefined behaviour in C.
 */
static inline uint32_t
generic_rotr32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	return (x >> n) | (x << ((32 - n) % 32));
}
|  | 86 |  | 
/*
 * Rotate the 32-bit value x left by bits positions.
 *
 * The count is reduced modulo 32.  The complementary right-shift count is
 * reduced as well, so a rotation by 0 (or any multiple of 32) never shifts
 * by the full width of the type, which is undefined behaviour in C.
 */
static inline uint32_t
generic_rotl32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	return (x << n) | (x >> ((32 - n) % 32));
}
|  | 93 |  | 
/* Short spellings used by the table generator and the key schedule. */
#define rotr generic_rotr32
#define rotl generic_rotl32
|  | 96 |  | 
/*
 * Extract byte number n from x, counting from the least significant
 * byte (n == 0).  Function form of:
 *   #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
 */
static inline uint8_t
byte(const uint32_t x, const unsigned n)
{
	const unsigned shift = n * 8;

	return (uint8_t)(x >> shift);
}
|  | 105 |  | 
/*
 * Shorthand for the key arrays of the context in scope; both expect a
 * local variable named ctx.
 */
#define D_KEY ctx->D
#define E_KEY ctx->E
|  | 108 |  | 
/* Lookup tables, all filled in once by gen_tabs(). */

/* GF(2**8) power and logarithm tables. */
static uint8_t pow_tab[256];
static uint8_t log_tab[256];

/* Byte substitution table and its inverse. */
static uint8_t sbx_tab[256];
static uint8_t isb_tab[256];

/* Round constants consumed by the key schedule. */
static uint32_t rco_tab[10];

/* Word tables for the normal forward (ft) and inverse (it) rounds. */
static uint32_t ft_tab[4][256];
static uint32_t it_tab[4][256];

/* Word tables for the last forward (fl) and last inverse (il) round. */
static uint32_t fl_tab[4][256];
static uint32_t il_tab[4][256];
|  | 119 |  | 
|  | 120 | static inline uint8_t | 
|  | 121 | f_mult (uint8_t a, uint8_t b) | 
|  | 122 | { | 
|  | 123 | uint8_t aa = log_tab[a], cc = aa + log_tab[b]; | 
|  | 124 |  | 
|  | 125 | return pow_tab[cc + (cc < aa ? 1 : 0)]; | 
|  | 126 | } | 
|  | 127 |  | 
/*
 * GF(2**8) multiply that also handles zero operands (f_mult() cannot,
 * because zero has no logarithm).  Arguments are parenthesised so that
 * expressions such as "x || y" keep their meaning after expansion.
 * Note that each argument may be evaluated twice.
 */
#define ff_mult(a, b)	((a) && (b) ? f_mult((a), (b)) : 0)
|  | 129 |  | 
/*
 * Column n of a normal forward round: combine one byte from each of the
 * four input words bi[] through ft_tab and xor in round-key word k[n].
 * Arguments and the whole expansion are parenthesised so arbitrary
 * expressions can be passed safely.
 */
#define f_rn(bo, bi, n, k)						\
	((bo)[n] = ft_tab[0][byte((bi)[n], 0)] ^			\
		   ft_tab[1][byte((bi)[((n) + 1) & 3], 1)] ^		\
		   ft_tab[2][byte((bi)[((n) + 2) & 3], 2)] ^		\
		   ft_tab[3][byte((bi)[((n) + 3) & 3], 3)] ^ *((k) + (n)))
|  | 135 |  | 
/*
 * Column n of a normal inverse round: combine one byte from each of the
 * four input words bi[] through it_tab and xor in round-key word k[n].
 * Arguments and the whole expansion are parenthesised so arbitrary
 * expressions can be passed safely.
 */
#define i_rn(bo, bi, n, k)						\
	((bo)[n] = it_tab[0][byte((bi)[n], 0)] ^			\
		   it_tab[1][byte((bi)[((n) + 3) & 3], 1)] ^		\
		   it_tab[2][byte((bi)[((n) + 2) & 3], 2)] ^		\
		   it_tab[3][byte((bi)[((n) + 1) & 3], 3)] ^ *((k) + (n)))
|  | 141 |  | 
/*
 * Push each of the four bytes of x through its fl_tab column and xor
 * the results together.  x is evaluated four times.
 */
#define ls_box(x)			\
	(fl_tab[0][byte(x, 0)] ^	\
	 fl_tab[1][byte(x, 1)] ^	\
	 fl_tab[2][byte(x, 2)] ^	\
	 fl_tab[3][byte(x, 3)])
|  | 147 |  | 
/*
 * Column n of the last forward round: same byte selection as f_rn() but
 * through fl_tab, then xor with round-key word k[n].
 * Arguments and the whole expansion are parenthesised so arbitrary
 * expressions can be passed safely.
 */
#define f_rl(bo, bi, n, k)						\
	((bo)[n] = fl_tab[0][byte((bi)[n], 0)] ^			\
		   fl_tab[1][byte((bi)[((n) + 1) & 3], 1)] ^		\
		   fl_tab[2][byte((bi)[((n) + 2) & 3], 2)] ^		\
		   fl_tab[3][byte((bi)[((n) + 3) & 3], 3)] ^ *((k) + (n)))
|  | 153 |  | 
/*
 * Column n of the last inverse round: same byte selection as i_rn() but
 * through il_tab, then xor with round-key word k[n].
 * Arguments and the whole expansion are parenthesised so arbitrary
 * expressions can be passed safely.
 */
#define i_rl(bo, bi, n, k)						\
	((bo)[n] = il_tab[0][byte((bi)[n], 0)] ^			\
		   il_tab[1][byte((bi)[((n) + 3) & 3], 1)] ^		\
		   il_tab[2][byte((bi)[((n) + 2) & 3], 2)] ^		\
		   il_tab[3][byte((bi)[((n) + 1) & 3], 3)] ^ *((k) + (n)))
|  | 159 |  | 
|  | 160 | static void | 
|  | 161 | gen_tabs (void) | 
|  | 162 | { | 
|  | 163 | uint32_t i, t; | 
|  | 164 | uint8_t p, q; | 
|  | 165 |  | 
|  | 166 | /* log and power tables for GF(2**8) finite field with | 
|  | 167 | 0x011b as modular polynomial - the simplest prmitive | 
|  | 168 | root is 0x03, used here to generate the tables */ | 
|  | 169 |  | 
|  | 170 | for (i = 0, p = 1; i < 256; ++i) { | 
|  | 171 | pow_tab[i] = (uint8_t) p; | 
|  | 172 | log_tab[p] = (uint8_t) i; | 
|  | 173 |  | 
|  | 174 | p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0); | 
|  | 175 | } | 
|  | 176 |  | 
|  | 177 | log_tab[1] = 0; | 
|  | 178 |  | 
|  | 179 | for (i = 0, p = 1; i < 10; ++i) { | 
|  | 180 | rco_tab[i] = p; | 
|  | 181 |  | 
|  | 182 | p = (p << 1) ^ (p & 0x80 ? 0x01b : 0); | 
|  | 183 | } | 
|  | 184 |  | 
|  | 185 | for (i = 0; i < 256; ++i) { | 
|  | 186 | p = (i ? pow_tab[255 - log_tab[i]] : 0); | 
|  | 187 | q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2)); | 
|  | 188 | p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2)); | 
|  | 189 | sbx_tab[i] = p; | 
|  | 190 | isb_tab[p] = (uint8_t) i; | 
|  | 191 | } | 
|  | 192 |  | 
|  | 193 | for (i = 0; i < 256; ++i) { | 
|  | 194 | p = sbx_tab[i]; | 
|  | 195 |  | 
|  | 196 | t = p; | 
|  | 197 | fl_tab[0][i] = t; | 
|  | 198 | fl_tab[1][i] = rotl (t, 8); | 
|  | 199 | fl_tab[2][i] = rotl (t, 16); | 
|  | 200 | fl_tab[3][i] = rotl (t, 24); | 
|  | 201 |  | 
|  | 202 | t = ((uint32_t) ff_mult (2, p)) | | 
|  | 203 | ((uint32_t) p << 8) | | 
|  | 204 | ((uint32_t) p << 16) | ((uint32_t) ff_mult (3, p) << 24); | 
|  | 205 |  | 
|  | 206 | ft_tab[0][i] = t; | 
|  | 207 | ft_tab[1][i] = rotl (t, 8); | 
|  | 208 | ft_tab[2][i] = rotl (t, 16); | 
|  | 209 | ft_tab[3][i] = rotl (t, 24); | 
|  | 210 |  | 
|  | 211 | p = isb_tab[i]; | 
|  | 212 |  | 
|  | 213 | t = p; | 
|  | 214 | il_tab[0][i] = t; | 
|  | 215 | il_tab[1][i] = rotl (t, 8); | 
|  | 216 | il_tab[2][i] = rotl (t, 16); | 
|  | 217 | il_tab[3][i] = rotl (t, 24); | 
|  | 218 |  | 
|  | 219 | t = ((uint32_t) ff_mult (14, p)) | | 
|  | 220 | ((uint32_t) ff_mult (9, p) << 8) | | 
|  | 221 | ((uint32_t) ff_mult (13, p) << 16) | | 
|  | 222 | ((uint32_t) ff_mult (11, p) << 24); | 
|  | 223 |  | 
|  | 224 | it_tab[0][i] = t; | 
|  | 225 | it_tab[1][i] = rotl (t, 8); | 
|  | 226 | it_tab[2][i] = rotl (t, 16); | 
|  | 227 | it_tab[3][i] = rotl (t, 24); | 
|  | 228 | } | 
|  | 229 | } | 
|  | 230 |  | 
/*
 * Multiply each of the four bytes packed in x by 2 in GF(2**8): shift
 * the low seven bits of every byte left, and xor 0x1b into each byte
 * whose top bit was set.  The whole expansion is parenthesised so it
 * can be used inside larger expressions.
 */
#define star_x(x) \
	((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
|  | 232 |  | 
/*
 * Transform the encryption round-key word x into y, using repeated
 * star_x() doublings and byte rotations.  Used to derive the decryption
 * key schedule.
 *
 * Uses the caller's locals t, u, v and w as scratch space.  Wrapped in
 * do { } while (0) so it acts as one statement, also under an unbraced
 * if/else.
 */
#define imix_col(y,x)			\
do {					\
	u   = star_x(x);		\
	v   = star_x(u);		\
	w   = star_x(v);		\
	t   = w ^ (x);			\
	(y)  = u ^ v ^ w;		\
	(y) ^= rotr(u ^ t,  8) ^	\
	       rotr(v ^ t, 16) ^	\
	       rotr(t, 24);		\
} while (0)
|  | 242 |  | 
|  | 243 | /* initialise the key schedule from the user supplied key */ | 
|  | 244 |  | 
/*
 * One key-schedule step for a 4-word key: derive E_KEY[4i+4 .. 4i+7]
 * from E_KEY[4i .. 4i+3].  Reads and updates the caller's local t and
 * expects ctx to be in scope (through E_KEY).
 *
 * Wrapped in do { } while (0) so it acts as one statement; i is
 * parenthesised so an expression can be passed.
 */
#define loop4(i)						\
do {								\
	t = rotr(t,  8); t = ls_box(t) ^ rco_tab[i];		\
	t ^= E_KEY[4 * (i)];     E_KEY[4 * (i) + 4] = t;	\
	t ^= E_KEY[4 * (i) + 1]; E_KEY[4 * (i) + 5] = t;	\
	t ^= E_KEY[4 * (i) + 2]; E_KEY[4 * (i) + 6] = t;	\
	t ^= E_KEY[4 * (i) + 3]; E_KEY[4 * (i) + 7] = t;	\
} while (0)
|  | 252 |  | 
/*
 * One key-schedule step for a 6-word key: derive E_KEY[6i+6 .. 6i+11]
 * from E_KEY[6i .. 6i+5].  Reads and updates the caller's local t and
 * expects ctx to be in scope (through E_KEY).
 *
 * Wrapped in do { } while (0) so it acts as one statement; i is
 * parenthesised so an expression can be passed.
 */
#define loop6(i)						\
do {								\
	t = rotr(t,  8); t = ls_box(t) ^ rco_tab[i];		\
	t ^= E_KEY[6 * (i)];     E_KEY[6 * (i) + 6] = t;	\
	t ^= E_KEY[6 * (i) + 1]; E_KEY[6 * (i) + 7] = t;	\
	t ^= E_KEY[6 * (i) + 2]; E_KEY[6 * (i) + 8] = t;	\
	t ^= E_KEY[6 * (i) + 3]; E_KEY[6 * (i) + 9] = t;	\
	t ^= E_KEY[6 * (i) + 4]; E_KEY[6 * (i) + 10] = t;	\
	t ^= E_KEY[6 * (i) + 5]; E_KEY[6 * (i) + 11] = t;	\
} while (0)
|  | 262 |  | 
/*
 * One key-schedule step for an 8-word key: derive E_KEY[8i+8 .. 8i+15]
 * from E_KEY[8i .. 8i+7].  Unlike loop4/loop6, a second ls_box() is
 * applied before the fifth output word.  Reads and updates the caller's
 * local t and expects ctx to be in scope (through E_KEY).
 *
 * Wrapped in do { } while (0) so it acts as one statement; i is
 * parenthesised so an expression can be passed.
 */
#define loop8(i)						\
do {								\
	t = rotr(t,  8); t = ls_box(t) ^ rco_tab[i];		\
	t ^= E_KEY[8 * (i)];     E_KEY[8 * (i) + 8] = t;	\
	t ^= E_KEY[8 * (i) + 1]; E_KEY[8 * (i) + 9] = t;	\
	t ^= E_KEY[8 * (i) + 2]; E_KEY[8 * (i) + 10] = t;	\
	t ^= E_KEY[8 * (i) + 3]; E_KEY[8 * (i) + 11] = t;	\
	t  = E_KEY[8 * (i) + 4] ^ ls_box(t);			\
	E_KEY[8 * (i) + 12] = t;				\
	t ^= E_KEY[8 * (i) + 5]; E_KEY[8 * (i) + 13] = t;	\
	t ^= E_KEY[8 * (i) + 6]; E_KEY[8 * (i) + 14] = t;	\
	t ^= E_KEY[8 * (i) + 7]; E_KEY[8 * (i) + 15] = t;	\
} while (0)
|  | 275 |  | 
/*
 * Report whether the ACE can generate the extended key itself for a key
 * of key_len bytes.  Returns 1 if it can, 0 otherwise.
 */
static inline int
aes_hw_extkey_available(uint8_t key_len)
{
	/*
	 * TODO: We should check the actual CPU model/stepping, as the
	 * capability may be added in later CPU revisions.  For now only
	 * 16-byte keys qualify.
	 */
	return (key_len == 16) ? 1 : 0;
}
|  | 288 |  | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 289 | static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 290 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 291 | unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); | 
| Herbert Xu | f10b789 | 2006-01-25 22:34:01 +1100 | [diff] [blame] | 292 | unsigned long align = PADLOCK_ALIGNMENT; | 
|  | 293 |  | 
|  | 294 | if (align <= crypto_tfm_ctx_alignment()) | 
|  | 295 | align = 1; | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 296 | return (struct aes_ctx *)ALIGN(addr, align); | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 297 | } | 
|  | 298 |  | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 299 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | 
|  | 300 | unsigned int key_len, u32 *flags) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 301 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 302 | struct aes_ctx *ctx = aes_ctx(tfm); | 
| Herbert Xu | 06ace7a | 2005-10-30 21:25:15 +1100 | [diff] [blame] | 303 | const __le32 *key = (const __le32 *)in_key; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 304 | uint32_t i, t, u, v, w; | 
|  | 305 | uint32_t P[AES_EXTENDED_KEY_SIZE]; | 
|  | 306 | uint32_t rounds; | 
|  | 307 |  | 
|  | 308 | if (key_len != 16 && key_len != 24 && key_len != 32) { | 
|  | 309 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | 
|  | 310 | return -EINVAL; | 
|  | 311 | } | 
|  | 312 |  | 
|  | 313 | ctx->key_length = key_len; | 
|  | 314 |  | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 315 | /* | 
|  | 316 | * If the hardware is capable of generating the extended key | 
|  | 317 | * itself we must supply the plain key for both encryption | 
|  | 318 | * and decryption. | 
|  | 319 | */ | 
| Herbert Xu | 82062c7 | 2006-05-16 22:20:34 +1000 | [diff] [blame] | 320 | ctx->D = ctx->E; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 321 |  | 
| Herbert Xu | 06ace7a | 2005-10-30 21:25:15 +1100 | [diff] [blame] | 322 | E_KEY[0] = le32_to_cpu(key[0]); | 
|  | 323 | E_KEY[1] = le32_to_cpu(key[1]); | 
|  | 324 | E_KEY[2] = le32_to_cpu(key[2]); | 
|  | 325 | E_KEY[3] = le32_to_cpu(key[3]); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 326 |  | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 327 | /* Prepare control words. */ | 
|  | 328 | memset(&ctx->cword, 0, sizeof(ctx->cword)); | 
|  | 329 |  | 
|  | 330 | ctx->cword.decrypt.encdec = 1; | 
|  | 331 | ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4; | 
|  | 332 | ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds; | 
|  | 333 | ctx->cword.encrypt.ksize = (key_len - 16) / 8; | 
|  | 334 | ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize; | 
|  | 335 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 336 | /* Don't generate extended keys if the hardware can do it. */ | 
|  | 337 | if (aes_hw_extkey_available(key_len)) | 
|  | 338 | return 0; | 
|  | 339 |  | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 340 | ctx->D = ctx->d_data; | 
|  | 341 | ctx->cword.encrypt.keygen = 1; | 
|  | 342 | ctx->cword.decrypt.keygen = 1; | 
|  | 343 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 344 | switch (key_len) { | 
|  | 345 | case 16: | 
|  | 346 | t = E_KEY[3]; | 
|  | 347 | for (i = 0; i < 10; ++i) | 
|  | 348 | loop4 (i); | 
|  | 349 | break; | 
|  | 350 |  | 
|  | 351 | case 24: | 
| Herbert Xu | 06ace7a | 2005-10-30 21:25:15 +1100 | [diff] [blame] | 352 | E_KEY[4] = le32_to_cpu(key[4]); | 
|  | 353 | t = E_KEY[5] = le32_to_cpu(key[5]); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 354 | for (i = 0; i < 8; ++i) | 
|  | 355 | loop6 (i); | 
|  | 356 | break; | 
|  | 357 |  | 
|  | 358 | case 32: | 
| Herbert Xu | 102d60a | 2006-02-22 23:43:40 +1100 | [diff] [blame] | 359 | E_KEY[4] = le32_to_cpu(key[4]); | 
|  | 360 | E_KEY[5] = le32_to_cpu(key[5]); | 
|  | 361 | E_KEY[6] = le32_to_cpu(key[6]); | 
|  | 362 | t = E_KEY[7] = le32_to_cpu(key[7]); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 363 | for (i = 0; i < 7; ++i) | 
|  | 364 | loop8 (i); | 
|  | 365 | break; | 
|  | 366 | } | 
|  | 367 |  | 
|  | 368 | D_KEY[0] = E_KEY[0]; | 
|  | 369 | D_KEY[1] = E_KEY[1]; | 
|  | 370 | D_KEY[2] = E_KEY[2]; | 
|  | 371 | D_KEY[3] = E_KEY[3]; | 
|  | 372 |  | 
|  | 373 | for (i = 4; i < key_len + 24; ++i) { | 
|  | 374 | imix_col (D_KEY[i], E_KEY[i]); | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | /* PadLock needs a different format of the decryption key. */ | 
|  | 378 | rounds = 10 + (key_len - 16) / 4; | 
|  | 379 |  | 
|  | 380 | for (i = 0; i < rounds; i++) { | 
|  | 381 | P[((i + 1) * 4) + 0] = D_KEY[((rounds - i - 1) * 4) + 0]; | 
|  | 382 | P[((i + 1) * 4) + 1] = D_KEY[((rounds - i - 1) * 4) + 1]; | 
|  | 383 | P[((i + 1) * 4) + 2] = D_KEY[((rounds - i - 1) * 4) + 2]; | 
|  | 384 | P[((i + 1) * 4) + 3] = D_KEY[((rounds - i - 1) * 4) + 3]; | 
|  | 385 | } | 
|  | 386 |  | 
|  | 387 | P[0] = E_KEY[(rounds * 4) + 0]; | 
|  | 388 | P[1] = E_KEY[(rounds * 4) + 1]; | 
|  | 389 | P[2] = E_KEY[(rounds * 4) + 2]; | 
|  | 390 | P[3] = E_KEY[(rounds * 4) + 3]; | 
|  | 391 |  | 
|  | 392 | memcpy(D_KEY, P, AES_EXTENDED_KEY_SIZE_B); | 
|  | 393 |  | 
|  | 394 | return 0; | 
|  | 395 | } | 
|  | 396 |  | 
|  | 397 | /* ====== Encryption/decryption routines ====== */ | 
|  | 398 |  | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 399 | /* These are the real call to PadLock. */ | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 400 | static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, | 
|  | 401 | void *control_word, u32 count) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 402 | { | 
|  | 403 | asm volatile ("pushfl; popfl");		/* enforce key reload. */ | 
|  | 404 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */ | 
|  | 405 | : "+S"(input), "+D"(output) | 
|  | 406 | : "d"(control_word), "b"(key), "c"(count)); | 
|  | 407 | } | 
|  | 408 |  | 
| Herbert Xu | 476df25 | 2005-07-06 13:54:09 -0700 | [diff] [blame] | 409 | static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, | 
|  | 410 | u8 *iv, void *control_word, u32 count) | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 411 | { | 
|  | 412 | /* Enforce key reload. */ | 
|  | 413 | asm volatile ("pushfl; popfl"); | 
|  | 414 | /* rep xcryptcbc */ | 
|  | 415 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" | 
|  | 416 | : "+S" (input), "+D" (output), "+a" (iv) | 
|  | 417 | : "d" (control_word), "b" (key), "c" (count)); | 
| Herbert Xu | 476df25 | 2005-07-06 13:54:09 -0700 | [diff] [blame] | 418 | return iv; | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 419 | } | 
|  | 420 |  | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 421 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 422 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 423 | struct aes_ctx *ctx = aes_ctx(tfm); | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 424 | padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 425 | } | 
|  | 426 |  | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 427 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 428 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 429 | struct aes_ctx *ctx = aes_ctx(tfm); | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 430 | padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 431 | } | 
|  | 432 |  | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 433 | static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, | 
|  | 434 | const u8 *in, unsigned int nbytes) | 
|  | 435 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 436 | struct aes_ctx *ctx = aes_ctx(desc->tfm); | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 437 | padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, | 
|  | 438 | nbytes / AES_BLOCK_SIZE); | 
|  | 439 | return nbytes & ~(AES_BLOCK_SIZE - 1); | 
|  | 440 | } | 
|  | 441 |  | 
|  | 442 | static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, | 
|  | 443 | const u8 *in, unsigned int nbytes) | 
|  | 444 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 445 | struct aes_ctx *ctx = aes_ctx(desc->tfm); | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 446 | padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, | 
|  | 447 | nbytes / AES_BLOCK_SIZE); | 
|  | 448 | return nbytes & ~(AES_BLOCK_SIZE - 1); | 
|  | 449 | } | 
|  | 450 |  | 
|  | 451 | static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, | 
|  | 452 | const u8 *in, unsigned int nbytes) | 
|  | 453 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 454 | struct aes_ctx *ctx = aes_ctx(desc->tfm); | 
| Herbert Xu | 476df25 | 2005-07-06 13:54:09 -0700 | [diff] [blame] | 455 | u8 *iv; | 
|  | 456 |  | 
|  | 457 | iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info, | 
|  | 458 | &ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE); | 
|  | 459 | memcpy(desc->info, iv, AES_BLOCK_SIZE); | 
|  | 460 |  | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 461 | return nbytes & ~(AES_BLOCK_SIZE - 1); | 
|  | 462 | } | 
|  | 463 |  | 
|  | 464 | static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, | 
|  | 465 | const u8 *in, unsigned int nbytes) | 
|  | 466 | { | 
| Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 467 | struct aes_ctx *ctx = aes_ctx(desc->tfm); | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 468 | padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt, | 
|  | 469 | nbytes / AES_BLOCK_SIZE); | 
|  | 470 | return nbytes & ~(AES_BLOCK_SIZE - 1); | 
|  | 471 | } | 
|  | 472 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 473 | static struct crypto_alg aes_alg = { | 
|  | 474 | .cra_name		=	"aes", | 
| Herbert Xu | c8a19c9 | 2005-11-05 18:06:26 +1100 | [diff] [blame] | 475 | .cra_driver_name	=	"aes-padlock", | 
|  | 476 | .cra_priority		=	300, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 477 | .cra_flags		=	CRYPTO_ALG_TYPE_CIPHER, | 
|  | 478 | .cra_blocksize		=	AES_BLOCK_SIZE, | 
| Herbert Xu | fbdae9f | 2005-07-06 13:53:29 -0700 | [diff] [blame] | 479 | .cra_ctxsize		=	sizeof(struct aes_ctx), | 
| Herbert Xu | 6789b2d | 2005-07-06 13:52:27 -0700 | [diff] [blame] | 480 | .cra_alignmask		=	PADLOCK_ALIGNMENT - 1, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 481 | .cra_module		=	THIS_MODULE, | 
|  | 482 | .cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list), | 
|  | 483 | .cra_u			=	{ | 
|  | 484 | .cipher = { | 
|  | 485 | .cia_min_keysize	=	AES_MIN_KEY_SIZE, | 
|  | 486 | .cia_max_keysize	=	AES_MAX_KEY_SIZE, | 
|  | 487 | .cia_setkey	   	= 	aes_set_key, | 
|  | 488 | .cia_encrypt	 	=	aes_encrypt, | 
| Herbert Xu | 28e8c3a | 2005-07-06 13:52:43 -0700 | [diff] [blame] | 489 | .cia_decrypt	  	=	aes_decrypt, | 
|  | 490 | .cia_encrypt_ecb 	=	aes_encrypt_ecb, | 
|  | 491 | .cia_decrypt_ecb  	=	aes_decrypt_ecb, | 
|  | 492 | .cia_encrypt_cbc 	=	aes_encrypt_cbc, | 
|  | 493 | .cia_decrypt_cbc  	=	aes_decrypt_cbc, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 494 | } | 
|  | 495 | } | 
|  | 496 | }; | 
|  | 497 |  | 
|  | 498 | int __init padlock_init_aes(void) | 
|  | 499 | { | 
|  | 500 | printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); | 
|  | 501 |  | 
|  | 502 | gen_tabs(); | 
|  | 503 | return crypto_register_alg(&aes_alg); | 
|  | 504 | } | 
|  | 505 |  | 
|  | 506 | void __exit padlock_fini_aes(void) | 
|  | 507 | { | 
|  | 508 | crypto_unregister_alg(&aes_alg); | 
|  | 509 | } |