1 | /* |
2 | * Copyright (c) 2007-2016, Cameron Rich |
3 | * |
4 | * All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions are met: |
8 | * |
9 | * * Redistributions of source code must retain the above copyright notice, |
10 | * this list of conditions and the following disclaimer. |
11 | * * Redistributions in binary form must reproduce the above copyright notice, |
12 | * this list of conditions and the following disclaimer in the documentation |
13 | * and/or other materials provided with the distribution. |
14 | * * Neither the name of the axTLS project nor the names of its contributors |
15 | * may be used to endorse or promote products derived from this software |
16 | * without specific prior written permission. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
22 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
23 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
24 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
25 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
26 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | */ |
30 | |
31 | /** |
32 | * AES implementation - this is a small code version. There are much faster |
33 | * versions around but they are much larger in size (i.e. they use large |
34 | * submix tables). |
35 | */ |
36 | |
37 | #include <string.h> |
38 | #include "os_port.h" |
39 | #include "crypto.h" |
40 | |
41 | #define rot1(x) (((x) << 24) | ((x) >> 8)) |
42 | #define rot2(x) (((x) << 16) | ((x) >> 16)) |
43 | #define rot3(x) (((x) << 8) | ((x) >> 24)) |
44 | |
45 | /* |
46 | * This cute trick does 4 'mul by two' at once. Stolen from |
47 | * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is |
48 | * a standard graphics trick |
49 | * The key to this is that we need to xor with 0x1b if the top bit is set. |
50 | * a 1xxx xxxx 0xxx 0xxx First we mask the 7bit, |
51 | * b 1000 0000 0000 0000 then we shift right by 7 putting the 7bit in 0bit, |
52 | * c 0000 0001 0000 0000 we then subtract (c) from (b) |
53 | * d 0111 1111 0000 0000 and now we and with our mask |
54 | * e 0001 1011 0000 0000 |
55 | */ |
56 | #define mt 0x80808080 |
57 | #define ml 0x7f7f7f7f |
58 | #define mh 0xfefefefe |
59 | #define mm 0x1b1b1b1b |
60 | #define mul2(x,t) ((t)=((x)&mt), \ |
61 | ((((x)+(x))&mh)^(((t)-((t)>>7))&mm))) |
62 | |
63 | #define inv_mix_col(x,f2,f4,f8,f9) (\ |
64 | (f2)=mul2(x,f2), \ |
65 | (f4)=mul2(f2,f4), \ |
66 | (f8)=mul2(f4,f8), \ |
67 | (f9)=(x)^(f8), \ |
68 | (f8)=((f2)^(f4)^(f8)), \ |
69 | (f2)^=(f9), \ |
70 | (f4)^=(f9), \ |
71 | (f8)^=rot3(f2), \ |
72 | (f8)^=rot2(f4), \ |
73 | (f8)^rot1(f9)) |
74 | |
75 | /* |
76 | * AES S-box |
77 | */ |
78 | static const uint8_t aes_sbox[256] = |
79 | { |
80 | 0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5, |
81 | 0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76, |
82 | 0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0, |
83 | 0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0, |
84 | 0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC, |
85 | 0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15, |
86 | 0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A, |
87 | 0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75, |
88 | 0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0, |
89 | 0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84, |
90 | 0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B, |
91 | 0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF, |
92 | 0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85, |
93 | 0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8, |
94 | 0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5, |
95 | 0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2, |
96 | 0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17, |
97 | 0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73, |
98 | 0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88, |
99 | 0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB, |
100 | 0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C, |
101 | 0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79, |
102 | 0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9, |
103 | 0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08, |
104 | 0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6, |
105 | 0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A, |
106 | 0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E, |
107 | 0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E, |
108 | 0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94, |
109 | 0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF, |
110 | 0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68, |
111 | 0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16, |
112 | }; |
113 | |
114 | /* |
115 | * AES is-box |
116 | */ |
117 | static const uint8_t aes_isbox[256] = |
118 | { |
119 | 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, |
120 | 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, |
121 | 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, |
122 | 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, |
123 | 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, |
124 | 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, |
125 | 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, |
126 | 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, |
127 | 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, |
128 | 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, |
129 | 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, |
130 | 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, |
131 | 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, |
132 | 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, |
133 | 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, |
134 | 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, |
135 | 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, |
136 | 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, |
137 | 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, |
138 | 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, |
139 | 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, |
140 | 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, |
141 | 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, |
142 | 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, |
143 | 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, |
144 | 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, |
145 | 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, |
146 | 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, |
147 | 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, |
148 | 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, |
149 | 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, |
150 | 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d |
151 | }; |
152 | |
153 | static const unsigned char Rcon[30]= |
154 | { |
155 | 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, |
156 | 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f, |
157 | 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4, |
158 | 0xb3,0x7d,0xfa,0xef,0xc5,0x91, |
159 | }; |
160 | |
161 | /* ----- static functions ----- */ |
162 | |
163 | /* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial |
164 | x^8+x^4+x^3+x+1 */ |
165 | static unsigned char AES_xtime(uint32_t x) |
166 | { |
167 | return (x&0x80) ? (x<<1)^0x1b : x<<1; |
168 | } |
169 | |
170 | /** |
171 | * Set up AES with the key/iv and cipher size. |
172 | */ |
173 | void AES_set_key(AES_CTX *ctx, const uint8_t *key, |
174 | const uint8_t *iv, AES_MODE mode) |
175 | { |
176 | int i, ii; |
177 | uint32_t *W, tmp, tmp2; |
178 | const unsigned char *ip; |
179 | int words; |
180 | |
181 | switch (mode) |
182 | { |
183 | case AES_MODE_128: |
184 | i = 10; |
185 | words = 4; |
186 | break; |
187 | |
188 | case AES_MODE_256: |
189 | i = 14; |
190 | words = 8; |
191 | break; |
192 | |
193 | default: /* fail silently */ |
194 | return; |
195 | } |
196 | |
197 | ctx->rounds = i; |
198 | ctx->key_size = words; |
199 | W = ctx->ks; |
200 | for (i = 0; i < words; i+=2) |
201 | { |
202 | W[i+0]= ((uint32_t)key[ 0]<<24)| |
203 | ((uint32_t)key[ 1]<<16)| |
204 | ((uint32_t)key[ 2]<< 8)| |
205 | ((uint32_t)key[ 3] ); |
206 | W[i+1]= ((uint32_t)key[ 4]<<24)| |
207 | ((uint32_t)key[ 5]<<16)| |
208 | ((uint32_t)key[ 6]<< 8)| |
209 | ((uint32_t)key[ 7] ); |
210 | key += 8; |
211 | } |
212 | |
213 | ip = Rcon; |
214 | ii = 4 * (ctx->rounds+1); |
215 | for (i = words; i<ii; i++) |
216 | { |
217 | tmp = W[i-1]; |
218 | |
219 | if ((i % words) == 0) |
220 | { |
221 | tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8; |
222 | tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16; |
223 | tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24; |
224 | tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]; |
225 | tmp=tmp2^(((unsigned int)*ip)<<24); |
226 | ip++; |
227 | } |
228 | |
229 | if ((words == 8) && ((i % words) == 4)) |
230 | { |
231 | tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ; |
232 | tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8; |
233 | tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16; |
234 | tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24; |
235 | tmp=tmp2; |
236 | } |
237 | |
238 | W[i]=W[i-words]^tmp; |
239 | } |
240 | |
241 | /* copy the iv across */ |
242 | if (iv) |
243 | { |
244 | memcpy(ctx->iv, iv, 16); |
245 | } |
246 | } |
247 | |
248 | /** |
249 | * Change a key for decryption. |
250 | */ |
251 | void AES_convert_key(AES_CTX *ctx) |
252 | { |
253 | int i; |
254 | uint32_t *k,w,t1,t2,t3,t4; |
255 | |
256 | k = ctx->ks; |
257 | k += 4; |
258 | |
259 | for (i= ctx->rounds*4; i > 4; i--) |
260 | { |
261 | w= *k; |
262 | w = inv_mix_col(w,t1,t2,t3,t4); |
263 | *k++ =w; |
264 | } |
265 | } |
266 | |
267 | /** |
268 | * Encrypt a byte sequence (with a block size 16) using the AES cipher. |
269 | */ |
270 | void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length) |
271 | { |
272 | int i; |
273 | uint32_t tin[4], tout[4], iv[4]; |
274 | |
275 | memcpy(iv, ctx->iv, AES_IV_SIZE); |
276 | for (i = 0; i < 4; i++) |
277 | tout[i] = ntohl(iv[i]); |
278 | |
279 | for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE) |
280 | { |
281 | uint32_t msg_32[4]; |
282 | uint32_t out_32[4]; |
283 | memcpy(msg_32, msg, AES_BLOCKSIZE); |
284 | msg += AES_BLOCKSIZE; |
285 | |
286 | for (i = 0; i < 4; i++) |
287 | tin[i] = ntohl(msg_32[i])^tout[i]; |
288 | |
289 | AES_encrypt(ctx, tin); |
290 | |
291 | for (i = 0; i < 4; i++) |
292 | { |
293 | tout[i] = tin[i]; |
294 | out_32[i] = htonl(tout[i]); |
295 | } |
296 | |
297 | memcpy(out, out_32, AES_BLOCKSIZE); |
298 | out += AES_BLOCKSIZE; |
299 | } |
300 | |
301 | for (i = 0; i < 4; i++) |
302 | iv[i] = htonl(tout[i]); |
303 | memcpy(ctx->iv, iv, AES_IV_SIZE); |
304 | } |
305 | |
306 | /** |
307 | * Decrypt a byte sequence (with a block size 16) using the AES cipher. |
308 | */ |
309 | void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length) |
310 | { |
311 | int i; |
312 | uint32_t tin[4], xor[4], tout[4], data[4], iv[4]; |
313 | |
314 | memcpy(iv, ctx->iv, AES_IV_SIZE); |
315 | for (i = 0; i < 4; i++) |
316 | xor[i] = ntohl(iv[i]); |
317 | |
318 | for (length -= 16; length >= 0; length -= 16) |
319 | { |
320 | uint32_t msg_32[4]; |
321 | uint32_t out_32[4]; |
322 | memcpy(msg_32, msg, AES_BLOCKSIZE); |
323 | msg += AES_BLOCKSIZE; |
324 | |
325 | for (i = 0; i < 4; i++) |
326 | { |
327 | tin[i] = ntohl(msg_32[i]); |
328 | data[i] = tin[i]; |
329 | } |
330 | |
331 | AES_decrypt(ctx, data); |
332 | |
333 | for (i = 0; i < 4; i++) |
334 | { |
335 | tout[i] = data[i]^xor[i]; |
336 | xor[i] = tin[i]; |
337 | out_32[i] = htonl(tout[i]); |
338 | } |
339 | |
340 | memcpy(out, out_32, AES_BLOCKSIZE); |
341 | out += AES_BLOCKSIZE; |
342 | } |
343 | |
344 | for (i = 0; i < 4; i++) |
345 | iv[i] = htonl(xor[i]); |
346 | memcpy(ctx->iv, iv, AES_IV_SIZE); |
347 | } |
348 | |
349 | /** |
350 | * Encrypt a single block (16 bytes) of data |
351 | */ |
352 | void AES_encrypt(const AES_CTX *ctx, uint32_t *data) |
353 | { |
354 | /* To make this code smaller, generate the sbox entries on the fly. |
355 | * This will have a really heavy effect upon performance. |
356 | */ |
357 | uint32_t tmp[4]; |
358 | uint32_t tmp1, old_a0, a0, a1, a2, a3, row; |
359 | int curr_rnd; |
360 | int rounds = ctx->rounds; |
361 | const uint32_t *k = ctx->ks; |
362 | |
363 | /* Pre-round key addition */ |
364 | for (row = 0; row < 4; row++) |
365 | data[row] ^= *(k++); |
366 | |
367 | /* Encrypt one block. */ |
368 | for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++) |
369 | { |
370 | /* Perform ByteSub and ShiftRow operations together */ |
371 | for (row = 0; row < 4; row++) |
372 | { |
373 | a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF]; |
374 | a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF]; |
375 | a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF]; |
376 | a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF]; |
377 | |
378 | /* Perform MixColumn iff not last round */ |
379 | if (curr_rnd < (rounds - 1)) |
380 | { |
381 | tmp1 = a0 ^ a1 ^ a2 ^ a3; |
382 | old_a0 = a0; |
383 | a0 ^= tmp1 ^ AES_xtime(a0 ^ a1); |
384 | a1 ^= tmp1 ^ AES_xtime(a1 ^ a2); |
385 | a2 ^= tmp1 ^ AES_xtime(a2 ^ a3); |
386 | a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0); |
387 | } |
388 | |
389 | tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3); |
390 | } |
391 | |
392 | /* KeyAddition - note that it is vital that this loop is separate from |
393 | the MixColumn operation, which must be atomic...*/ |
394 | for (row = 0; row < 4; row++) |
395 | data[row] = tmp[row] ^ *(k++); |
396 | } |
397 | } |
398 | |
399 | /** |
400 | * Decrypt a single block (16 bytes) of data |
401 | */ |
402 | void AES_decrypt(const AES_CTX *ctx, uint32_t *data) |
403 | { |
404 | uint32_t tmp[4]; |
405 | uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6; |
406 | uint32_t a0, a1, a2, a3, row; |
407 | int curr_rnd; |
408 | int rounds = ctx->rounds; |
409 | const uint32_t *k = ctx->ks + ((rounds+1)*4); |
410 | |
411 | /* pre-round key addition */ |
412 | for (row=4; row > 0;row--) |
413 | data[row-1] ^= *(--k); |
414 | |
415 | /* Decrypt one block */ |
416 | for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++) |
417 | { |
418 | /* Perform ByteSub and ShiftRow operations together */ |
419 | for (row = 4; row > 0; row--) |
420 | { |
421 | a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF]; |
422 | a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF]; |
423 | a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF]; |
424 | a3 = aes_isbox[(data[row%4])&0xFF]; |
425 | |
426 | /* Perform MixColumn iff not last round */ |
427 | if (curr_rnd<(rounds-1)) |
428 | { |
429 | /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E) |
430 | are quite large compared to encryption; this |
431 | operation slows decryption down noticeably. */ |
432 | xt0 = AES_xtime(a0^a1); |
433 | xt1 = AES_xtime(a1^a2); |
434 | xt2 = AES_xtime(a2^a3); |
435 | xt3 = AES_xtime(a3^a0); |
436 | xt4 = AES_xtime(xt0^xt1); |
437 | xt5 = AES_xtime(xt1^xt2); |
438 | xt6 = AES_xtime(xt4^xt5); |
439 | |
440 | xt0 ^= a1^a2^a3^xt4^xt6; |
441 | xt1 ^= a0^a2^a3^xt5^xt6; |
442 | xt2 ^= a0^a1^a3^xt4^xt6; |
443 | xt3 ^= a0^a1^a2^xt5^xt6; |
444 | tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3); |
445 | } |
446 | else |
447 | tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3); |
448 | } |
449 | |
450 | for (row = 4; row > 0; row--) |
451 | data[row-1] = tmp[row-1] ^ *(--k); |
452 | } |
453 | } |
454 | |