1/*
2 * Copyright (c) 2007-2016, Cameron Rich
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of the axTLS project nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/**
32 * AES implementation - this is a small code version. There are much faster
33 * versions around but they are much larger in size (i.e. they use large
34 * submix tables).
35 */
36
37#include <string.h>
38#include "os_port.h"
39#include "crypto.h"
40
/* rotN(x): rotate the 32-bit word x right by 8*N bits.
 * x must be an unsigned 32-bit value (uint32_t) or the shifts misbehave. */
#define rot1(x) (((x) << 24) | ((x) >> 8))
#define rot2(x) (((x) << 16) | ((x) >> 16))
#define rot3(x) (((x) << 8) | ((x) >> 24))

/*
 * This cute trick does 4 'mul by two' at once. Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
 * a standard graphics trick
 * The key to this is that we need to xor with 0x1b if the top bit is set.
 * a 1xxx xxxx 0xxx 0xxx First we mask the 7bit,
 * b 1000 0000 0000 0000 then we shift right by 7 putting the 7bit in 0bit,
 * c 0000 0001 0000 0000 we then subtract (c) from (b)
 * d 0111 1111 0000 0000 and now we and with our mask
 * e 0001 1011 0000 0000
 */
#define mt 0x80808080   /* top bit of each byte lane */
#define ml 0x7f7f7f7f   /* low 7 bits of each byte lane (unused in this file) */
#define mh 0xfefefefe   /* mask to discard carry into the next byte lane */
#define mm 0x1b1b1b1b   /* GF(2^8) reduction polynomial, replicated per lane */

/* mul2(x,t): doubles all four bytes of x in GF(2^8) at once; t is a scratch
 * lvalue. NOTE: x and t are evaluated more than once — do not pass
 * expressions with side effects. */
#define mul2(x,t) ((t)=((x)&mt), \
    ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))

/* inv_mix_col(x,f2,f4,f8,f9): InvMixColumns applied to one 32-bit state
 * column x. f2/f4/f8/f9 are scratch lvalues holding the 2x, 4x, 8x and
 * (x^8x) products; the whole comma expression evaluates to the mixed
 * column. Statement order is significant — the later terms reuse the
 * earlier scratch values. */
#define inv_mix_col(x,f2,f4,f8,f9) (\
    (f2)=mul2(x,f2), \
    (f4)=mul2(f2,f4), \
    (f8)=mul2(f4,f8), \
    (f9)=(x)^(f8), \
    (f8)=((f2)^(f4)^(f8)), \
    (f2)^=(f9), \
    (f4)^=(f9), \
    (f8)^=rot3(f2), \
    (f8)^=rot2(f4), \
    (f8)^rot1(f9))
74
/*
 * AES S-box: the forward SubBytes substitution table from FIPS-197,
 * indexed by the input byte. Also used by the key schedule (SubWord).
 */
static const uint8_t aes_sbox[256] =
{
    0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
    0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
    0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
    0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
    0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
    0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
    0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
    0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
    0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
    0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
    0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
    0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
    0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
    0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
    0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
    0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
    0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
    0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
    0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
    0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
    0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
    0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
    0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
    0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
    0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
    0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
    0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
    0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
    0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
    0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
    0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
    0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
113
/*
 * AES inverse S-box: the InvSubBytes table from FIPS-197, the exact
 * inverse permutation of aes_sbox (aes_isbox[aes_sbox[x]] == x).
 */
static const uint8_t aes_isbox[256] =
{
    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
152
/* Key-schedule round constants: Rcon[i] = x^i in GF(2^8) (FIPS-197 5.2).
 * Only the first 10 entries are used for AES-128 and 7 for AES-256; the
 * 30-entry table is inherited from the original Rijndael code, which
 * supported larger block sizes. */
static const unsigned char Rcon[30]=
{
    0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
    0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
    0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
    0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
160
161/* ----- static functions ----- */
162
/* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
   x^8+x^4+x^3+x+1.

   Branch-free form: the conditional reduction by 0x1b is selected by bit 7
   of x. The original `(x&0x80) ? ... : ...` branched on a value derived
   from secret key/state bytes, which is a potential timing side channel in
   the cipher rounds; the multiply-by-bit form is behaviourally identical
   (bit 7 fully determines the reduction, regardless of any higher bits in
   the uint32_t argument) but data-independent in control flow. */
static unsigned char AES_xtime(uint32_t x)
{
    return (unsigned char)((x << 1) ^ (((x >> 7) & 1) * 0x1b));
}
169
/**
 * Set up AES with the key/iv and cipher size.
 *
 * ctx  - context to initialise: round keys go into ctx->ks, the iv into
 *        ctx->iv, and ctx->rounds/ctx->key_size are set.
 * key  - raw key bytes, big-endian: 16 bytes for AES_MODE_128, 32 bytes
 *        for AES_MODE_256.
 * iv   - 16-byte initialisation vector, or NULL to leave ctx->iv untouched.
 * mode - AES_MODE_128 or AES_MODE_256. Any other value (including 192-bit
 *        keys, which this implementation does not support) returns without
 *        touching the context.
 */
void AES_set_key(AES_CTX *ctx, const uint8_t *key,
        const uint8_t *iv, AES_MODE mode)
{
    int i, ii;
    uint32_t *W, tmp, tmp2;
    const unsigned char *ip;
    int words;

    switch (mode)
    {
        case AES_MODE_128:
            i = 10;     /* rounds */
            words = 4;  /* key words (Nk) */
            break;

        case AES_MODE_256:
            i = 14;
            words = 8;
            break;

        default:        /* fail silently */
            return;
    }

    ctx->rounds = i;
    ctx->key_size = words;
    W = ctx->ks;

    /* Load the raw key into the first Nk schedule words, big-endian. */
    for (i = 0; i < words; i+=2)
    {
        W[i+0]= ((uint32_t)key[ 0]<<24)|
                ((uint32_t)key[ 1]<<16)|
                ((uint32_t)key[ 2]<< 8)|
                ((uint32_t)key[ 3]    );
        W[i+1]= ((uint32_t)key[ 4]<<24)|
                ((uint32_t)key[ 5]<<16)|
                ((uint32_t)key[ 6]<< 8)|
                ((uint32_t)key[ 7]    );
        key += 8;
    }

    /* Key expansion (FIPS-197 5.2): each new word is the previous word,
       optionally transformed, xored with the word Nk positions back.
       Note tmp is threaded through BOTH conditionals below — order matters. */
    ip = Rcon;
    ii = 4 * (ctx->rounds+1);
    for (i = words; i<ii; i++)
    {
        tmp = W[i-1];

        if ((i % words) == 0)
        {
            /* SubWord(RotWord(tmp)) done in one pass: each S-box output is
               placed one byte position left of its input, which is exactly
               a left-rotate by 8 bits. */
            tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
            tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
            tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
            tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
            /* xor the round constant into the top byte */
            tmp=tmp2^(((unsigned int)*ip)<<24);
            ip++;
        }

        /* AES-256 only: an extra SubWord (no rotate) halfway through each
           8-word group. */
        if ((words == 8) && ((i % words) == 4))
        {
            tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
            tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
            tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
            tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
            tmp=tmp2;
        }

        W[i]=W[i-words]^tmp;
    }

    /* copy the iv across */
    if (iv)
    {
        memcpy(ctx->iv, iv, 16);
    }
}
247
248/**
249 * Change a key for decryption.
250 */
251void AES_convert_key(AES_CTX *ctx)
252{
253 int i;
254 uint32_t *k,w,t1,t2,t3,t4;
255
256 k = ctx->ks;
257 k += 4;
258
259 for (i= ctx->rounds*4; i > 4; i--)
260 {
261 w= *k;
262 w = inv_mix_col(w,t1,t2,t3,t4);
263 *k++ =w;
264 }
265}
266
267/**
268 * Encrypt a byte sequence (with a block size 16) using the AES cipher.
269 */
270void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
271{
272 int i;
273 uint32_t tin[4], tout[4], iv[4];
274
275 memcpy(iv, ctx->iv, AES_IV_SIZE);
276 for (i = 0; i < 4; i++)
277 tout[i] = ntohl(iv[i]);
278
279 for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
280 {
281 uint32_t msg_32[4];
282 uint32_t out_32[4];
283 memcpy(msg_32, msg, AES_BLOCKSIZE);
284 msg += AES_BLOCKSIZE;
285
286 for (i = 0; i < 4; i++)
287 tin[i] = ntohl(msg_32[i])^tout[i];
288
289 AES_encrypt(ctx, tin);
290
291 for (i = 0; i < 4; i++)
292 {
293 tout[i] = tin[i];
294 out_32[i] = htonl(tout[i]);
295 }
296
297 memcpy(out, out_32, AES_BLOCKSIZE);
298 out += AES_BLOCKSIZE;
299 }
300
301 for (i = 0; i < 4; i++)
302 iv[i] = htonl(tout[i]);
303 memcpy(ctx->iv, iv, AES_IV_SIZE);
304}
305
306/**
307 * Decrypt a byte sequence (with a block size 16) using the AES cipher.
308 */
309void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
310{
311 int i;
312 uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
313
314 memcpy(iv, ctx->iv, AES_IV_SIZE);
315 for (i = 0; i < 4; i++)
316 xor[i] = ntohl(iv[i]);
317
318 for (length -= 16; length >= 0; length -= 16)
319 {
320 uint32_t msg_32[4];
321 uint32_t out_32[4];
322 memcpy(msg_32, msg, AES_BLOCKSIZE);
323 msg += AES_BLOCKSIZE;
324
325 for (i = 0; i < 4; i++)
326 {
327 tin[i] = ntohl(msg_32[i]);
328 data[i] = tin[i];
329 }
330
331 AES_decrypt(ctx, data);
332
333 for (i = 0; i < 4; i++)
334 {
335 tout[i] = data[i]^xor[i];
336 xor[i] = tin[i];
337 out_32[i] = htonl(tout[i]);
338 }
339
340 memcpy(out, out_32, AES_BLOCKSIZE);
341 out += AES_BLOCKSIZE;
342 }
343
344 for (i = 0; i < 4; i++)
345 iv[i] = htonl(xor[i]);
346 memcpy(ctx->iv, iv, AES_IV_SIZE);
347}
348
/**
 * Encrypt a single block (16 bytes) of data.
 *
 * data holds the block as four 32-bit words in host byte order (the CBC
 * wrappers convert with ntohl() before calling) and is encrypted in
 * place. ctx->ks must contain the schedule built by AES_set_key().
 */
void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
{
    /* Small-code variant: single 256-byte S-box and per-column MixColumns
     * via AES_xtime(), instead of the large combined lookup tables used by
     * fast implementations. Slower, but much smaller.
     */
    uint32_t tmp[4];
    uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
    int curr_rnd;
    int rounds = ctx->rounds;
    const uint32_t *k = ctx->ks;

    /* Pre-round key addition */
    for (row = 0; row < 4; row++)
        data[row] ^= *(k++);

    /* Encrypt one block. */
    for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
    {
        /* Perform ByteSub and ShiftRow operations together: byte b of
           output column 'row' is fetched from column (row+b)%4, which is
           exactly the ShiftRows rotation fused into the S-box lookups. */
        for (row = 0; row < 4; row++)
        {
            a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
            a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
            a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
            a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];

            /* Perform MixColumn iff not last round */
            if (curr_rnd < (rounds - 1))
            {
                /* standard 2/3/1/1 MixColumns via xtime: each output byte
                   is x^sum ^ xtime(pairwise xor) */
                tmp1 = a0 ^ a1 ^ a2 ^ a3;
                old_a0 = a0;
                a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
                a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
                a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
                a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
            }

            tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
        }

        /* KeyAddition - note that it is vital that this loop is separate from
           the MixColumn operation, which must be atomic... the lookups above
           read data[] across columns, so data[] may only be overwritten
           after the whole round's tmp[] is complete. */
        for (row = 0; row < 4; row++)
            data[row] = tmp[row] ^ *(k++);
    }
}
398
/**
 * Decrypt a single block (16 bytes) of data.
 *
 * data holds the block as four 32-bit words in host byte order and is
 * decrypted in place. Uses the equivalent inverse cipher, so the key
 * schedule must first be transformed with AES_convert_key(); the round
 * keys are consumed in reverse via the decrementing pointer k.
 */
void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
{
    uint32_t tmp[4];
    uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
    uint32_t a0, a1, a2, a3, row;
    int curr_rnd;
    int rounds = ctx->rounds;
    const uint32_t *k = ctx->ks + ((rounds+1)*4);  /* one past the last key */

    /* pre-round key addition (the final encryption round key) */
    for (row=4; row > 0;row--)
        data[row-1] ^= *(--k);

    /* Decrypt one block */
    for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
    {
        /* Perform ByteSub and ShiftRow operations together: byte b of
           output column comes from column (row+4-b)%4 — the inverse
           ShiftRows rotation fused into the inverse S-box lookups. */
        for (row = 4; row > 0; row--)
        {
            a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
            a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
            a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
            a3 = aes_isbox[(data[row%4])&0xFF];

            /* Perform MixColumn iff not last round */
            if (curr_rnd<(rounds-1))
            {
                /* The MDS coefficients (0x09, 0x0B, 0x0D, 0x0E)
                   are quite large compared to encryption; this
                   operation slows decryption down noticeably.
                   xt0..xt6 build up the 2x/4x/8x products needed
                   for InvMixColumns using only xtime doublings. */
                xt0 = AES_xtime(a0^a1);
                xt1 = AES_xtime(a1^a2);
                xt2 = AES_xtime(a2^a3);
                xt3 = AES_xtime(a3^a0);
                xt4 = AES_xtime(xt0^xt1);
                xt5 = AES_xtime(xt1^xt2);
                xt6 = AES_xtime(xt4^xt5);

                xt0 ^= a1^a2^a3^xt4^xt6;
                xt1 ^= a0^a2^a3^xt5^xt6;
                xt2 ^= a0^a1^a3^xt4^xt6;
                xt3 ^= a0^a1^a2^xt5^xt6;
                tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
            }
            else
                tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
        }

        /* key addition kept separate: data[] is read across columns above,
           so it may only be overwritten once the round's tmp[] is complete */
        for (row = 4; row > 0; row--)
            data[row-1] = tmp[row-1] ^ *(--k);
    }
}
454