1/*
2 * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2017 Ribose Inc. All Rights Reserved.
4 * Ported from Ribose contributions from Botan.
5 *
6 * Licensed under the Apache License 2.0 (the "License"). You may not use
7 * this file except in compliance with the License. You can obtain a copy
8 * in the file LICENSE in the source distribution or at
9 * https://www.openssl.org/source/license.html
10 */
11
12#include <openssl/e_os2.h>
13#include "crypto/sm4.h"
14
/*
 * SM4 byte substitution table (S-box).  SM4_S[x] is the substituted value
 * for input byte x; the 256 entries form a permutation of 0x00..0xFF.
 * Laid out eight entries per row, in index order.
 */
static const uint8_t SM4_S[256] = {
    0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
    0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
    0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
    0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
    0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
    0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
    0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
    0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
    0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
    0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B,
    0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
    0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2,
    0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
    0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
    0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
    0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5,
    0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
    0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55,
    0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
    0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
    0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
    0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F,
    0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
    0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
    0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
    0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
    0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
    0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
    0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
    0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
    0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
};
39
/*
 * Combined S-box and linear-transform table used by SM4_T():
 *
 *     SM4_SBOX_T[j] == L(SM4_S[j] << 24)
 *
 * where L(t) = t ^ rotl(t, 2) ^ rotl(t, 10) ^ rotl(t, 18) ^ rotl(t, 24).
 * The contribution of the other three byte positions is obtained by
 * rotating an entry of this table.  Laid out four entries per row, in
 * index order.
 */
static const uint32_t SM4_SBOX_T[256] = {
    0x8ED55B5B, 0xD0924242, 0x4DEAA7A7, 0x06FDFBFB,
    0xFCCF3333, 0x65E28787, 0xC93DF4F4, 0x6BB5DEDE,
    0x4E165858, 0x6EB4DADA, 0x44145050, 0xCAC10B0B,
    0x8828A0A0, 0x17F8EFEF, 0x9C2CB0B0, 0x11051414,
    0x872BACAC, 0xFB669D9D, 0xF2986A6A, 0xAE77D9D9,
    0x822AA8A8, 0x46BCFAFA, 0x14041010, 0xCFC00F0F,
    0x02A8AAAA, 0x54451111, 0x5F134C4C, 0xBE269898,
    0x6D482525, 0x9E841A1A, 0x1E061818, 0xFD9B6666,
    0xEC9E7272, 0x4A430909, 0x10514141, 0x24F7D3D3,
    0xD5934646, 0x53ECBFBF, 0xF89A6262, 0x927BE9E9,
    0xFF33CCCC, 0x04555151, 0x270B2C2C, 0x4F420D0D,
    0x59EEB7B7, 0xF3CC3F3F, 0x1CAEB2B2, 0xEA638989,
    0x74E79393, 0x7FB1CECE, 0x6C1C7070, 0x0DABA6A6,
    0xEDCA2727, 0x28082020, 0x48EBA3A3, 0xC1975656,
    0x80820202, 0xA3DC7F7F, 0xC4965252, 0x12F9EBEB,
    0xA174D5D5, 0xB38D3E3E, 0xC33FFCFC, 0x3EA49A9A,
    0x5B461D1D, 0x1B071C1C, 0x3BA59E9E, 0x0CFFF3F3,
    0x3FF0CFCF, 0xBF72CDCD, 0x4B175C5C, 0x52B8EAEA,
    0x8F810E0E, 0x3D586565, 0xCC3CF0F0, 0x7D196464,
    0x7EE59B9B, 0x91871616, 0x734E3D3D, 0x08AAA2A2,
    0xC869A1A1, 0xC76AADAD, 0x85830606, 0x7AB0CACA,
    0xB570C5C5, 0xF4659191, 0xB2D96B6B, 0xA7892E2E,
    0x18FBE3E3, 0x47E8AFAF, 0x330F3C3C, 0x674A2D2D,
    0xB071C1C1, 0x0E575959, 0xE99F7676, 0xE135D4D4,
    0x661E7878, 0xB4249090, 0x360E3838, 0x265F7979,
    0xEF628D8D, 0x38596161, 0x95D24747, 0x2AA08A8A,
    0xB1259494, 0xAA228888, 0x8C7DF1F1, 0xD73BECEC,
    0x05010404, 0xA5218484, 0x9879E1E1, 0x9B851E1E,
    0x84D75353, 0x00000000, 0x5E471919, 0x0B565D5D,
    0xE39D7E7E, 0x9FD04F4F, 0xBB279C9C, 0x1A534949,
    0x7C4D3131, 0xEE36D8D8, 0x0A020808, 0x7BE49F9F,
    0x20A28282, 0xD4C71313, 0xE8CB2323, 0xE69C7A7A,
    0x42E9ABAB, 0x43BDFEFE, 0xA2882A2A, 0x9AD14B4B,
    0x40410101, 0xDBC41F1F, 0xD838E0E0, 0x61B7D6D6,
    0x2FA18E8E, 0x2BF4DFDF, 0x3AF1CBCB, 0xF6CD3B3B,
    0x1DFAE7E7, 0xE5608585, 0x41155454, 0x25A38686,
    0x60E38383, 0x16ACBABA, 0x295C7575, 0x34A69292,
    0xF7996E6E, 0xE434D0D0, 0x721A6868, 0x01545555,
    0x19AFB6B6, 0xDF914E4E, 0xFA32C8C8, 0xF030C0C0,
    0x21F6D7D7, 0xBC8E3232, 0x75B3C6C6, 0x6FE08F8F,
    0x691D7474, 0x2EF5DBDB, 0x6AE18B8B, 0x962EB8B8,
    0x8A800A0A, 0xFE679999, 0xE2C92B2B, 0xE0618181,
    0xC0C30303, 0x8D29A4A4, 0xAF238C8C, 0x07A9AEAE,
    0x390D3434, 0x1F524D4D, 0x764F3939, 0xD36EBDBD,
    0x81D65757, 0xB7D86F6F, 0xEB37DCDC, 0x51441515,
    0xA6DD7B7B, 0x09FEF7F7, 0xB68C3A3A, 0x932FBCBC,
    0x0F030C0C, 0x03FCFFFF, 0xC26BA9A9, 0xBA73C9C9,
    0xD96CB5B5, 0xDC6DB1B1, 0x375A6D6D, 0x15504545,
    0xB98F3636, 0x771B6C6C, 0x13ADBEBE, 0xDA904A4A,
    0x57B9EEEE, 0xA9DE7777, 0x4CBEF2F2, 0x837EFDFD,
    0x55114444, 0xBDDA6767, 0x2C5D7171, 0x45400505,
    0x631F7C7C, 0x50104040, 0x325B6969, 0xB8DB6363,
    0x220A2828, 0xC5C20707, 0xF531C4C4, 0xA88A2222,
    0x31A79696, 0xF9CE3737, 0x977AEDED, 0x49BFF6F6,
    0x992DB4B4, 0xA475D1D1, 0x90D34343, 0x5A124848,
    0x58BAE2E2, 0x71E69797, 0x64B6D2D2, 0x70B2C2C2,
    0xAD8B2626, 0xCD68A5A5, 0xCB955E5E, 0x624B2929,
    0x3C0C3030, 0xCE945A5A, 0xAB76DDDD, 0x867FF9F9,
    0xF1649595, 0x5DBBE6E6, 0x35F2C7C7, 0x2D092424,
    0xD1C61717, 0xD66FB9B9, 0xDEC51B1B, 0x94861212,
    0x78186060, 0x30F3C3C3, 0x897CF5F5, 0x5CEFB3B3,
    0xD23AE8E8, 0xACDF7373, 0x794C3535, 0xA0208080,
    0x9D78E5E5, 0x56EDBBBB, 0x235E7D7D, 0xC63EF8F8,
    0x8BD45F5F, 0xE7C82F2F, 0xDD39E4E4, 0x68492121
};
87
/*
 * Rotate the 32-bit word |a| left by |n| bit positions.
 *
 * The count is reduced modulo 32 so that n == 0 (or n >= 32) never turns
 * into a shift by the full word width, which is undefined behaviour in C.
 * For 1 <= n <= 31 the result is the ordinary rotation.
 */
static ossl_inline uint32_t rotl(uint32_t a, uint8_t n)
{
    const unsigned int s = n & 31u;

    /* (32 - s) & 31 maps s == 0 to a right shift of 0 rather than 32. */
    return (a << s) | (a >> ((32u - s) & 31u));
}
92
/*
 * Read the |n|-th 32-bit word from the byte buffer |b|, i.e. the four bytes
 * starting at offset 4 * n, interpreting them most significant byte first.
 */
static ossl_inline uint32_t load_u32_be(const uint8_t *b, uint32_t n)
{
    const uint8_t *p = b + 4 * n;
    uint32_t w = p[0];

    w = (w << 8) | p[1];
    w = (w << 8) | p[2];
    w = (w << 8) | p[3];

    return w;
}
100
/*
 * Write the 32-bit word |v| to b[0..3], most significant byte first.
 */
static ossl_inline void store_u32_be(uint32_t v, uint8_t *b)
{
    int i;

    /* Peel bytes off the low end of v and fill the buffer back to front. */
    for (i = 3; i >= 0; --i) {
        b[i] = (uint8_t)v;
        v >>= 8;
    }
}
108
109static ossl_inline uint32_t SM4_T_slow(uint32_t X)
110{
111 uint32_t t = 0;
112
113 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 24)]) << 24;
114 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 16)]) << 16;
115 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 8)]) << 8;
116 t |= SM4_S[(uint8_t)X];
117
118 /*
119 * L linear transform
120 */
121 return t ^ rotl(t, 2) ^ rotl(t, 10) ^ rotl(t, 18) ^ rotl(t, 24);
122}
123
124static ossl_inline uint32_t SM4_T(uint32_t X)
125{
126 return SM4_SBOX_T[(uint8_t)(X >> 24)] ^
127 rotl(SM4_SBOX_T[(uint8_t)(X >> 16)], 24) ^
128 rotl(SM4_SBOX_T[(uint8_t)(X >> 8)], 16) ^
129 rotl(SM4_SBOX_T[(uint8_t)X], 8);
130}
131
132int SM4_set_key(const uint8_t *key, SM4_KEY *ks)
133{
134 /*
135 * Family Key
136 */
137 static const uint32_t FK[4] =
138 { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc };
139
140 /*
141 * Constant Key
142 */
143 static const uint32_t CK[32] = {
144 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269,
145 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9,
146 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249,
147 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9,
148 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229,
149 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299,
150 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209,
151 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
152 };
153
154 uint32_t K[4];
155 int i;
156
157 K[0] = load_u32_be(key, 0) ^ FK[0];
158 K[1] = load_u32_be(key, 1) ^ FK[1];
159 K[2] = load_u32_be(key, 2) ^ FK[2];
160 K[3] = load_u32_be(key, 3) ^ FK[3];
161
162 for (i = 0; i != SM4_KEY_SCHEDULE; ++i) {
163 uint32_t X = K[(i + 1) % 4] ^ K[(i + 2) % 4] ^ K[(i + 3) % 4] ^ CK[i];
164 uint32_t t = 0;
165
166 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 24)]) << 24;
167 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 16)]) << 16;
168 t |= ((uint32_t)SM4_S[(uint8_t)(X >> 8)]) << 8;
169 t |= SM4_S[(uint8_t)X];
170
171 t = t ^ rotl(t, 13) ^ rotl(t, 23);
172 K[i % 4] ^= t;
173 ks->rk[i] = K[i % 4];
174 }
175
176 return 1;
177}
178
/*
 * Run four consecutive cipher rounds.  r0..r3 are the indices of the round
 * keys to use, in order, and T is the round transform (function or macro)
 * applied in each round.  The macro operates directly on the caller's
 * local variables B0, B1, B2, B3 and on the key schedule pointer ks, all
 * of which must be in scope at the point of use.
 */
#define SM4_RNDS(r0, r1, r2, r3, T)             \
    do {                                        \
        B0 ^= T(B1 ^ B2 ^ B3 ^ ks->rk[r0]);     \
        B1 ^= T(B0 ^ B2 ^ B3 ^ ks->rk[r1]);     \
        B2 ^= T(B0 ^ B1 ^ B3 ^ ks->rk[r2]);     \
        B3 ^= T(B0 ^ B1 ^ B2 ^ ks->rk[r3]);     \
    } while (0)
186
187void SM4_encrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks)
188{
189 uint32_t B0 = load_u32_be(in, 0);
190 uint32_t B1 = load_u32_be(in, 1);
191 uint32_t B2 = load_u32_be(in, 2);
192 uint32_t B3 = load_u32_be(in, 3);
193
194 /*
195 * Uses byte-wise sbox in the first and last rounds to provide some
196 * protection from cache based side channels.
197 */
198 SM4_RNDS( 0, 1, 2, 3, SM4_T_slow);
199 SM4_RNDS( 4, 5, 6, 7, SM4_T);
200 SM4_RNDS( 8, 9, 10, 11, SM4_T);
201 SM4_RNDS(12, 13, 14, 15, SM4_T);
202 SM4_RNDS(16, 17, 18, 19, SM4_T);
203 SM4_RNDS(20, 21, 22, 23, SM4_T);
204 SM4_RNDS(24, 25, 26, 27, SM4_T);
205 SM4_RNDS(28, 29, 30, 31, SM4_T_slow);
206
207 store_u32_be(B3, out);
208 store_u32_be(B2, out + 4);
209 store_u32_be(B1, out + 8);
210 store_u32_be(B0, out + 12);
211}
212
213void SM4_decrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks)
214{
215 uint32_t B0 = load_u32_be(in, 0);
216 uint32_t B1 = load_u32_be(in, 1);
217 uint32_t B2 = load_u32_be(in, 2);
218 uint32_t B3 = load_u32_be(in, 3);
219
220 SM4_RNDS(31, 30, 29, 28, SM4_T_slow);
221 SM4_RNDS(27, 26, 25, 24, SM4_T);
222 SM4_RNDS(23, 22, 21, 20, SM4_T);
223 SM4_RNDS(19, 18, 17, 16, SM4_T);
224 SM4_RNDS(15, 14, 13, 12, SM4_T);
225 SM4_RNDS(11, 10, 9, 8, SM4_T);
226 SM4_RNDS( 7, 6, 5, 4, SM4_T);
227 SM4_RNDS( 3, 2, 1, 0, SM4_T_slow);
228
229 store_u32_be(B3, out);
230 store_u32_be(B2, out + 4);
231 store_u32_be(B1, out + 8);
232 store_u32_be(B0, out + 12);
233}
234