1/*
2 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <openssl/opensslconf.h>
11/*-
12 * IMPLEMENTATION NOTES.
13 *
14 * As you might have noticed 32-bit hash algorithms:
15 *
16 * - permit SHA_LONG to be wider than 32-bit
17 * - optimized versions implement two transform functions: one operating
18 * on [aligned] data in host byte order and one - on data in input
19 * stream byte order;
20 * - share common byte-order neutral collector and padding function
21 * implementations, ../md32_common.h;
22 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
25 *
 * - it's the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation [yet];
28 * - by supporting only one transform function [which operates on
29 * *aligned* data in input stream byte order, big-endian in this case]
30 * we minimize burden of maintenance in two ways: a) collector/padding
31 * function is simpler; b) only one transform function to stare at;
32 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33 * apply a number of optimizations to mitigate potential performance
34 * penalties caused by previous design decision;
35 *
36 * Caveat lector.
37 *
38 * Implementation relies on the fact that "long long" is 64-bit on
39 * both 32- and 64-bit platforms. If some compiler vendor comes up
40 * with 128-bit long long, adjustment to sha.h would be required.
41 * As this implementation relies on 64-bit integer type, it's totally
42 * inappropriate for platforms which don't support it, most notably
43 * 16-bit platforms.
44 */
45#include <stdlib.h>
46#include <string.h>
47
48#include <openssl/crypto.h>
49#include <openssl/sha.h>
50#include <openssl/opensslv.h>
51
52#include "internal/cryptlib.h"
53#include "crypto/sha.h"
54
/*
 * Set when the block transform may be fed input that is not aligned on a
 * SHA_LONG64 boundary.  With this defined, SHA512_Update() and
 * SHA512_Transform() pass caller data straight to sha512_block_data_order()
 * instead of first copying misaligned input into the context buffer.
 */
#if defined(SHA512_ASM) || \
    defined(__aarch64__) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
62
/*
 * U64(C) pastes an unsigned integer suffix onto the constant C:
 *   - UI64 on Windows toolchains other than MinGW,
 *   - UL   where __arch64__ is defined (presumably because unsigned long is
 *          64 bits wide there -- confirm against the target ABI),
 *   - ULL  everywhere else.
 */
#if defined(__MINGW32__) || !(defined(_WIN32) || defined(_WIN64))
# if defined(__arch64__)
#  define U64(C)     C##UL
# else
#  define U64(C)     C##ULL
# endif
#else
# define U64(C)     C##UI64
#endif
70
71int sha512_224_init(SHA512_CTX *c)
72{
73 c->h[0] = U64(0x8c3d37c819544da2);
74 c->h[1] = U64(0x73e1996689dcd4d6);
75 c->h[2] = U64(0x1dfab7ae32ff9c82);
76 c->h[3] = U64(0x679dd514582f9fcf);
77 c->h[4] = U64(0x0f6d2b697bd44da8);
78 c->h[5] = U64(0x77e36f7304c48942);
79 c->h[6] = U64(0x3f9d85a86a1d36c8);
80 c->h[7] = U64(0x1112e6ad91d692a1);
81
82 c->Nl = 0;
83 c->Nh = 0;
84 c->num = 0;
85 c->md_len = SHA224_DIGEST_LENGTH;
86 return 1;
87}
88
89int sha512_256_init(SHA512_CTX *c)
90{
91 c->h[0] = U64(0x22312194fc2bf72c);
92 c->h[1] = U64(0x9f555fa3c84c64c2);
93 c->h[2] = U64(0x2393b86b6f53b151);
94 c->h[3] = U64(0x963877195940eabd);
95 c->h[4] = U64(0x96283ee2a88effe3);
96 c->h[5] = U64(0xbe5e1e2553863992);
97 c->h[6] = U64(0x2b0199fc2c85b8aa);
98 c->h[7] = U64(0x0eb72ddc81c52ca2);
99
100 c->Nl = 0;
101 c->Nh = 0;
102 c->num = 0;
103 c->md_len = SHA256_DIGEST_LENGTH;
104 return 1;
105}
106
107int SHA384_Init(SHA512_CTX *c)
108{
109 c->h[0] = U64(0xcbbb9d5dc1059ed8);
110 c->h[1] = U64(0x629a292a367cd507);
111 c->h[2] = U64(0x9159015a3070dd17);
112 c->h[3] = U64(0x152fecd8f70e5939);
113 c->h[4] = U64(0x67332667ffc00b31);
114 c->h[5] = U64(0x8eb44a8768581511);
115 c->h[6] = U64(0xdb0c2e0d64f98fa7);
116 c->h[7] = U64(0x47b5481dbefa4fa4);
117
118 c->Nl = 0;
119 c->Nh = 0;
120 c->num = 0;
121 c->md_len = SHA384_DIGEST_LENGTH;
122 return 1;
123}
124
125int SHA512_Init(SHA512_CTX *c)
126{
127 c->h[0] = U64(0x6a09e667f3bcc908);
128 c->h[1] = U64(0xbb67ae8584caa73b);
129 c->h[2] = U64(0x3c6ef372fe94f82b);
130 c->h[3] = U64(0xa54ff53a5f1d36f1);
131 c->h[4] = U64(0x510e527fade682d1);
132 c->h[5] = U64(0x9b05688c2b3e6c1f);
133 c->h[6] = U64(0x1f83d9abfb41bd6b);
134 c->h[7] = U64(0x5be0cd19137e2179);
135
136 c->Nl = 0;
137 c->Nh = 0;
138 c->num = 0;
139 c->md_len = SHA512_DIGEST_LENGTH;
140 return 1;
141}
142
143#ifndef SHA512_ASM
144static
145#endif
146void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
147
148int SHA512_Final(unsigned char *md, SHA512_CTX *c)
149{
150 unsigned char *p = (unsigned char *)c->u.p;
151 size_t n = c->num;
152
153 p[n] = 0x80; /* There always is a room for one */
154 n++;
155 if (n > (sizeof(c->u) - 16)) {
156 memset(p + n, 0, sizeof(c->u) - n);
157 n = 0;
158 sha512_block_data_order(c, p, 1);
159 }
160
161 memset(p + n, 0, sizeof(c->u) - 16 - n);
162#ifdef B_ENDIAN
163 c->u.d[SHA_LBLOCK - 2] = c->Nh;
164 c->u.d[SHA_LBLOCK - 1] = c->Nl;
165#else
166 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
167 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
168 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
169 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
170 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
171 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
172 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
173 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
174 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
175 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
176 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
177 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
178 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
179 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
180 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
181 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
182#endif
183
184 sha512_block_data_order(c, p, 1);
185
186 if (md == 0)
187 return 0;
188
189 switch (c->md_len) {
190 /* Let compiler decide if it's appropriate to unroll... */
191 case SHA224_DIGEST_LENGTH:
192 for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
193 SHA_LONG64 t = c->h[n];
194
195 *(md++) = (unsigned char)(t >> 56);
196 *(md++) = (unsigned char)(t >> 48);
197 *(md++) = (unsigned char)(t >> 40);
198 *(md++) = (unsigned char)(t >> 32);
199 *(md++) = (unsigned char)(t >> 24);
200 *(md++) = (unsigned char)(t >> 16);
201 *(md++) = (unsigned char)(t >> 8);
202 *(md++) = (unsigned char)(t);
203 }
204 /*
205 * For 224 bits, there are four bytes left over that have to be
206 * processed separately.
207 */
208 {
209 SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
210
211 *(md++) = (unsigned char)(t >> 56);
212 *(md++) = (unsigned char)(t >> 48);
213 *(md++) = (unsigned char)(t >> 40);
214 *(md++) = (unsigned char)(t >> 32);
215 }
216 break;
217 case SHA256_DIGEST_LENGTH:
218 for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
219 SHA_LONG64 t = c->h[n];
220
221 *(md++) = (unsigned char)(t >> 56);
222 *(md++) = (unsigned char)(t >> 48);
223 *(md++) = (unsigned char)(t >> 40);
224 *(md++) = (unsigned char)(t >> 32);
225 *(md++) = (unsigned char)(t >> 24);
226 *(md++) = (unsigned char)(t >> 16);
227 *(md++) = (unsigned char)(t >> 8);
228 *(md++) = (unsigned char)(t);
229 }
230 break;
231 case SHA384_DIGEST_LENGTH:
232 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
233 SHA_LONG64 t = c->h[n];
234
235 *(md++) = (unsigned char)(t >> 56);
236 *(md++) = (unsigned char)(t >> 48);
237 *(md++) = (unsigned char)(t >> 40);
238 *(md++) = (unsigned char)(t >> 32);
239 *(md++) = (unsigned char)(t >> 24);
240 *(md++) = (unsigned char)(t >> 16);
241 *(md++) = (unsigned char)(t >> 8);
242 *(md++) = (unsigned char)(t);
243 }
244 break;
245 case SHA512_DIGEST_LENGTH:
246 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
247 SHA_LONG64 t = c->h[n];
248
249 *(md++) = (unsigned char)(t >> 56);
250 *(md++) = (unsigned char)(t >> 48);
251 *(md++) = (unsigned char)(t >> 40);
252 *(md++) = (unsigned char)(t >> 32);
253 *(md++) = (unsigned char)(t >> 24);
254 *(md++) = (unsigned char)(t >> 16);
255 *(md++) = (unsigned char)(t >> 8);
256 *(md++) = (unsigned char)(t);
257 }
258 break;
259 /* ... as well as make sure md_len is not abused. */
260 default:
261 return 0;
262 }
263
264 return 1;
265}
266
267int SHA384_Final(unsigned char *md, SHA512_CTX *c)
268{
269 return SHA512_Final(md, c);
270}
271
272int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
273{
274 SHA_LONG64 l;
275 unsigned char *p = c->u.p;
276 const unsigned char *data = (const unsigned char *)_data;
277
278 if (len == 0)
279 return 1;
280
281 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
282 if (l < c->Nl)
283 c->Nh++;
284 if (sizeof(len) >= 8)
285 c->Nh += (((SHA_LONG64) len) >> 61);
286 c->Nl = l;
287
288 if (c->num != 0) {
289 size_t n = sizeof(c->u) - c->num;
290
291 if (len < n) {
292 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
293 return 1;
294 } else {
295 memcpy(p + c->num, data, n), c->num = 0;
296 len -= n, data += n;
297 sha512_block_data_order(c, p, 1);
298 }
299 }
300
301 if (len >= sizeof(c->u)) {
302#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
303 if ((size_t)data % sizeof(c->u.d[0]) != 0)
304 while (len >= sizeof(c->u))
305 memcpy(p, data, sizeof(c->u)),
306 sha512_block_data_order(c, p, 1),
307 len -= sizeof(c->u), data += sizeof(c->u);
308 else
309#endif
310 sha512_block_data_order(c, data, len / sizeof(c->u)),
311 data += len, len %= sizeof(c->u), data -= len;
312 }
313
314 if (len != 0)
315 memcpy(p, data, len), c->num = (int)len;
316
317 return 1;
318}
319
320int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
321{
322 return SHA512_Update(c, data, len);
323}
324
325void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
326{
327#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
328 if ((size_t)data % sizeof(c->u.d[0]) != 0)
329 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
330#endif
331 sha512_block_data_order(c, data, 1);
332}
333
334unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
335{
336 SHA512_CTX c;
337 static unsigned char m[SHA384_DIGEST_LENGTH];
338
339 if (md == NULL)
340 md = m;
341 SHA384_Init(&c);
342 SHA512_Update(&c, d, n);
343 SHA512_Final(md, &c);
344 OPENSSL_cleanse(&c, sizeof(c));
345 return md;
346}
347
348unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
349{
350 SHA512_CTX c;
351 static unsigned char m[SHA512_DIGEST_LENGTH];
352
353 if (md == NULL)
354 md = m;
355 SHA512_Init(&c);
356 SHA512_Update(&c, d, n);
357 SHA512_Final(md, &c);
358 OPENSSL_cleanse(&c, sizeof(c));
359 return md;
360}
361
362#ifndef SHA512_ASM
363static const SHA_LONG64 K512[80] = {
364 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
365 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
366 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
367 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
368 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
369 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
370 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
371 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
372 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
373 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
374 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
375 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
376 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
377 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
378 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
379 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
380 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
381 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
382 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
383 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
384 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
385 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
386 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
387 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
388 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
389 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
390 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
391 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
392 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
393 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
394 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
395 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
396 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
397 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
398 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
399 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
400 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
401 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
402 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
403 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
404};
405
# ifndef PEDANTIC
/*
 * Optional compiler/CPU specific versions of ROTR (rotate a 64-bit value
 * right) and PULL64 (fetch a 64-bit word stored most significant byte
 * first).  Anything left undefined here is picked up by the portable
 * "# ifndef PULL64" / "# ifndef ROTR" definitions further down.  Defining
 * PEDANTIC skips this whole section.
 */
#  if defined(__GNUC__) && __GNUC__ >= 2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)    ({ SHA_LONG64 rot_;                    \
                            asm ("rorq %1,%0"                   \
                                 : "=r"(rot_)                   \
                                 : "J"(n),"0"(a)                \
                                 : "cc");                       \
                            rot_; })
#    if !defined(B_ENDIAN)
#     define PULL64(x)   ({ SHA_LONG64 be_ =                    \
                                *((const SHA_LONG64 *)(&(x)));  \
                            asm ("bswapq %0"                    \
                                 : "=r"(be_)                    \
                                 : "0"(be_));                   \
                            be_; })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
/* Byte swap built from xchg/rol, avoiding the bswap instruction */
#     define PULL64(x)   ({ const unsigned int *w32_ =          \
                                (const unsigned int *)(&(x));   \
                            unsigned int hi = w32_[0];          \
                            unsigned int lo = w32_[1];          \
                            asm ("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                 "roll $16,%%eax; roll $16,%%edx; "\
                                 "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                 : "=a"(lo),"=d"(hi)            \
                                 : "0"(lo),"1"(hi) : "cc");     \
                            ((SHA_LONG64)hi)<<32|lo; })
#    else
#     define PULL64(x)   ({ const unsigned int *w32_ =          \
                                (const unsigned int *)(&(x));   \
                            unsigned int hi = w32_[0];          \
                            unsigned int lo = w32_[1];          \
                            asm ("bswapl %0; bswapl %1;"        \
                                 : "=r"(lo),"=r"(hi)            \
                                 : "0"(lo),"1"(hi));            \
                            ((SHA_LONG64)hi)<<32|lo; })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)    ({ SHA_LONG64 rot_;                    \
                            asm ("rotrdi %0,%1,%2"              \
                                 : "=r"(rot_)                   \
                                 : "r"(a),"K"(n));              \
                            rot_; })
#   elif defined(__aarch64__)
#    define ROTR(a,n)    ({ SHA_LONG64 rot_;                    \
                            asm ("ror %0,%1,%2"                 \
                                 : "=r"(rot_)                   \
                                 : "r"(a),"I"(n));              \
                            rot_; })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 rev_;                    \
                            asm ("rev %0,%1"                    \
                                 : "=r"(rev_)                   \
                                 : "r"(*((const SHA_LONG64 *)(&(x))))); \
                            rev_; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)          /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * Fetch the 64-bit word at |x| with its bytes reversed.  The body never
 * names |x| and has no return statement: it apparently relies on __fastcall
 * delivering the pointer in ecx and on the result being left in edx:eax.
 * I386_ONLY selects a swap made of xchg/rol instead of bswap.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
#    if defined(I386_ONLY)
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
#    else
    _asm bswap edx
    _asm bswap eax
#    endif
}
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/*
 * Portable fallbacks and the SHA-512 logical functions.
 *
 * PULL64(x) assembles a 64-bit value from the eight bytes starting at the
 * address of x, first byte most significant, so it does not depend on host
 * byte order.  ROTR(x,s) rotates the 64-bit value x right by s bits; s must
 * lie strictly between 0 and 64 or one of the shifts is undefined.  Every
 * macro parameter is parenthesised so that expression arguments (for
 * example ROTR(v, a + b)) expand with the intended precedence.
 *
 * Note that these macros evaluate their arguments more than once; do not
 * pass expressions with side effects.
 */
# ifndef PULL64
/* Byte j of object x, shifted to its place in a big-endian 64-bit word */
#  define B(x,j)    \
    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
502
503# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
504/*
505 * This code should give better results on 32-bit CPU with less than
506 * ~24 registers, both size and performance wise...
507 */
508
509static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
510 size_t num)
511{
512 const SHA_LONG64 *W = in;
513 SHA_LONG64 A, E, T;
514 SHA_LONG64 X[9 + 80], *F;
515 int i;
516
517 while (num--) {
518
519 F = X + 80;
520 A = ctx->h[0];
521 F[1] = ctx->h[1];
522 F[2] = ctx->h[2];
523 F[3] = ctx->h[3];
524 E = ctx->h[4];
525 F[5] = ctx->h[5];
526 F[6] = ctx->h[6];
527 F[7] = ctx->h[7];
528
529 for (i = 0; i < 16; i++, F--) {
530# ifdef B_ENDIAN
531 T = W[i];
532# else
533 T = PULL64(W[i]);
534# endif
535 F[0] = A;
536 F[4] = E;
537 F[8] = T;
538 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
539 E = F[3] + T;
540 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
541 }
542
543 for (; i < 80; i++, F--) {
544 T = sigma0(F[8 + 16 - 1]);
545 T += sigma1(F[8 + 16 - 14]);
546 T += F[8 + 16] + F[8 + 16 - 9];
547
548 F[0] = A;
549 F[4] = E;
550 F[8] = T;
551 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
552 E = F[3] + T;
553 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
554 }
555
556 ctx->h[0] += A;
557 ctx->h[1] += F[1];
558 ctx->h[2] += F[2];
559 ctx->h[3] += F[3];
560 ctx->h[4] += E;
561 ctx->h[5] += F[5];
562 ctx->h[6] += F[6];
563 ctx->h[7] += F[7];
564
565 W += SHA_LBLOCK;
566 }
567}
568
569# elif defined(OPENSSL_SMALL_FOOTPRINT)
570
571static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
572 size_t num)
573{
574 const SHA_LONG64 *W = in;
575 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
576 SHA_LONG64 X[16];
577 int i;
578
579 while (num--) {
580
581 a = ctx->h[0];
582 b = ctx->h[1];
583 c = ctx->h[2];
584 d = ctx->h[3];
585 e = ctx->h[4];
586 f = ctx->h[5];
587 g = ctx->h[6];
588 h = ctx->h[7];
589
590 for (i = 0; i < 16; i++) {
591# ifdef B_ENDIAN
592 T1 = X[i] = W[i];
593# else
594 T1 = X[i] = PULL64(W[i]);
595# endif
596 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
597 T2 = Sigma0(a) + Maj(a, b, c);
598 h = g;
599 g = f;
600 f = e;
601 e = d + T1;
602 d = c;
603 c = b;
604 b = a;
605 a = T1 + T2;
606 }
607
608 for (; i < 80; i++) {
609 s0 = X[(i + 1) & 0x0f];
610 s0 = sigma0(s0);
611 s1 = X[(i + 14) & 0x0f];
612 s1 = sigma1(s1);
613
614 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
615 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
616 T2 = Sigma0(a) + Maj(a, b, c);
617 h = g;
618 g = f;
619 f = e;
620 e = d + T1;
621 d = c;
622 c = b;
623 b = a;
624 a = T1 + T2;
625 }
626
627 ctx->h[0] += a;
628 ctx->h[1] += b;
629 ctx->h[2] += c;
630 ctx->h[3] += d;
631 ctx->h[4] += e;
632 ctx->h[5] += f;
633 ctx->h[6] += g;
634 ctx->h[7] += h;
635
636 W += SHA_LBLOCK;
637 }
638}
639
640# else
641# define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
642 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
643 h = Sigma0(a) + Maj(a,b,c); \
644 d += T1; h += T1; } while (0)
645
646# define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
647 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
648 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
649 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
650 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
651
652static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
653 size_t num)
654{
655 const SHA_LONG64 *W = in;
656 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
657 SHA_LONG64 X[16];
658 int i;
659
660 while (num--) {
661
662 a = ctx->h[0];
663 b = ctx->h[1];
664 c = ctx->h[2];
665 d = ctx->h[3];
666 e = ctx->h[4];
667 f = ctx->h[5];
668 g = ctx->h[6];
669 h = ctx->h[7];
670
671# ifdef B_ENDIAN
672 T1 = X[0] = W[0];
673 ROUND_00_15(0, a, b, c, d, e, f, g, h);
674 T1 = X[1] = W[1];
675 ROUND_00_15(1, h, a, b, c, d, e, f, g);
676 T1 = X[2] = W[2];
677 ROUND_00_15(2, g, h, a, b, c, d, e, f);
678 T1 = X[3] = W[3];
679 ROUND_00_15(3, f, g, h, a, b, c, d, e);
680 T1 = X[4] = W[4];
681 ROUND_00_15(4, e, f, g, h, a, b, c, d);
682 T1 = X[5] = W[5];
683 ROUND_00_15(5, d, e, f, g, h, a, b, c);
684 T1 = X[6] = W[6];
685 ROUND_00_15(6, c, d, e, f, g, h, a, b);
686 T1 = X[7] = W[7];
687 ROUND_00_15(7, b, c, d, e, f, g, h, a);
688 T1 = X[8] = W[8];
689 ROUND_00_15(8, a, b, c, d, e, f, g, h);
690 T1 = X[9] = W[9];
691 ROUND_00_15(9, h, a, b, c, d, e, f, g);
692 T1 = X[10] = W[10];
693 ROUND_00_15(10, g, h, a, b, c, d, e, f);
694 T1 = X[11] = W[11];
695 ROUND_00_15(11, f, g, h, a, b, c, d, e);
696 T1 = X[12] = W[12];
697 ROUND_00_15(12, e, f, g, h, a, b, c, d);
698 T1 = X[13] = W[13];
699 ROUND_00_15(13, d, e, f, g, h, a, b, c);
700 T1 = X[14] = W[14];
701 ROUND_00_15(14, c, d, e, f, g, h, a, b);
702 T1 = X[15] = W[15];
703 ROUND_00_15(15, b, c, d, e, f, g, h, a);
704# else
705 T1 = X[0] = PULL64(W[0]);
706 ROUND_00_15(0, a, b, c, d, e, f, g, h);
707 T1 = X[1] = PULL64(W[1]);
708 ROUND_00_15(1, h, a, b, c, d, e, f, g);
709 T1 = X[2] = PULL64(W[2]);
710 ROUND_00_15(2, g, h, a, b, c, d, e, f);
711 T1 = X[3] = PULL64(W[3]);
712 ROUND_00_15(3, f, g, h, a, b, c, d, e);
713 T1 = X[4] = PULL64(W[4]);
714 ROUND_00_15(4, e, f, g, h, a, b, c, d);
715 T1 = X[5] = PULL64(W[5]);
716 ROUND_00_15(5, d, e, f, g, h, a, b, c);
717 T1 = X[6] = PULL64(W[6]);
718 ROUND_00_15(6, c, d, e, f, g, h, a, b);
719 T1 = X[7] = PULL64(W[7]);
720 ROUND_00_15(7, b, c, d, e, f, g, h, a);
721 T1 = X[8] = PULL64(W[8]);
722 ROUND_00_15(8, a, b, c, d, e, f, g, h);
723 T1 = X[9] = PULL64(W[9]);
724 ROUND_00_15(9, h, a, b, c, d, e, f, g);
725 T1 = X[10] = PULL64(W[10]);
726 ROUND_00_15(10, g, h, a, b, c, d, e, f);
727 T1 = X[11] = PULL64(W[11]);
728 ROUND_00_15(11, f, g, h, a, b, c, d, e);
729 T1 = X[12] = PULL64(W[12]);
730 ROUND_00_15(12, e, f, g, h, a, b, c, d);
731 T1 = X[13] = PULL64(W[13]);
732 ROUND_00_15(13, d, e, f, g, h, a, b, c);
733 T1 = X[14] = PULL64(W[14]);
734 ROUND_00_15(14, c, d, e, f, g, h, a, b);
735 T1 = X[15] = PULL64(W[15]);
736 ROUND_00_15(15, b, c, d, e, f, g, h, a);
737# endif
738
739 for (i = 16; i < 80; i += 16) {
740 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
741 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
742 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
743 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
744 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
745 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
746 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
747 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
748 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
749 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
750 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
751 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
752 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
753 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
754 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
755 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
756 }
757
758 ctx->h[0] += a;
759 ctx->h[1] += b;
760 ctx->h[2] += c;
761 ctx->h[3] += d;
762 ctx->h[4] += e;
763 ctx->h[5] += f;
764 ctx->h[6] += g;
765 ctx->h[7] += h;
766
767 W += SHA_LBLOCK;
768 }
769}
770
771# endif
772
773#endif /* SHA512_ASM */
774