1 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
2 | * All rights reserved. |
3 | * |
4 | * This package is an SSL implementation written |
5 | * by Eric Young (eay@cryptsoft.com). |
6 | * The implementation was written so as to conform with Netscapes SSL. |
7 | * |
8 | * This library is free for commercial and non-commercial use as long as |
9 | * the following conditions are aheared to. The following conditions |
10 | * apply to all code found in this distribution, be it the RC4, RSA, |
11 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
12 | * included with this distribution is covered by the same copyright terms |
13 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
14 | * |
15 | * Copyright remains Eric Young's, and as such any Copyright notices in |
16 | * the code are not to be removed. |
17 | * If this package is used in a product, Eric Young should be given attribution |
18 | * as the author of the parts of the library used. |
19 | * This can be in the form of a textual message at program startup or |
20 | * in documentation (online or textual) provided with the package. |
21 | * |
22 | * Redistribution and use in source and binary forms, with or without |
23 | * modification, are permitted provided that the following conditions |
24 | * are met: |
25 | * 1. Redistributions of source code must retain the copyright |
26 | * notice, this list of conditions and the following disclaimer. |
27 | * 2. Redistributions in binary form must reproduce the above copyright |
28 | * notice, this list of conditions and the following disclaimer in the |
29 | * documentation and/or other materials provided with the distribution. |
30 | * 3. All advertising materials mentioning features or use of this software |
31 | * must display the following acknowledgement: |
32 | * "This product includes cryptographic software written by |
33 | * Eric Young (eay@cryptsoft.com)" |
34 | * The word 'cryptographic' can be left out if the rouines from the library |
35 | * being used are not cryptographic related :-). |
36 | * 4. If you include any Windows specific code (or a derivative thereof) from |
37 | * the apps directory (application code) you must include an acknowledgement: |
38 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
39 | * |
40 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
41 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
42 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
43 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
44 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
45 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
46 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
47 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
48 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
49 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
50 | * SUCH DAMAGE. |
51 | * |
52 | * The licence and distribution terms for any publically available version or |
53 | * derivative of this code cannot be changed. i.e. this code cannot simply be |
54 | * copied and put under another distribution licence |
55 | * [including the GNU Public Licence.] */ |
56 | |
57 | #include <openssl/bn.h> |
58 | |
59 | #include <assert.h> |
60 | |
61 | #include "internal.h" |
62 | |
63 | |
64 | // This file has two other implementations: x86 assembly language in |
65 | // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. |
66 | #if defined(OPENSSL_NO_ASM) || \ |
67 | !(defined(OPENSSL_X86) || \ |
68 | (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__)))) |
69 | |
#ifdef BN_ULLONG
// Single-word multiply helpers built on the double-width type BN_ULLONG.
// Lw() and Hw() are defined outside this file section; as used here they are
// presumably the low and high BN_ULONG halves of a BN_ULLONG value.

// mul_add(r, a, w, c) evaluates w * a + r + c in double width, then writes the
// Lw() part back to |r| and the Hw() part to |c|. |r| and |c| must be lvalues.
#define mul_add(r, a, w, c)               \
  do {                                    \
    BN_ULLONG t;                          \
    t = (BN_ULLONG)(w) * (a) + (r) + (c); \
    (r) = Lw(t);                          \
    (c) = Hw(t);                          \
  } while (0)

// mul(r, a, w, c) evaluates w * a + c in double width, then writes the Lw()
// part to |r| and the Hw() part to |c|. Unlike mul_add, the previous value of
// |r| is not read.
#define mul(r, a, w, c)             \
  do {                              \
    BN_ULLONG t;                    \
    t = (BN_ULLONG)(w) * (a) + (c); \
    (r) = Lw(t);                    \
    (c) = Hw(t);                    \
  } while (0)

// sqr(r0, r1, a) evaluates a * a in double width and writes the Lw() part to
// |r0| and the Hw() part to |r1|.
#define sqr(r0, r1, a)        \
  do {                        \
    BN_ULLONG t;              \
    t = (BN_ULLONG)(a) * (a); \
    (r0) = Lw(t);             \
    (r1) = Hw(t);             \
  } while (0)

#else

// Single-word multiply helpers for builds without BN_ULLONG. They rely on
// BN_UMULT_LOHI(low, high, x, y), which is defined elsewhere; from its use
// here it is expected to store the two halves of x * y in |low| and |high|.

// mul_add(r, a, w, c) has the same contract as the BN_ULLONG variant: it adds
// w * a and the incoming |c| to |r|, leaving the low word in |r| and the
// carry-out word in |c|. Each single-word addition is followed by an unsigned
// "result < addend" comparison to detect wrap-around, and the detected carry
// bits are folded into |c| together with |high|.
#define mul_add(r, a, w, c)             \
  do {                                  \
    BN_ULONG high, low, ret, tmp = (a); \
    ret = (r);                          \
    BN_UMULT_LOHI(low, high, w, tmp);   \
    ret += (c);                         \
    (c) = (ret < (c)) ? 1 : 0;          \
    (c) += high;                        \
    ret += low;                         \
    (c) += (ret < low) ? 1 : 0;         \
    (r) = ret;                          \
  } while (0)

// mul(r, a, w, c) stores low(w * a) + c in |r| and sets |c| to high(w * a)
// plus the carry out of that addition.
#define mul(r, a, w, c)                \
  do {                                 \
    BN_ULONG high, low, ret, ta = (a); \
    BN_UMULT_LOHI(low, high, w, ta);   \
    ret = low + (c);                   \
    (c) = high;                        \
    (c) += (ret < low) ? 1 : 0;        \
    (r) = ret;                         \
  } while (0)

// sqr(r0, r1, a) hands |a| to BN_UMULT_LOHI as both factors, with |r0| in the
// low-output position and |r1| in the high-output position. |a| is copied into
// a temporary first, so it is evaluated only once.
#define sqr(r0, r1, a)               \
  do {                               \
    BN_ULONG tmp = (a);              \
    BN_UMULT_LOHI(r0, r1, tmp, tmp); \
  } while (0)

#endif  // !BN_ULLONG
127 | |
128 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, |
129 | BN_ULONG w) { |
130 | BN_ULONG c1 = 0; |
131 | |
132 | if (num == 0) { |
133 | return c1; |
134 | } |
135 | |
136 | while (num & ~3) { |
137 | mul_add(rp[0], ap[0], w, c1); |
138 | mul_add(rp[1], ap[1], w, c1); |
139 | mul_add(rp[2], ap[2], w, c1); |
140 | mul_add(rp[3], ap[3], w, c1); |
141 | ap += 4; |
142 | rp += 4; |
143 | num -= 4; |
144 | } |
145 | |
146 | while (num) { |
147 | mul_add(rp[0], ap[0], w, c1); |
148 | ap++; |
149 | rp++; |
150 | num--; |
151 | } |
152 | |
153 | return c1; |
154 | } |
155 | |
156 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, |
157 | BN_ULONG w) { |
158 | BN_ULONG c1 = 0; |
159 | |
160 | if (num == 0) { |
161 | return c1; |
162 | } |
163 | |
164 | while (num & ~3) { |
165 | mul(rp[0], ap[0], w, c1); |
166 | mul(rp[1], ap[1], w, c1); |
167 | mul(rp[2], ap[2], w, c1); |
168 | mul(rp[3], ap[3], w, c1); |
169 | ap += 4; |
170 | rp += 4; |
171 | num -= 4; |
172 | } |
173 | while (num) { |
174 | mul(rp[0], ap[0], w, c1); |
175 | ap++; |
176 | rp++; |
177 | num--; |
178 | } |
179 | return c1; |
180 | } |
181 | |
182 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) { |
183 | if (n == 0) { |
184 | return; |
185 | } |
186 | |
187 | while (n & ~3) { |
188 | sqr(r[0], r[1], a[0]); |
189 | sqr(r[2], r[3], a[1]); |
190 | sqr(r[4], r[5], a[2]); |
191 | sqr(r[6], r[7], a[3]); |
192 | a += 4; |
193 | r += 8; |
194 | n -= 4; |
195 | } |
196 | while (n) { |
197 | sqr(r[0], r[1], a[0]); |
198 | a++; |
199 | r += 2; |
200 | n--; |
201 | } |
202 | } |
203 | |
204 | #ifdef BN_ULLONG |
205 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
206 | size_t n) { |
207 | BN_ULLONG ll = 0; |
208 | |
209 | if (n == 0) { |
210 | return 0; |
211 | } |
212 | |
213 | while (n & ~3) { |
214 | ll += (BN_ULLONG)a[0] + b[0]; |
215 | r[0] = (BN_ULONG)ll; |
216 | ll >>= BN_BITS2; |
217 | ll += (BN_ULLONG)a[1] + b[1]; |
218 | r[1] = (BN_ULONG)ll; |
219 | ll >>= BN_BITS2; |
220 | ll += (BN_ULLONG)a[2] + b[2]; |
221 | r[2] = (BN_ULONG)ll; |
222 | ll >>= BN_BITS2; |
223 | ll += (BN_ULLONG)a[3] + b[3]; |
224 | r[3] = (BN_ULONG)ll; |
225 | ll >>= BN_BITS2; |
226 | a += 4; |
227 | b += 4; |
228 | r += 4; |
229 | n -= 4; |
230 | } |
231 | while (n) { |
232 | ll += (BN_ULLONG)a[0] + b[0]; |
233 | r[0] = (BN_ULONG)ll; |
234 | ll >>= BN_BITS2; |
235 | a++; |
236 | b++; |
237 | r++; |
238 | n--; |
239 | } |
240 | return (BN_ULONG)ll; |
241 | } |
242 | |
243 | #else // !BN_ULLONG |
244 | |
245 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
246 | size_t n) { |
247 | BN_ULONG c, l, t; |
248 | |
249 | if (n == 0) { |
250 | return (BN_ULONG)0; |
251 | } |
252 | |
253 | c = 0; |
254 | while (n & ~3) { |
255 | t = a[0]; |
256 | t += c; |
257 | c = (t < c); |
258 | l = t + b[0]; |
259 | c += (l < t); |
260 | r[0] = l; |
261 | t = a[1]; |
262 | t += c; |
263 | c = (t < c); |
264 | l = t + b[1]; |
265 | c += (l < t); |
266 | r[1] = l; |
267 | t = a[2]; |
268 | t += c; |
269 | c = (t < c); |
270 | l = t + b[2]; |
271 | c += (l < t); |
272 | r[2] = l; |
273 | t = a[3]; |
274 | t += c; |
275 | c = (t < c); |
276 | l = t + b[3]; |
277 | c += (l < t); |
278 | r[3] = l; |
279 | a += 4; |
280 | b += 4; |
281 | r += 4; |
282 | n -= 4; |
283 | } |
284 | while (n) { |
285 | t = a[0]; |
286 | t += c; |
287 | c = (t < c); |
288 | l = t + b[0]; |
289 | c += (l < t); |
290 | r[0] = l; |
291 | a++; |
292 | b++; |
293 | r++; |
294 | n--; |
295 | } |
296 | return (BN_ULONG)c; |
297 | } |
298 | |
299 | #endif // !BN_ULLONG |
300 | |
301 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
302 | size_t n) { |
303 | BN_ULONG t1, t2; |
304 | int c = 0; |
305 | |
306 | if (n == 0) { |
307 | return (BN_ULONG)0; |
308 | } |
309 | |
310 | while (n & ~3) { |
311 | t1 = a[0]; |
312 | t2 = b[0]; |
313 | r[0] = t1 - t2 - c; |
314 | if (t1 != t2) { |
315 | c = (t1 < t2); |
316 | } |
317 | t1 = a[1]; |
318 | t2 = b[1]; |
319 | r[1] = t1 - t2 - c; |
320 | if (t1 != t2) { |
321 | c = (t1 < t2); |
322 | } |
323 | t1 = a[2]; |
324 | t2 = b[2]; |
325 | r[2] = t1 - t2 - c; |
326 | if (t1 != t2) { |
327 | c = (t1 < t2); |
328 | } |
329 | t1 = a[3]; |
330 | t2 = b[3]; |
331 | r[3] = t1 - t2 - c; |
332 | if (t1 != t2) { |
333 | c = (t1 < t2); |
334 | } |
335 | a += 4; |
336 | b += 4; |
337 | r += 4; |
338 | n -= 4; |
339 | } |
340 | while (n) { |
341 | t1 = a[0]; |
342 | t2 = b[0]; |
343 | r[0] = t1 - t2 - c; |
344 | if (t1 != t2) { |
345 | c = (t1 < t2); |
346 | } |
347 | a++; |
348 | b++; |
349 | r++; |
350 | n--; |
351 | } |
352 | return c; |
353 | } |
354 | |
// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
359 | |
#ifdef BN_ULLONG

// Three-word accumulator helpers built on the double-width type BN_ULLONG.
// In every macro, (c2, c1, c0) is a three-word accumulator with |c0| the least
// significant word; |c0|, |c1| and |c2| must be lvalues. Lw() and Hw() are
// defined elsewhere and are presumably the low and high BN_ULONG halves of a
// BN_ULLONG value.
//
// Keep in mind that additions to multiplication result can not overflow,
// because its high half cannot be all-ones.

// mul_add_c adds the double-word product a * b to the accumulator.
#define mul_add_c(a, b, c0, c1, c2)     \
  do {                                  \
    BN_ULONG hi;                        \
    BN_ULLONG t = (BN_ULLONG)(a) * (b); \
    t += (c0); /* no carry */           \
    (c0) = (BN_ULONG)Lw(t);             \
    hi = (BN_ULONG)Hw(t);               \
    (c1) += (hi);                       \
    if ((c1) < hi) {                    \
      (c2)++;                           \
    }                                   \
  } while (0)

// mul_add_c2 adds 2 * a * b to the accumulator. The product is computed once
// and folded in twice, so the doubling itself never has to fit in a
// BN_ULLONG.
#define mul_add_c2(a, b, c0, c1, c2)        \
  do {                                      \
    BN_ULONG hi;                            \
    BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
    BN_ULLONG tt = t + (c0); /* no carry */ \
    (c0) = (BN_ULONG)Lw(tt);                \
    hi = (BN_ULONG)Hw(tt);                  \
    (c1) += hi;                             \
    if ((c1) < hi) {                        \
      (c2)++;                               \
    }                                       \
    t += (c0); /* no carry */               \
    (c0) = (BN_ULONG)Lw(t);                 \
    hi = (BN_ULONG)Hw(t);                   \
    (c1) += hi;                             \
    if ((c1) < hi) {                        \
      (c2)++;                               \
    }                                       \
  } while (0)

// sqr_add_c adds a[i]^2 to the accumulator.
#define sqr_add_c(a, i, c0, c1, c2)           \
  do {                                        \
    BN_ULONG hi;                              \
    BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
    t += (c0); /* no carry */                 \
    (c0) = (BN_ULONG)Lw(t);                   \
    hi = (BN_ULONG)Hw(t);                     \
    (c1) += hi;                               \
    if ((c1) < hi) {                          \
      (c2)++;                                 \
    }                                         \
  } while (0)

// sqr_add_c2 adds 2 * a[i] * a[j] to the accumulator.
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#else

// Three-word accumulator helpers for builds without BN_ULLONG. In every
// macro, (c2, c1, c0) is a three-word accumulator with |c0| the least
// significant word; |c0|, |c1| and |c2| must be lvalues. BN_UMULT_LOHI is
// defined elsewhere; from its use here it is expected to store the low and
// high words of the product of its last two arguments in its first two.
//
// Keep in mind that additions to hi can not overflow, because the high word of
// a multiplication result cannot be all-ones.

// mul_add_c adds the double-word product a * b to the accumulator. The carry
// out of the |c0| addition is folded into |hi| before |hi| is added to |c1|.
#define mul_add_c(a, b, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a), tb = (b);    \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, tb);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

// mul_add_c2 adds 2 * a * b to the accumulator by folding the same (lo, hi)
// product in twice. The first pass uses the scratch word |tt| so that |hi|
// is still unmodified for the second pass.
#define mul_add_c2(a, b, c0, c1, c2)   \
  do {                                 \
    BN_ULONG ta = (a), tb = (b);       \
    BN_ULONG lo, hi, tt;               \
    BN_UMULT_LOHI(lo, hi, ta, tb);     \
    (c0) += lo;                        \
    tt = hi + (((c0) < lo) ? 1 : 0);   \
    (c1) += tt;                        \
    (c2) += ((c1) < tt) ? 1 : 0;       \
    (c0) += lo;                        \
    hi += ((c0) < lo) ? 1 : 0;         \
    (c1) += hi;                        \
    (c2) += ((c1) < hi) ? 1 : 0;       \
  } while (0)

// sqr_add_c adds a[i]^2 to the accumulator.
#define sqr_add_c(a, i, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a)[i];           \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

// sqr_add_c2 adds 2 * a[i] * a[j] to the accumulator.
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#endif  // !BN_ULLONG
456 | |
// bn_mul_comba8 writes the 16-word product of the 8-word numbers |a| and |b|
// to |r|. Index 0 is the least significant word in all three arrays.
//
// The product is built one output word ("column") at a time: column k sums
// every a[i] * b[j] with i + j == k into a three-word accumulator via
// mul_add_c. Once a column is complete its lowest accumulator word is stored
// in r[k] and then zeroed, and the roles of c1/c2/c3 rotate so that the two
// upper words become the two lower words of the next column.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  BN_ULONG c1, c2, c3;

  c1 = 0;
  c2 = 0;
  c3 = 0;
  // Column 0.
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;
  // Column 1.
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;
  // Column 2.
  mul_add_c(a[2], b[0], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[0], b[2], c3, c1, c2);
  r[2] = c3;
  c3 = 0;
  // Column 3.
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;
  // Column 4.
  mul_add_c(a[4], b[0], c2, c3, c1);
  mul_add_c(a[3], b[1], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[1], b[3], c2, c3, c1);
  mul_add_c(a[0], b[4], c2, c3, c1);
  r[4] = c2;
  c2 = 0;
  // Column 5.
  mul_add_c(a[0], b[5], c3, c1, c2);
  mul_add_c(a[1], b[4], c3, c1, c2);
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  mul_add_c(a[4], b[1], c3, c1, c2);
  mul_add_c(a[5], b[0], c3, c1, c2);
  r[5] = c3;
  c3 = 0;
  // Column 6.
  mul_add_c(a[6], b[0], c1, c2, c3);
  mul_add_c(a[5], b[1], c1, c2, c3);
  mul_add_c(a[4], b[2], c1, c2, c3);
  mul_add_c(a[3], b[3], c1, c2, c3);
  mul_add_c(a[2], b[4], c1, c2, c3);
  mul_add_c(a[1], b[5], c1, c2, c3);
  mul_add_c(a[0], b[6], c1, c2, c3);
  r[6] = c1;
  c1 = 0;
  // Column 7 (the widest column: eight partial products).
  mul_add_c(a[0], b[7], c2, c3, c1);
  mul_add_c(a[1], b[6], c2, c3, c1);
  mul_add_c(a[2], b[5], c2, c3, c1);
  mul_add_c(a[3], b[4], c2, c3, c1);
  mul_add_c(a[4], b[3], c2, c3, c1);
  mul_add_c(a[5], b[2], c2, c3, c1);
  mul_add_c(a[6], b[1], c2, c3, c1);
  mul_add_c(a[7], b[0], c2, c3, c1);
  r[7] = c2;
  c2 = 0;
  // Column 8.
  mul_add_c(a[7], b[1], c3, c1, c2);
  mul_add_c(a[6], b[2], c3, c1, c2);
  mul_add_c(a[5], b[3], c3, c1, c2);
  mul_add_c(a[4], b[4], c3, c1, c2);
  mul_add_c(a[3], b[5], c3, c1, c2);
  mul_add_c(a[2], b[6], c3, c1, c2);
  mul_add_c(a[1], b[7], c3, c1, c2);
  r[8] = c3;
  c3 = 0;
  // Column 9.
  mul_add_c(a[2], b[7], c1, c2, c3);
  mul_add_c(a[3], b[6], c1, c2, c3);
  mul_add_c(a[4], b[5], c1, c2, c3);
  mul_add_c(a[5], b[4], c1, c2, c3);
  mul_add_c(a[6], b[3], c1, c2, c3);
  mul_add_c(a[7], b[2], c1, c2, c3);
  r[9] = c1;
  c1 = 0;
  // Column 10.
  mul_add_c(a[7], b[3], c2, c3, c1);
  mul_add_c(a[6], b[4], c2, c3, c1);
  mul_add_c(a[5], b[5], c2, c3, c1);
  mul_add_c(a[4], b[6], c2, c3, c1);
  mul_add_c(a[3], b[7], c2, c3, c1);
  r[10] = c2;
  c2 = 0;
  // Column 11.
  mul_add_c(a[4], b[7], c3, c1, c2);
  mul_add_c(a[5], b[6], c3, c1, c2);
  mul_add_c(a[6], b[5], c3, c1, c2);
  mul_add_c(a[7], b[4], c3, c1, c2);
  r[11] = c3;
  c3 = 0;
  // Column 12.
  mul_add_c(a[7], b[5], c1, c2, c3);
  mul_add_c(a[6], b[6], c1, c2, c3);
  mul_add_c(a[5], b[7], c1, c2, c3);
  r[12] = c1;
  c1 = 0;
  // Column 13.
  mul_add_c(a[6], b[7], c2, c3, c1);
  mul_add_c(a[7], b[6], c2, c3, c1);
  r[13] = c2;
  c2 = 0;
  // Column 14; the middle accumulator word left over becomes the top word.
  mul_add_c(a[7], b[7], c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
558 | |
// bn_mul_comba4 writes the 8-word product of the 4-word numbers |a| and |b|
// to |r|. Index 0 is the least significant word in all three arrays.
//
// Column k sums every a[i] * b[j] with i + j == k into a three-word
// accumulator via mul_add_c. After each column the lowest accumulator word is
// stored in r[k] and zeroed, and the roles of c1/c2/c3 rotate for the next
// column.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  BN_ULONG c1, c2, c3;

  c1 = 0;
  c2 = 0;
  c3 = 0;
  // Column 0.
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;
  // Column 1.
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;
  // Column 2.
  mul_add_c(a[2], b[0], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[0], b[2], c3, c1, c2);
  r[2] = c3;
  c3 = 0;
  // Column 3 (the widest column: four partial products).
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;
  // Column 4.
  mul_add_c(a[3], b[1], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[1], b[3], c2, c3, c1);
  r[4] = c2;
  c2 = 0;
  // Column 5.
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  r[5] = c3;
  c3 = 0;
  // Column 6; the middle accumulator word left over becomes the top word.
  mul_add_c(a[3], b[3], c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
596 | |
// bn_sqr_comba8 writes the 16-word square of the 8-word number |a| to |r|.
// Index 0 is the least significant word in both arrays.
//
// The layout matches the column-wise multiply, but symmetric partial products
// are merged: for column k, each pair i > j with i + j == k contributes
// 2 * a[i] * a[j] via sqr_add_c2, and even columns additionally receive the
// diagonal term a[k/2]^2 via sqr_add_c. After each column the lowest
// accumulator word is stored in r[k] and zeroed, and the roles of c1/c2/c3
// rotate for the next column.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  BN_ULONG c1, c2, c3;

  c1 = 0;
  c2 = 0;
  c3 = 0;
  // Column 0.
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;
  // Column 1.
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;
  // Column 2.
  sqr_add_c(a, 1, c3, c1, c2);
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  r[2] = c3;
  c3 = 0;
  // Column 3.
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;
  // Column 4.
  sqr_add_c(a, 2, c2, c3, c1);
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  sqr_add_c2(a, 4, 0, c2, c3, c1);
  r[4] = c2;
  c2 = 0;
  // Column 5.
  sqr_add_c2(a, 5, 0, c3, c1, c2);
  sqr_add_c2(a, 4, 1, c3, c1, c2);
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;
  // Column 6.
  sqr_add_c(a, 3, c1, c2, c3);
  sqr_add_c2(a, 4, 2, c1, c2, c3);
  sqr_add_c2(a, 5, 1, c1, c2, c3);
  sqr_add_c2(a, 6, 0, c1, c2, c3);
  r[6] = c1;
  c1 = 0;
  // Column 7.
  sqr_add_c2(a, 7, 0, c2, c3, c1);
  sqr_add_c2(a, 6, 1, c2, c3, c1);
  sqr_add_c2(a, 5, 2, c2, c3, c1);
  sqr_add_c2(a, 4, 3, c2, c3, c1);
  r[7] = c2;
  c2 = 0;
  // Column 8.
  sqr_add_c(a, 4, c3, c1, c2);
  sqr_add_c2(a, 5, 3, c3, c1, c2);
  sqr_add_c2(a, 6, 2, c3, c1, c2);
  sqr_add_c2(a, 7, 1, c3, c1, c2);
  r[8] = c3;
  c3 = 0;
  // Column 9.
  sqr_add_c2(a, 7, 2, c1, c2, c3);
  sqr_add_c2(a, 6, 3, c1, c2, c3);
  sqr_add_c2(a, 5, 4, c1, c2, c3);
  r[9] = c1;
  c1 = 0;
  // Column 10.
  sqr_add_c(a, 5, c2, c3, c1);
  sqr_add_c2(a, 6, 4, c2, c3, c1);
  sqr_add_c2(a, 7, 3, c2, c3, c1);
  r[10] = c2;
  c2 = 0;
  // Column 11.
  sqr_add_c2(a, 7, 4, c3, c1, c2);
  sqr_add_c2(a, 6, 5, c3, c1, c2);
  r[11] = c3;
  c3 = 0;
  // Column 12.
  sqr_add_c(a, 6, c1, c2, c3);
  sqr_add_c2(a, 7, 5, c1, c2, c3);
  r[12] = c1;
  c1 = 0;
  // Column 13.
  sqr_add_c2(a, 7, 6, c2, c3, c1);
  r[13] = c2;
  c2 = 0;
  // Column 14; the middle accumulator word left over becomes the top word.
  sqr_add_c(a, 7, c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
670 | |
// bn_sqr_comba4 writes the 8-word square of the 4-word number |a| to |r|.
// Index 0 is the least significant word in both arrays.
//
// For column k, each pair i > j with i + j == k contributes 2 * a[i] * a[j]
// via sqr_add_c2, and even columns additionally receive the diagonal term
// a[k/2]^2 via sqr_add_c. After each column the lowest accumulator word is
// stored in r[k] and zeroed, and the roles of c1/c2/c3 rotate for the next
// column.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  BN_ULONG c1, c2, c3;

  c1 = 0;
  c2 = 0;
  c3 = 0;
  // Column 0.
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;
  // Column 1.
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;
  // Column 2.
  sqr_add_c(a, 1, c3, c1, c2);
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  r[2] = c3;
  c3 = 0;
  // Column 3.
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;
  // Column 4.
  sqr_add_c(a, 2, c2, c3, c1);
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  r[4] = c2;
  c2 = 0;
  // Column 5.
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;
  // Column 6; the middle accumulator word left over becomes the top word.
  sqr_add_c(a, 3, c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
702 | |
703 | #undef mul_add |
704 | #undef mul |
705 | #undef sqr |
706 | #undef mul_add_c |
707 | #undef mul_add_c2 |
708 | #undef sqr_add_c |
709 | #undef sqr_add_c2 |
710 | |
711 | #endif |
712 | |