1/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2 * All rights reserved.
3 *
4 * This package is an SSL implementation written
5 * by Eric Young (eay@cryptsoft.com).
6 * The implementation was written so as to conform with Netscapes SSL.
7 *
8 * This library is free for commercial and non-commercial use as long as
9 * the following conditions are aheared to. The following conditions
10 * apply to all code found in this distribution, be it the RC4, RSA,
11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
12 * included with this distribution is covered by the same copyright terms
13 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14 *
15 * Copyright remains Eric Young's, and as such any Copyright notices in
16 * the code are not to be removed.
17 * If this package is used in a product, Eric Young should be given attribution
18 * as the author of the parts of the library used.
19 * This can be in the form of a textual message at program startup or
20 * in documentation (online or textual) provided with the package.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * "This product includes cryptographic software written by
33 * Eric Young (eay@cryptsoft.com)"
34 * The word 'cryptographic' can be left out if the rouines from the library
35 * being used are not cryptographic related :-).
36 * 4. If you include any Windows specific code (or a derivative thereof) from
37 * the apps directory (application code) you must include an acknowledgement:
38 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * The licence and distribution terms for any publically available version or
53 * derivative of this code cannot be changed. i.e. this code cannot simply be
54 * copied and put under another distribution licence
55 * [including the GNU Public Licence.] */
56
57#include <openssl/bn.h>
58
59#include <assert.h>
60#include <stdlib.h>
61#include <string.h>
62
63#include <openssl/err.h>
64#include <openssl/mem.h>
65#include <openssl/type_check.h>
66
67#include "internal.h"
68#include "../../internal.h"
69
70
71#define BN_MUL_RECURSIVE_SIZE_NORMAL 16
72#define BN_SQR_RECURSIVE_SIZE_NORMAL BN_MUL_RECURSIVE_SIZE_NORMAL
73
74
75static void bn_abs_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
76 size_t num, BN_ULONG *tmp) {
77 BN_ULONG borrow = bn_sub_words(tmp, a, b, num);
78 bn_sub_words(r, b, a, num);
79 bn_select_words(r, 0 - borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
80}
81
82static void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, size_t na,
83 const BN_ULONG *b, size_t nb) {
84 if (na < nb) {
85 size_t itmp = na;
86 na = nb;
87 nb = itmp;
88 const BN_ULONG *ltmp = a;
89 a = b;
90 b = ltmp;
91 }
92 BN_ULONG *rr = &(r[na]);
93 if (nb == 0) {
94 OPENSSL_memset(r, 0, na * sizeof(BN_ULONG));
95 return;
96 }
97 rr[0] = bn_mul_words(r, a, na, b[0]);
98
99 for (;;) {
100 if (--nb == 0) {
101 return;
102 }
103 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
104 if (--nb == 0) {
105 return;
106 }
107 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
108 if (--nb == 0) {
109 return;
110 }
111 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
112 if (--nb == 0) {
113 return;
114 }
115 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
116 rr += 4;
117 r += 4;
118 b += 4;
119 }
120}
121
122#if !defined(OPENSSL_X86) || defined(OPENSSL_NO_ASM)
123// Here follows specialised variants of bn_add_words() and bn_sub_words(). They
124// have the property performing operations on arrays of different sizes. The
125// sizes of those arrays is expressed through cl, which is the common length (
126// basicall, min(len(a),len(b)) ), and dl, which is the delta between the two
127// lengths, calculated as len(a)-len(b). All lengths are the number of
128// BN_ULONGs... For the operations that require a result array as parameter,
129// it must have the length cl+abs(dl). These functions should probably end up
130// in bn_asm.c as soon as there are assembler counterparts for the systems that
131// use assembler files.
132
133static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
134 const BN_ULONG *b, int cl, int dl) {
135 BN_ULONG c, t;
136
137 assert(cl >= 0);
138 c = bn_sub_words(r, a, b, cl);
139
140 if (dl == 0) {
141 return c;
142 }
143
144 r += cl;
145 a += cl;
146 b += cl;
147
148 if (dl < 0) {
149 for (;;) {
150 t = b[0];
151 r[0] = 0 - t - c;
152 if (t != 0) {
153 c = 1;
154 }
155 if (++dl >= 0) {
156 break;
157 }
158
159 t = b[1];
160 r[1] = 0 - t - c;
161 if (t != 0) {
162 c = 1;
163 }
164 if (++dl >= 0) {
165 break;
166 }
167
168 t = b[2];
169 r[2] = 0 - t - c;
170 if (t != 0) {
171 c = 1;
172 }
173 if (++dl >= 0) {
174 break;
175 }
176
177 t = b[3];
178 r[3] = 0 - t - c;
179 if (t != 0) {
180 c = 1;
181 }
182 if (++dl >= 0) {
183 break;
184 }
185
186 b += 4;
187 r += 4;
188 }
189 } else {
190 int save_dl = dl;
191 while (c) {
192 t = a[0];
193 r[0] = t - c;
194 if (t != 0) {
195 c = 0;
196 }
197 if (--dl <= 0) {
198 break;
199 }
200
201 t = a[1];
202 r[1] = t - c;
203 if (t != 0) {
204 c = 0;
205 }
206 if (--dl <= 0) {
207 break;
208 }
209
210 t = a[2];
211 r[2] = t - c;
212 if (t != 0) {
213 c = 0;
214 }
215 if (--dl <= 0) {
216 break;
217 }
218
219 t = a[3];
220 r[3] = t - c;
221 if (t != 0) {
222 c = 0;
223 }
224 if (--dl <= 0) {
225 break;
226 }
227
228 save_dl = dl;
229 a += 4;
230 r += 4;
231 }
232 if (dl > 0) {
233 if (save_dl > dl) {
234 switch (save_dl - dl) {
235 case 1:
236 r[1] = a[1];
237 if (--dl <= 0) {
238 break;
239 }
240 OPENSSL_FALLTHROUGH;
241 case 2:
242 r[2] = a[2];
243 if (--dl <= 0) {
244 break;
245 }
246 OPENSSL_FALLTHROUGH;
247 case 3:
248 r[3] = a[3];
249 if (--dl <= 0) {
250 break;
251 }
252 }
253 a += 4;
254 r += 4;
255 }
256 }
257
258 if (dl > 0) {
259 for (;;) {
260 r[0] = a[0];
261 if (--dl <= 0) {
262 break;
263 }
264 r[1] = a[1];
265 if (--dl <= 0) {
266 break;
267 }
268 r[2] = a[2];
269 if (--dl <= 0) {
270 break;
271 }
272 r[3] = a[3];
273 if (--dl <= 0) {
274 break;
275 }
276
277 a += 4;
278 r += 4;
279 }
280 }
281 }
282
283 return c;
284}
285#else
286// On other platforms the function is defined in asm.
287BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
288 int cl, int dl);
289#endif
290
291// bn_abs_sub_part_words computes |r| = |a| - |b|, storing the absolute value
292// and returning a mask of all ones if the result was negative and all zeros if
293// the result was positive. |cl| and |dl| follow the |bn_sub_part_words| calling
294// convention.
295//
296// TODO(davidben): Make this take |size_t|. The |cl| + |dl| calling convention
297// is confusing. The trouble is 32-bit x86 implements |bn_sub_part_words| in
298// assembly, but we can probably just delete it?
299static BN_ULONG bn_abs_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
300 const BN_ULONG *b, int cl, int dl,
301 BN_ULONG *tmp) {
302 BN_ULONG borrow = bn_sub_part_words(tmp, a, b, cl, dl);
303 bn_sub_part_words(r, b, a, cl, -dl);
304 int r_len = cl + (dl < 0 ? -dl : dl);
305 borrow = 0 - borrow;
306 bn_select_words(r, borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, r_len);
307 return borrow;
308}
309
310int bn_abs_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
311 BN_CTX *ctx) {
312 int cl = a->width < b->width ? a->width : b->width;
313 int dl = a->width - b->width;
314 int r_len = a->width < b->width ? b->width : a->width;
315 BN_CTX_start(ctx);
316 BIGNUM *tmp = BN_CTX_get(ctx);
317 int ok = tmp != NULL &&
318 bn_wexpand(r, r_len) &&
319 bn_wexpand(tmp, r_len);
320 if (ok) {
321 bn_abs_sub_part_words(r->d, a->d, b->d, cl, dl, tmp->d);
322 r->width = r_len;
323 }
324 BN_CTX_end(ctx);
325 return ok;
326}
327
328// Karatsuba recursive multiplication algorithm
329// (cf. Knuth, The Art of Computer Programming, Vol. 2)
330
331// bn_mul_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r| has
332// length 2*|n2|, |a| has length |n2| + |dna|, |b| has length |n2| + |dnb|, and
333// |t| has length 4*|n2|. |n2| must be a power of two. Finally, we must have
334// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dna| <= 0 and
335// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dnb| <= 0.
336//
337// TODO(davidben): Simplify and |size_t| the calling convention around lengths
338// here.
339static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
340 int n2, int dna, int dnb, BN_ULONG *t) {
341 // |n2| is a power of two.
342 assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
343 // Check |dna| and |dnb| are in range.
344 assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dna && dna <= 0);
345 assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dnb && dnb <= 0);
346
347 // Only call bn_mul_comba 8 if n2 == 8 and the
348 // two arrays are complete [steve]
349 if (n2 == 8 && dna == 0 && dnb == 0) {
350 bn_mul_comba8(r, a, b);
351 return;
352 }
353
354 // Else do normal multiply
355 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
356 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
357 if (dna + dnb < 0) {
358 OPENSSL_memset(&r[2 * n2 + dna + dnb], 0,
359 sizeof(BN_ULONG) * -(dna + dnb));
360 }
361 return;
362 }
363
364 // Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|.
365 // Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
366 // for recursive calls.
367 // Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*a1+b0*b1
368 // to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
369 //
370 // a0*a1 + b0*b1 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
371 //
372 // Note that we know |n| >= |BN_MUL_RECURSIVE_SIZE_NORMAL|/2 above, so
373 // |tna| and |tnb| are non-negative.
374 int n = n2 / 2, tna = n + dna, tnb = n + dnb;
375
376 // t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
377 // their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
378 // themselves store the absolute value.
379 BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
380 neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
381
382 // Compute:
383 // t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
384 // r0,r1 = a0 * b0
385 // r2,r3 = a1 * b1
386 if (n == 4 && dna == 0 && dnb == 0) {
387 bn_mul_comba4(&t[n2], t, &t[n]);
388
389 bn_mul_comba4(r, a, b);
390 bn_mul_comba4(&r[n2], &a[n], &b[n]);
391 } else if (n == 8 && dna == 0 && dnb == 0) {
392 bn_mul_comba8(&t[n2], t, &t[n]);
393
394 bn_mul_comba8(r, a, b);
395 bn_mul_comba8(&r[n2], &a[n], &b[n]);
396 } else {
397 BN_ULONG *p = &t[n2 * 2];
398 bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
399 bn_mul_recursive(r, a, b, n, 0, 0, p);
400 bn_mul_recursive(&r[n2], &a[n], &b[n], n, dna, dnb, p);
401 }
402
403 // t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
404 BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
405
406 // t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
407 // The second term is stored as the absolute value, so we do this with a
408 // constant-time select.
409 BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
410 BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
411 bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
412 OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
413 "crypto_word_t is too small");
414 c = constant_time_select_w(neg, c_neg, c_pos);
415
416 // We now have our three components. Add them together.
417 // r1,r2,c = r1,r2 + t2,t3,c
418 c += bn_add_words(&r[n], &r[n], &t[n2], n2);
419
420 // Propagate the carry bit to the end.
421 for (int i = n + n2; i < n2 + n2; i++) {
422 BN_ULONG old = r[i];
423 r[i] = old + c;
424 c = r[i] < old;
425 }
426
427 // The product should fit without carries.
428 assert(c == 0);
429}
430
431// bn_mul_part_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r|
432// has length 4*|n|, |a| has length |n| + |tna|, |b| has length |n| + |tnb|, and
433// |t| has length 8*|n|. |n| must be a power of two. Additionally, we must have
434// 0 <= tna < n and 0 <= tnb < n, and |tna| and |tnb| must differ by at most
435// one.
436//
437// TODO(davidben): Make this take |size_t| and perhaps the actual lengths of |a|
438// and |b|.
439static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a,
440 const BN_ULONG *b, int n, int tna, int tnb,
441 BN_ULONG *t) {
442 // |n| is a power of two.
443 assert(n != 0 && (n & (n - 1)) == 0);
444 // Check |tna| and |tnb| are in range.
445 assert(0 <= tna && tna < n);
446 assert(0 <= tnb && tnb < n);
447 assert(-1 <= tna - tnb && tna - tnb <= 1);
448
449 int n2 = n * 2;
450 if (n < 8) {
451 bn_mul_normal(r, a, n + tna, b, n + tnb);
452 OPENSSL_memset(r + n2 + tna + tnb, 0, n2 - tna - tnb);
453 return;
454 }
455
456 // Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|. |a1|
457 // and |b1| have size |tna| and |tnb|, respectively.
458 // Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
459 // for recursive calls.
460 // Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*a1+b0*b1
461 // to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
462 //
463 // a0*a1 + b0*b1 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
464
465 // t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
466 // their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
467 // themselves store the absolute value.
468 BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
469 neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
470
471 // Compute:
472 // t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
473 // r0,r1 = a0 * b0
474 // r2,r3 = a1 * b1
475 if (n == 8) {
476 bn_mul_comba8(&t[n2], t, &t[n]);
477 bn_mul_comba8(r, a, b);
478
479 bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
480 // |bn_mul_normal| only writes |tna| + |tna| words. Zero the rest.
481 OPENSSL_memset(&r[n2 + tna + tnb], 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
482 } else {
483 BN_ULONG *p = &t[n2 * 2];
484 bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
485 bn_mul_recursive(r, a, b, n, 0, 0, p);
486
487 OPENSSL_memset(&r[n2], 0, sizeof(BN_ULONG) * n2);
488 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
489 tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
490 bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
491 } else {
492 int i = n;
493 for (;;) {
494 i /= 2;
495 if (i < tna || i < tnb) {
496 // E.g., n == 16, i == 8 and tna == 11. |tna| and |tnb| are within one
497 // of each other, so if |tna| is larger and tna > i, then we know
498 // tnb >= i, and this call is valid.
499 bn_mul_part_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
500 break;
501 }
502 if (i == tna || i == tnb) {
503 // If there is only a bottom half to the number, just do it. We know
504 // the larger of |tna - i| and |tnb - i| is zero. The other is zero or
505 // -1 by because of |tna| and |tnb| differ by at most one.
506 bn_mul_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
507 break;
508 }
509
510 // This loop will eventually terminate when |i| falls below
511 // |BN_MUL_RECURSIVE_SIZE_NORMAL| because we know one of |tna| and |tnb|
512 // exceeds that.
513 }
514 }
515 }
516
517 // t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
518 BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
519
520 // t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
521 // The second term is stored as the absolute value, so we do this with a
522 // constant-time select.
523 BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
524 BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
525 bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
526 OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
527 "crypto_word_t is too small");
528 c = constant_time_select_w(neg, c_neg, c_pos);
529
530 // We now have our three components. Add them together.
531 // r1,r2,c = r1,r2 + t2,t3,c
532 c += bn_add_words(&r[n], &r[n], &t[n2], n2);
533
534 // Propagate the carry bit to the end.
535 for (int i = n + n2; i < n2 + n2; i++) {
536 BN_ULONG old = r[i];
537 r[i] = old + c;
538 c = r[i] < old;
539 }
540
541 // The product should fit without carries.
542 assert(c == 0);
543}
544
545// bn_mul_impl implements |BN_mul| and |bn_mul_consttime|. Note this function
546// breaks |BIGNUM| invariants and may return a negative zero. This is handled by
547// the callers.
548static int bn_mul_impl(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
549 BN_CTX *ctx) {
550 int al = a->width;
551 int bl = b->width;
552 if (al == 0 || bl == 0) {
553 BN_zero(r);
554 return 1;
555 }
556
557 int ret = 0;
558 BIGNUM *rr;
559 BN_CTX_start(ctx);
560 if (r == a || r == b) {
561 rr = BN_CTX_get(ctx);
562 if (rr == NULL) {
563 goto err;
564 }
565 } else {
566 rr = r;
567 }
568 rr->neg = a->neg ^ b->neg;
569
570 int i = al - bl;
571 if (i == 0) {
572 if (al == 8) {
573 if (!bn_wexpand(rr, 16)) {
574 goto err;
575 }
576 rr->width = 16;
577 bn_mul_comba8(rr->d, a->d, b->d);
578 goto end;
579 }
580 }
581
582 int top = al + bl;
583 static const int kMulNormalSize = 16;
584 if (al >= kMulNormalSize && bl >= kMulNormalSize) {
585 if (-1 <= i && i <= 1) {
586 // Find the larger power of two less than or equal to the larger length.
587 int j;
588 if (i >= 0) {
589 j = BN_num_bits_word((BN_ULONG)al);
590 } else {
591 j = BN_num_bits_word((BN_ULONG)bl);
592 }
593 j = 1 << (j - 1);
594 assert(j <= al || j <= bl);
595 BIGNUM *t = BN_CTX_get(ctx);
596 if (t == NULL) {
597 goto err;
598 }
599 if (al > j || bl > j) {
600 // We know |al| and |bl| are at most one from each other, so if al > j,
601 // bl >= j, and vice versa. Thus we can use |bn_mul_part_recursive|.
602 assert(al >= j && bl >= j);
603 if (!bn_wexpand(t, j * 8) ||
604 !bn_wexpand(rr, j * 4)) {
605 goto err;
606 }
607 bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
608 } else {
609 // al <= j && bl <= j. Additionally, we know j <= al or j <= bl, so one
610 // of al - j or bl - j is zero. The other, by the bound on |i| above, is
611 // zero or -1. Thus, we can use |bn_mul_recursive|.
612 if (!bn_wexpand(t, j * 4) ||
613 !bn_wexpand(rr, j * 2)) {
614 goto err;
615 }
616 bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
617 }
618 rr->width = top;
619 goto end;
620 }
621 }
622
623 if (!bn_wexpand(rr, top)) {
624 goto err;
625 }
626 rr->width = top;
627 bn_mul_normal(rr->d, a->d, al, b->d, bl);
628
629end:
630 if (r != rr && !BN_copy(r, rr)) {
631 goto err;
632 }
633 ret = 1;
634
635err:
636 BN_CTX_end(ctx);
637 return ret;
638}
639
640int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
641 if (!bn_mul_impl(r, a, b, ctx)) {
642 return 0;
643 }
644
645 // This additionally fixes any negative zeros created by |bn_mul_impl|.
646 bn_set_minimal_width(r);
647 return 1;
648}
649
650int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
651 // Prevent negative zeros.
652 if (a->neg || b->neg) {
653 OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
654 return 0;
655 }
656
657 return bn_mul_impl(r, a, b, ctx);
658}
659
660void bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
661 const BN_ULONG *b, size_t num_b) {
662 if (num_r != num_a + num_b) {
663 abort();
664 }
665 // TODO(davidben): Should this call |bn_mul_comba4| too? |BN_mul| does not
666 // hit that code.
667 if (num_a == 8 && num_b == 8) {
668 bn_mul_comba8(r, a, b);
669 } else {
670 bn_mul_normal(r, a, num_a, b, num_b);
671 }
672}
673
674// tmp must have 2*n words
675static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, size_t n,
676 BN_ULONG *tmp) {
677 if (n == 0) {
678 return;
679 }
680
681 size_t max = n * 2;
682 const BN_ULONG *ap = a;
683 BN_ULONG *rp = r;
684 rp[0] = rp[max - 1] = 0;
685 rp++;
686
687 // Compute the contribution of a[i] * a[j] for all i < j.
688 if (n > 1) {
689 ap++;
690 rp[n - 1] = bn_mul_words(rp, ap, n - 1, ap[-1]);
691 rp += 2;
692 }
693 if (n > 2) {
694 for (size_t i = n - 2; i > 0; i--) {
695 ap++;
696 rp[i] = bn_mul_add_words(rp, ap, i, ap[-1]);
697 rp += 2;
698 }
699 }
700
701 // The final result fits in |max| words, so none of the following operations
702 // will overflow.
703
704 // Double |r|, giving the contribution of a[i] * a[j] for all i != j.
705 bn_add_words(r, r, r, max);
706
707 // Add in the contribution of a[i] * a[i] for all i.
708 bn_sqr_words(tmp, a, n);
709 bn_add_words(r, r, tmp, max);
710}
711
712// bn_sqr_recursive sets |r| to |a|^2, using |t| as scratch space. |r| has
713// length 2*|n2|, |a| has length |n2|, and |t| has length 4*|n2|. |n2| must be
714// a power of two.
715static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, size_t n2,
716 BN_ULONG *t) {
717 // |n2| is a power of two.
718 assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
719
720 if (n2 == 4) {
721 bn_sqr_comba4(r, a);
722 return;
723 }
724 if (n2 == 8) {
725 bn_sqr_comba8(r, a);
726 return;
727 }
728 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
729 bn_sqr_normal(r, a, n2, t);
730 return;
731 }
732
733 // Split |a| into a0,a1, each of size |n|.
734 // Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
735 // for recursive calls.
736 // Split |r| into r0,r1,r2,r3. We must contribute a0^2 to r0,r1, 2*a0*a1 to
737 // r1,r2, and a1^2 to r2,r3.
738 size_t n = n2 / 2;
739 BN_ULONG *t_recursive = &t[n2 * 2];
740
741 // t0 = |a0 - a1|.
742 bn_abs_sub_words(t, a, &a[n], n, &t[n]);
743 // t2,t3 = t0^2 = |a0 - a1|^2 = a0^2 - 2*a0*a1 + a1^2
744 bn_sqr_recursive(&t[n2], t, n, t_recursive);
745
746 // r0,r1 = a0^2
747 bn_sqr_recursive(r, a, n, t_recursive);
748
749 // r2,r3 = a1^2
750 bn_sqr_recursive(&r[n2], &a[n], n, t_recursive);
751
752 // t0,t1,c = r0,r1 + r2,r3 = a0^2 + a1^2
753 BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
754 // t2,t3,c = t0,t1,c - t2,t3 = 2*a0*a1
755 c -= bn_sub_words(&t[n2], t, &t[n2], n2);
756
757 // We now have our three components. Add them together.
758 // r1,r2,c = r1,r2 + t2,t3,c
759 c += bn_add_words(&r[n], &r[n], &t[n2], n2);
760
761 // Propagate the carry bit to the end.
762 for (size_t i = n + n2; i < n2 + n2; i++) {
763 BN_ULONG old = r[i];
764 r[i] = old + c;
765 c = r[i] < old;
766 }
767
768 // The square should fit without carries.
769 assert(c == 0);
770}
771
772int BN_mul_word(BIGNUM *bn, BN_ULONG w) {
773 if (!bn->width) {
774 return 1;
775 }
776
777 if (w == 0) {
778 BN_zero(bn);
779 return 1;
780 }
781
782 BN_ULONG ll = bn_mul_words(bn->d, bn->d, bn->width, w);
783 if (ll) {
784 if (!bn_wexpand(bn, bn->width + 1)) {
785 return 0;
786 }
787 bn->d[bn->width++] = ll;
788 }
789
790 return 1;
791}
792
793int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
794 int al = a->width;
795 if (al <= 0) {
796 r->width = 0;
797 r->neg = 0;
798 return 1;
799 }
800
801 int ret = 0;
802 BN_CTX_start(ctx);
803 BIGNUM *rr = (a != r) ? r : BN_CTX_get(ctx);
804 BIGNUM *tmp = BN_CTX_get(ctx);
805 if (!rr || !tmp) {
806 goto err;
807 }
808
809 int max = 2 * al; // Non-zero (from above)
810 if (!bn_wexpand(rr, max)) {
811 goto err;
812 }
813
814 if (al == 4) {
815 bn_sqr_comba4(rr->d, a->d);
816 } else if (al == 8) {
817 bn_sqr_comba8(rr->d, a->d);
818 } else {
819 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
820 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
821 bn_sqr_normal(rr->d, a->d, al, t);
822 } else {
823 // If |al| is a power of two, we can use |bn_sqr_recursive|.
824 if (al != 0 && (al & (al - 1)) == 0) {
825 if (!bn_wexpand(tmp, al * 4)) {
826 goto err;
827 }
828 bn_sqr_recursive(rr->d, a->d, al, tmp->d);
829 } else {
830 if (!bn_wexpand(tmp, max)) {
831 goto err;
832 }
833 bn_sqr_normal(rr->d, a->d, al, tmp->d);
834 }
835 }
836 }
837
838 rr->neg = 0;
839 rr->width = max;
840
841 if (rr != r && !BN_copy(r, rr)) {
842 goto err;
843 }
844 ret = 1;
845
846err:
847 BN_CTX_end(ctx);
848 return ret;
849}
850
851int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
852 if (!bn_sqr_consttime(r, a, ctx)) {
853 return 0;
854 }
855
856 bn_set_minimal_width(r);
857 return 1;
858}
859
860void bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a) {
861 if (num_r != 2 * num_a || num_a > BN_SMALL_MAX_WORDS) {
862 abort();
863 }
864 if (num_a == 4) {
865 bn_sqr_comba4(r, a);
866 } else if (num_a == 8) {
867 bn_sqr_comba8(r, a);
868 } else {
869 BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
870 bn_sqr_normal(r, a, num_a, tmp);
871 OPENSSL_cleanse(tmp, 2 * num_a * sizeof(BN_ULONG));
872 }
873}
874