1 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
2 | * All rights reserved. |
3 | * |
4 | * This package is an SSL implementation written |
5 | * by Eric Young (eay@cryptsoft.com). |
6 | * The implementation was written so as to conform with Netscapes SSL. |
7 | * |
8 | * This library is free for commercial and non-commercial use as long as |
9 | * the following conditions are aheared to. The following conditions |
10 | * apply to all code found in this distribution, be it the RC4, RSA, |
11 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
12 | * included with this distribution is covered by the same copyright terms |
13 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
14 | * |
15 | * Copyright remains Eric Young's, and as such any Copyright notices in |
16 | * the code are not to be removed. |
17 | * If this package is used in a product, Eric Young should be given attribution |
18 | * as the author of the parts of the library used. |
19 | * This can be in the form of a textual message at program startup or |
20 | * in documentation (online or textual) provided with the package. |
21 | * |
22 | * Redistribution and use in source and binary forms, with or without |
23 | * modification, are permitted provided that the following conditions |
24 | * are met: |
25 | * 1. Redistributions of source code must retain the copyright |
26 | * notice, this list of conditions and the following disclaimer. |
27 | * 2. Redistributions in binary form must reproduce the above copyright |
28 | * notice, this list of conditions and the following disclaimer in the |
29 | * documentation and/or other materials provided with the distribution. |
30 | * 3. All advertising materials mentioning features or use of this software |
31 | * must display the following acknowledgement: |
32 | * "This product includes cryptographic software written by |
33 | * Eric Young (eay@cryptsoft.com)" |
34 | * The word 'cryptographic' can be left out if the rouines from the library |
35 | * being used are not cryptographic related :-). |
36 | * 4. If you include any Windows specific code (or a derivative thereof) from |
37 | * the apps directory (application code) you must include an acknowledgement: |
38 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
39 | * |
40 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
41 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
42 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
43 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
44 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
45 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
46 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
47 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
48 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
49 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
50 | * SUCH DAMAGE. |
51 | * |
52 | * The licence and distribution terms for any publically available version or |
53 | * derivative of this code cannot be changed. i.e. this code cannot simply be |
54 | * copied and put under another distribution licence |
55 | * [including the GNU Public Licence.] |
56 | */ |
57 | /* ==================================================================== |
58 | * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. |
59 | * |
60 | * Redistribution and use in source and binary forms, with or without |
61 | * modification, are permitted provided that the following conditions |
62 | * are met: |
63 | * |
64 | * 1. Redistributions of source code must retain the above copyright |
65 | * notice, this list of conditions and the following disclaimer. |
66 | * |
67 | * 2. Redistributions in binary form must reproduce the above copyright |
68 | * notice, this list of conditions and the following disclaimer in |
69 | * the documentation and/or other materials provided with the |
70 | * distribution. |
71 | * |
72 | * 3. All advertising materials mentioning features or use of this |
73 | * software must display the following acknowledgment: |
74 | * "This product includes software developed by the OpenSSL Project |
75 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
76 | * |
77 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
78 | * endorse or promote products derived from this software without |
79 | * prior written permission. For written permission, please contact |
80 | * openssl-core@openssl.org. |
81 | * |
82 | * 5. Products derived from this software may not be called "OpenSSL" |
83 | * nor may "OpenSSL" appear in their names without prior written |
84 | * permission of the OpenSSL Project. |
85 | * |
86 | * 6. Redistributions of any form whatsoever must retain the following |
87 | * acknowledgment: |
88 | * "This product includes software developed by the OpenSSL Project |
89 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
90 | * |
91 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
92 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
93 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
94 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
95 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
96 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
97 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
98 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
99 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
100 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
101 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
102 | * OF THE POSSIBILITY OF SUCH DAMAGE. |
103 | * ==================================================================== |
104 | * |
105 | * This product includes cryptographic software written by Eric Young |
106 | * (eay@cryptsoft.com). This product includes software written by Tim |
107 | * Hudson (tjh@cryptsoft.com). */ |
108 | |
109 | #include <openssl/bn.h> |
110 | |
111 | #include <assert.h> |
112 | #include <stdlib.h> |
113 | #include <string.h> |
114 | |
115 | #include <openssl/cpu.h> |
116 | #include <openssl/err.h> |
117 | #include <openssl/mem.h> |
118 | |
119 | #include "internal.h" |
120 | #include "rsaz_exp.h" |
121 | |
122 | |
123 | int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { |
124 | int i, bits, ret = 0; |
125 | BIGNUM *v, *rr; |
126 | |
127 | BN_CTX_start(ctx); |
128 | if (r == a || r == p) { |
129 | rr = BN_CTX_get(ctx); |
130 | } else { |
131 | rr = r; |
132 | } |
133 | |
134 | v = BN_CTX_get(ctx); |
135 | if (rr == NULL || v == NULL) { |
136 | goto err; |
137 | } |
138 | |
139 | if (BN_copy(v, a) == NULL) { |
140 | goto err; |
141 | } |
142 | bits = BN_num_bits(p); |
143 | |
144 | if (BN_is_odd(p)) { |
145 | if (BN_copy(rr, a) == NULL) { |
146 | goto err; |
147 | } |
148 | } else { |
149 | if (!BN_one(rr)) { |
150 | goto err; |
151 | } |
152 | } |
153 | |
154 | for (i = 1; i < bits; i++) { |
155 | if (!BN_sqr(v, v, ctx)) { |
156 | goto err; |
157 | } |
158 | if (BN_is_bit_set(p, i)) { |
159 | if (!BN_mul(rr, rr, v, ctx)) { |
160 | goto err; |
161 | } |
162 | } |
163 | } |
164 | |
165 | if (r != rr && !BN_copy(r, rr)) { |
166 | goto err; |
167 | } |
168 | ret = 1; |
169 | |
170 | err: |
171 | BN_CTX_end(ctx); |
172 | return ret; |
173 | } |
174 | |
175 | typedef struct bn_recp_ctx_st { |
176 | BIGNUM N; // the divisor |
177 | BIGNUM Nr; // the reciprocal |
178 | int num_bits; |
179 | int shift; |
180 | int flags; |
181 | } BN_RECP_CTX; |
182 | |
183 | static void BN_RECP_CTX_init(BN_RECP_CTX *recp) { |
184 | BN_init(&recp->N); |
185 | BN_init(&recp->Nr); |
186 | recp->num_bits = 0; |
187 | recp->shift = 0; |
188 | recp->flags = 0; |
189 | } |
190 | |
191 | static void BN_RECP_CTX_free(BN_RECP_CTX *recp) { |
192 | if (recp == NULL) { |
193 | return; |
194 | } |
195 | |
196 | BN_free(&recp->N); |
197 | BN_free(&recp->Nr); |
198 | } |
199 | |
200 | static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) { |
201 | if (!BN_copy(&(recp->N), d)) { |
202 | return 0; |
203 | } |
204 | BN_zero(&recp->Nr); |
205 | recp->num_bits = BN_num_bits(d); |
206 | recp->shift = 0; |
207 | |
208 | return 1; |
209 | } |
210 | |
211 | // len is the expected size of the result We actually calculate with an extra |
212 | // word of precision, so we can do faster division if the remainder is not |
213 | // required. |
214 | // r := 2^len / m |
215 | static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { |
216 | int ret = -1; |
217 | BIGNUM *t; |
218 | |
219 | BN_CTX_start(ctx); |
220 | t = BN_CTX_get(ctx); |
221 | if (t == NULL) { |
222 | goto err; |
223 | } |
224 | |
225 | if (!BN_set_bit(t, len)) { |
226 | goto err; |
227 | } |
228 | |
229 | if (!BN_div(r, NULL, t, m, ctx)) { |
230 | goto err; |
231 | } |
232 | |
233 | ret = len; |
234 | |
235 | err: |
236 | BN_CTX_end(ctx); |
237 | return ret; |
238 | } |
239 | |
240 | static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, |
241 | BN_RECP_CTX *recp, BN_CTX *ctx) { |
242 | int i, j, ret = 0; |
243 | BIGNUM *a, *b, *d, *r; |
244 | |
245 | BN_CTX_start(ctx); |
246 | a = BN_CTX_get(ctx); |
247 | b = BN_CTX_get(ctx); |
248 | if (dv != NULL) { |
249 | d = dv; |
250 | } else { |
251 | d = BN_CTX_get(ctx); |
252 | } |
253 | |
254 | if (rem != NULL) { |
255 | r = rem; |
256 | } else { |
257 | r = BN_CTX_get(ctx); |
258 | } |
259 | |
260 | if (a == NULL || b == NULL || d == NULL || r == NULL) { |
261 | goto err; |
262 | } |
263 | |
264 | if (BN_ucmp(m, &recp->N) < 0) { |
265 | BN_zero(d); |
266 | if (!BN_copy(r, m)) { |
267 | goto err; |
268 | } |
269 | BN_CTX_end(ctx); |
270 | return 1; |
271 | } |
272 | |
273 | // We want the remainder |
274 | // Given input of ABCDEF / ab |
275 | // we need multiply ABCDEF by 3 digests of the reciprocal of ab |
276 | |
277 | // i := max(BN_num_bits(m), 2*BN_num_bits(N)) |
278 | i = BN_num_bits(m); |
279 | j = recp->num_bits << 1; |
280 | if (j > i) { |
281 | i = j; |
282 | } |
283 | |
284 | // Nr := round(2^i / N) |
285 | if (i != recp->shift) { |
286 | recp->shift = |
287 | BN_reciprocal(&(recp->Nr), &(recp->N), i, |
288 | ctx); // BN_reciprocal returns i, or -1 for an error |
289 | } |
290 | |
291 | if (recp->shift == -1) { |
292 | goto err; |
293 | } |
294 | |
295 | // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - |
296 | // BN_num_bits(N)))| |
297 | // = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - |
298 | // BN_num_bits(N)))| |
299 | // <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)| |
300 | // = |m/N| |
301 | if (!BN_rshift(a, m, recp->num_bits)) { |
302 | goto err; |
303 | } |
304 | if (!BN_mul(b, a, &(recp->Nr), ctx)) { |
305 | goto err; |
306 | } |
307 | if (!BN_rshift(d, b, i - recp->num_bits)) { |
308 | goto err; |
309 | } |
310 | d->neg = 0; |
311 | |
312 | if (!BN_mul(b, &(recp->N), d, ctx)) { |
313 | goto err; |
314 | } |
315 | if (!BN_usub(r, m, b)) { |
316 | goto err; |
317 | } |
318 | r->neg = 0; |
319 | |
320 | j = 0; |
321 | while (BN_ucmp(r, &(recp->N)) >= 0) { |
322 | if (j++ > 2) { |
323 | OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL); |
324 | goto err; |
325 | } |
326 | if (!BN_usub(r, r, &(recp->N))) { |
327 | goto err; |
328 | } |
329 | if (!BN_add_word(d, 1)) { |
330 | goto err; |
331 | } |
332 | } |
333 | |
334 | r->neg = BN_is_zero(r) ? 0 : m->neg; |
335 | d->neg = m->neg ^ recp->N.neg; |
336 | ret = 1; |
337 | |
338 | err: |
339 | BN_CTX_end(ctx); |
340 | return ret; |
341 | } |
342 | |
343 | static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, |
344 | BN_RECP_CTX *recp, BN_CTX *ctx) { |
345 | int ret = 0; |
346 | BIGNUM *a; |
347 | const BIGNUM *ca; |
348 | |
349 | BN_CTX_start(ctx); |
350 | a = BN_CTX_get(ctx); |
351 | if (a == NULL) { |
352 | goto err; |
353 | } |
354 | |
355 | if (y != NULL) { |
356 | if (x == y) { |
357 | if (!BN_sqr(a, x, ctx)) { |
358 | goto err; |
359 | } |
360 | } else { |
361 | if (!BN_mul(a, x, y, ctx)) { |
362 | goto err; |
363 | } |
364 | } |
365 | ca = a; |
366 | } else { |
367 | ca = x; // Just do the mod |
368 | } |
369 | |
370 | ret = BN_div_recp(NULL, r, ca, recp, ctx); |
371 | |
372 | err: |
373 | BN_CTX_end(ctx); |
374 | return ret; |
375 | } |
376 | |
377 | // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with |
378 | // a |b| bit exponent. |
379 | // |
380 | // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of |
381 | // multiplications is a constant plus on average |
382 | // |
383 | // 2^(w-1) + (b-w)/(w+1); |
384 | // |
385 | // here 2^(w-1) is for precomputing the table (we actually need entries only |
386 | // for windows that have the lowest bit set), and (b-w)/(w+1) is an |
387 | // approximation for the expected number of w-bit windows, not counting the |
388 | // first one. |
389 | // |
390 | // Thus we should use |
391 | // |
392 | // w >= 6 if b > 671 |
393 | // w = 5 if 671 > b > 239 |
394 | // w = 4 if 239 > b > 79 |
395 | // w = 3 if 79 > b > 23 |
396 | // w <= 2 if 23 > b |
397 | // |
398 | // (with draws in between). Very small exponents are often selected |
399 | // with low Hamming weight, so we use w = 1 for b <= 23. |
400 | static int BN_window_bits_for_exponent_size(int b) { |
401 | if (b > 671) { |
402 | return 6; |
403 | } |
404 | if (b > 239) { |
405 | return 5; |
406 | } |
407 | if (b > 79) { |
408 | return 4; |
409 | } |
410 | if (b > 23) { |
411 | return 3; |
412 | } |
413 | return 1; |
414 | } |
415 | |
416 | // TABLE_SIZE is the maximum precomputation table size for *variable* sliding |
417 | // windows. This must be 2^(max_window - 1), where max_window is the largest |
418 | // value returned from |BN_window_bits_for_exponent_size|. |
419 | #define TABLE_SIZE 32 |
420 | |
421 | // TABLE_BITS_SMALL is the smallest value returned from |
422 | // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| * |
423 | // |BN_SMALL_MAX_WORDS| words. |
424 | #define TABLE_BITS_SMALL 5 |
425 | |
426 | // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most |
427 | // |BN_BITS2| * |BN_SMALL_MAX_WORDS|. |
428 | #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1)) |
429 | |
430 | static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
431 | const BIGNUM *m, BN_CTX *ctx) { |
432 | int i, j, ret = 0, wstart, window; |
433 | int start = 1; |
434 | BIGNUM *aa; |
435 | // Table of variables obtained from 'ctx' |
436 | BIGNUM *val[TABLE_SIZE]; |
437 | BN_RECP_CTX recp; |
438 | |
439 | // This function is only called on even moduli. |
440 | assert(!BN_is_odd(m)); |
441 | |
442 | int bits = BN_num_bits(p); |
443 | if (bits == 0) { |
444 | return BN_one(r); |
445 | } |
446 | |
447 | BN_CTX_start(ctx); |
448 | aa = BN_CTX_get(ctx); |
449 | val[0] = BN_CTX_get(ctx); |
450 | if (!aa || !val[0]) { |
451 | goto err; |
452 | } |
453 | |
454 | BN_RECP_CTX_init(&recp); |
455 | if (m->neg) { |
456 | // ignore sign of 'm' |
457 | if (!BN_copy(aa, m)) { |
458 | goto err; |
459 | } |
460 | aa->neg = 0; |
461 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) { |
462 | goto err; |
463 | } |
464 | } else { |
465 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) { |
466 | goto err; |
467 | } |
468 | } |
469 | |
470 | if (!BN_nnmod(val[0], a, m, ctx)) { |
471 | goto err; // 1 |
472 | } |
473 | if (BN_is_zero(val[0])) { |
474 | BN_zero(r); |
475 | ret = 1; |
476 | goto err; |
477 | } |
478 | |
479 | window = BN_window_bits_for_exponent_size(bits); |
480 | if (window > 1) { |
481 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) { |
482 | goto err; // 2 |
483 | } |
484 | j = 1 << (window - 1); |
485 | for (i = 1; i < j; i++) { |
486 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
487 | !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) { |
488 | goto err; |
489 | } |
490 | } |
491 | } |
492 | |
493 | start = 1; // This is used to avoid multiplication etc |
494 | // when there is only the value '1' in the |
495 | // buffer. |
496 | wstart = bits - 1; // The top bit of the window |
497 | |
498 | if (!BN_one(r)) { |
499 | goto err; |
500 | } |
501 | |
502 | for (;;) { |
503 | int wvalue; // The 'value' of the window |
504 | int wend; // The bottom bit of the window |
505 | |
506 | if (!BN_is_bit_set(p, wstart)) { |
507 | if (!start) { |
508 | if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { |
509 | goto err; |
510 | } |
511 | } |
512 | if (wstart == 0) { |
513 | break; |
514 | } |
515 | wstart--; |
516 | continue; |
517 | } |
518 | |
519 | // We now have wstart on a 'set' bit, we now need to work out |
520 | // how bit a window to do. To do this we need to scan |
521 | // forward until the last set bit before the end of the |
522 | // window |
523 | wvalue = 1; |
524 | wend = 0; |
525 | for (i = 1; i < window; i++) { |
526 | if (wstart - i < 0) { |
527 | break; |
528 | } |
529 | if (BN_is_bit_set(p, wstart - i)) { |
530 | wvalue <<= (i - wend); |
531 | wvalue |= 1; |
532 | wend = i; |
533 | } |
534 | } |
535 | |
536 | // wend is the size of the current window |
537 | j = wend + 1; |
538 | // add the 'bytes above' |
539 | if (!start) { |
540 | for (i = 0; i < j; i++) { |
541 | if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { |
542 | goto err; |
543 | } |
544 | } |
545 | } |
546 | |
547 | // wvalue will be an odd number < 2^window |
548 | if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) { |
549 | goto err; |
550 | } |
551 | |
552 | // move the 'window' down further |
553 | wstart -= wend + 1; |
554 | start = 0; |
555 | if (wstart < 0) { |
556 | break; |
557 | } |
558 | } |
559 | ret = 1; |
560 | |
561 | err: |
562 | BN_CTX_end(ctx); |
563 | BN_RECP_CTX_free(&recp); |
564 | return ret; |
565 | } |
566 | |
567 | int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
568 | BN_CTX *ctx) { |
569 | if (m->neg) { |
570 | OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); |
571 | return 0; |
572 | } |
573 | if (a->neg || BN_ucmp(a, m) >= 0) { |
574 | if (!BN_nnmod(r, a, m, ctx)) { |
575 | return 0; |
576 | } |
577 | a = r; |
578 | } |
579 | |
580 | if (BN_is_odd(m)) { |
581 | return BN_mod_exp_mont(r, a, p, m, ctx, NULL); |
582 | } |
583 | |
584 | return mod_exp_recp(r, a, p, m, ctx); |
585 | } |
586 | |
587 | int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
588 | const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) { |
589 | if (!BN_is_odd(m)) { |
590 | OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); |
591 | return 0; |
592 | } |
593 | if (m->neg) { |
594 | OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); |
595 | return 0; |
596 | } |
597 | if (a->neg || BN_ucmp(a, m) >= 0) { |
598 | OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED); |
599 | return 0; |
600 | } |
601 | |
602 | int bits = BN_num_bits(p); |
603 | if (bits == 0) { |
604 | // x**0 mod 1 is still zero. |
605 | if (BN_abs_is_word(m, 1)) { |
606 | BN_zero(rr); |
607 | return 1; |
608 | } |
609 | return BN_one(rr); |
610 | } |
611 | |
612 | int ret = 0; |
613 | BIGNUM *val[TABLE_SIZE]; |
614 | BN_MONT_CTX *new_mont = NULL; |
615 | |
616 | BN_CTX_start(ctx); |
617 | BIGNUM *r = BN_CTX_get(ctx); |
618 | val[0] = BN_CTX_get(ctx); |
619 | if (r == NULL || val[0] == NULL) { |
620 | goto err; |
621 | } |
622 | |
623 | // Allocate a montgomery context if it was not supplied by the caller. |
624 | if (mont == NULL) { |
625 | new_mont = BN_MONT_CTX_new_consttime(m, ctx); |
626 | if (new_mont == NULL) { |
627 | goto err; |
628 | } |
629 | mont = new_mont; |
630 | } |
631 | |
632 | // We exponentiate by looking at sliding windows of the exponent and |
633 | // precomputing powers of |a|. Windows may be shifted so they always end on a |
634 | // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) |
635 | // for i = 0 to 2^(window-1), all in Montgomery form. |
636 | int window = BN_window_bits_for_exponent_size(bits); |
637 | if (!BN_to_montgomery(val[0], a, mont, ctx)) { |
638 | goto err; |
639 | } |
640 | if (window > 1) { |
641 | BIGNUM *d = BN_CTX_get(ctx); |
642 | if (d == NULL || |
643 | !BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) { |
644 | goto err; |
645 | } |
646 | for (int i = 1; i < 1 << (window - 1); i++) { |
647 | val[i] = BN_CTX_get(ctx); |
648 | if (val[i] == NULL || |
649 | !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) { |
650 | goto err; |
651 | } |
652 | } |
653 | } |
654 | |
655 | // |p| is non-zero, so at least one window is non-zero. To save some |
656 | // multiplications, defer initializing |r| until then. |
657 | int r_is_one = 1; |
658 | int wstart = bits - 1; // The top bit of the window. |
659 | for (;;) { |
660 | if (!BN_is_bit_set(p, wstart)) { |
661 | if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) { |
662 | goto err; |
663 | } |
664 | if (wstart == 0) { |
665 | break; |
666 | } |
667 | wstart--; |
668 | continue; |
669 | } |
670 | |
671 | // We now have wstart on a set bit. Find the largest window we can use. |
672 | int wvalue = 1; |
673 | int wsize = 0; |
674 | for (int i = 1; i < window && i <= wstart; i++) { |
675 | if (BN_is_bit_set(p, wstart - i)) { |
676 | wvalue <<= (i - wsize); |
677 | wvalue |= 1; |
678 | wsize = i; |
679 | } |
680 | } |
681 | |
682 | // Shift |r| to the end of the window. |
683 | if (!r_is_one) { |
684 | for (int i = 0; i < wsize + 1; i++) { |
685 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { |
686 | goto err; |
687 | } |
688 | } |
689 | } |
690 | |
691 | assert(wvalue & 1); |
692 | assert(wvalue < (1 << window)); |
693 | if (r_is_one) { |
694 | if (!BN_copy(r, val[wvalue >> 1])) { |
695 | goto err; |
696 | } |
697 | } else if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) { |
698 | goto err; |
699 | } |
700 | |
701 | r_is_one = 0; |
702 | if (wstart == wsize) { |
703 | break; |
704 | } |
705 | wstart -= wsize + 1; |
706 | } |
707 | |
708 | // |p| is non-zero, so |r_is_one| must be cleared at some point. |
709 | assert(!r_is_one); |
710 | |
711 | if (!BN_from_montgomery(rr, r, mont, ctx)) { |
712 | goto err; |
713 | } |
714 | ret = 1; |
715 | |
716 | err: |
717 | BN_MONT_CTX_free(new_mont); |
718 | BN_CTX_end(ctx); |
719 | return ret; |
720 | } |
721 | |
722 | void bn_mod_exp_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num, |
723 | const BN_ULONG *p, size_t num_p, |
724 | const BN_MONT_CTX *mont) { |
725 | if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) { |
726 | abort(); |
727 | } |
728 | assert(BN_is_odd(&mont->N)); |
729 | |
730 | // Count the number of bits in |p|. Note this function treats |p| as public. |
731 | while (num_p != 0 && p[num_p - 1] == 0) { |
732 | num_p--; |
733 | } |
734 | if (num_p == 0) { |
735 | bn_from_montgomery_small(r, mont->RR.d, num, mont); |
736 | return; |
737 | } |
738 | unsigned bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2; |
739 | assert(bits != 0); |
740 | |
741 | // We exponentiate by looking at sliding windows of the exponent and |
742 | // precomputing powers of |a|. Windows may be shifted so they always end on a |
743 | // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for |
744 | // i = 0 to 2^(window-1), all in Montgomery form. |
745 | unsigned window = BN_window_bits_for_exponent_size(bits); |
746 | if (window > TABLE_BITS_SMALL) { |
747 | window = TABLE_BITS_SMALL; // Tolerate excessively large |p|. |
748 | } |
749 | BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS]; |
750 | OPENSSL_memcpy(val[0], a, num * sizeof(BN_ULONG)); |
751 | if (window > 1) { |
752 | BN_ULONG d[BN_SMALL_MAX_WORDS]; |
753 | bn_mod_mul_montgomery_small(d, val[0], val[0], num, mont); |
754 | for (unsigned i = 1; i < 1u << (window - 1); i++) { |
755 | bn_mod_mul_montgomery_small(val[i], val[i - 1], d, num, mont); |
756 | } |
757 | } |
758 | |
759 | // |p| is non-zero, so at least one window is non-zero. To save some |
760 | // multiplications, defer initializing |r| until then. |
761 | int r_is_one = 1; |
762 | unsigned wstart = bits - 1; // The top bit of the window. |
763 | for (;;) { |
764 | if (!bn_is_bit_set_words(p, num_p, wstart)) { |
765 | if (!r_is_one) { |
766 | bn_mod_mul_montgomery_small(r, r, r, num, mont); |
767 | } |
768 | if (wstart == 0) { |
769 | break; |
770 | } |
771 | wstart--; |
772 | continue; |
773 | } |
774 | |
775 | // We now have wstart on a set bit. Find the largest window we can use. |
776 | unsigned wvalue = 1; |
777 | unsigned wsize = 0; |
778 | for (unsigned i = 1; i < window && i <= wstart; i++) { |
779 | if (bn_is_bit_set_words(p, num_p, wstart - i)) { |
780 | wvalue <<= (i - wsize); |
781 | wvalue |= 1; |
782 | wsize = i; |
783 | } |
784 | } |
785 | |
786 | // Shift |r| to the end of the window. |
787 | if (!r_is_one) { |
788 | for (unsigned i = 0; i < wsize + 1; i++) { |
789 | bn_mod_mul_montgomery_small(r, r, r, num, mont); |
790 | } |
791 | } |
792 | |
793 | assert(wvalue & 1); |
794 | assert(wvalue < (1u << window)); |
795 | if (r_is_one) { |
796 | OPENSSL_memcpy(r, val[wvalue >> 1], num * sizeof(BN_ULONG)); |
797 | } else { |
798 | bn_mod_mul_montgomery_small(r, r, val[wvalue >> 1], num, mont); |
799 | } |
800 | r_is_one = 0; |
801 | if (wstart == wsize) { |
802 | break; |
803 | } |
804 | wstart -= wsize + 1; |
805 | } |
806 | |
807 | // |p| is non-zero, so |r_is_one| must be cleared at some point. |
808 | assert(!r_is_one); |
809 | OPENSSL_cleanse(val, sizeof(val)); |
810 | } |
811 | |
812 | void bn_mod_inverse_prime_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num, |
813 | const BN_MONT_CTX *mont) { |
814 | if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) { |
815 | abort(); |
816 | } |
817 | |
818 | // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime. |
819 | BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS]; |
820 | const BN_ULONG *p = mont->N.d; |
821 | OPENSSL_memcpy(p_minus_two, p, num * sizeof(BN_ULONG)); |
822 | if (p_minus_two[0] >= 2) { |
823 | p_minus_two[0] -= 2; |
824 | } else { |
825 | p_minus_two[0] -= 2; |
826 | for (size_t i = 1; i < num; i++) { |
827 | if (p_minus_two[i]-- != 0) { |
828 | break; |
829 | } |
830 | } |
831 | } |
832 | |
833 | bn_mod_exp_mont_small(r, a, num, p_minus_two, num, mont); |
834 | } |
835 | |
836 | static void copy_to_prebuf(const BIGNUM *b, int top, BN_ULONG *table, int idx, |
837 | int window) { |
838 | int ret = bn_copy_words(table + idx * top, top, b); |
839 | assert(ret); // |b| is guaranteed to fit. |
840 | (void)ret; |
841 | } |
842 | |
843 | static int copy_from_prebuf(BIGNUM *b, int top, const BN_ULONG *table, int idx, |
844 | int window) { |
845 | if (!bn_wexpand(b, top)) { |
846 | return 0; |
847 | } |
848 | |
849 | OPENSSL_memset(b->d, 0, sizeof(BN_ULONG) * top); |
850 | const int width = 1 << window; |
851 | for (int i = 0; i < width; i++, table += top) { |
852 | BN_ULONG mask = constant_time_eq_int(i, idx); |
853 | for (int j = 0; j < top; j++) { |
854 | b->d[j] |= table[j] & mask; |
855 | } |
856 | } |
857 | |
858 | b->width = top; |
859 | return 1; |
860 | } |
861 | |
862 | #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \ |
863 | (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) |
864 | |
865 | // Window sizes optimized for fixed window size modular exponentiation |
866 | // algorithm (BN_mod_exp_mont_consttime). |
867 | // |
868 | // To achieve the security goals of BN_mode_exp_mont_consttime, the maximum |
869 | // size of the window must not exceed |
870 | // log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). |
871 | // |
872 | // Window size thresholds are defined for cache line sizes of 32 and 64, cache |
873 | // line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of |
874 | // 7 should only be used on processors that have a 128 byte or greater cache |
875 | // line size. |
876 | #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 |
877 | |
878 | #define BN_window_bits_for_ctime_exponent_size(b) \ |
879 | ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) |
880 | #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) |
881 | |
882 | #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 |
883 | |
884 | #define BN_window_bits_for_ctime_exponent_size(b) \ |
885 | ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) |
886 | #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) |
887 | |
888 | #endif |
889 | |
890 | // Given a pointer value, compute the next address that is a cache line |
891 | // multiple. |
892 | #define MOD_EXP_CTIME_ALIGN(x_) \ |
893 | ((unsigned char *)(x_) + \ |
894 | (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \ |
895 | (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) |
896 | |
897 | // This variant of |BN_mod_exp_mont| uses fixed windows and fixed memory access |
898 | // patterns to protect secret exponents (cf. the hyper-threading timing attacks |
899 | // pointed out by Colin Percival, |
900 | // http://www.daemonology.net/hyperthreading-considered-harmful/) |
901 | int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
902 | const BIGNUM *m, BN_CTX *ctx, |
903 | const BN_MONT_CTX *mont) { |
904 | int i, ret = 0, window, wvalue; |
905 | BN_MONT_CTX *new_mont = NULL; |
906 | |
907 | int numPowers; |
908 | unsigned char *powerbufFree = NULL; |
909 | int powerbufLen = 0; |
910 | BN_ULONG *powerbuf = NULL; |
911 | BIGNUM tmp, am; |
912 | |
913 | if (!BN_is_odd(m)) { |
914 | OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); |
915 | return 0; |
916 | } |
917 | if (m->neg) { |
918 | OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); |
919 | return 0; |
920 | } |
921 | if (a->neg || BN_ucmp(a, m) >= 0) { |
922 | OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED); |
923 | return 0; |
924 | } |
925 | |
926 | // Use all bits stored in |p|, rather than |BN_num_bits|, so we do not leak |
927 | // whether the top bits are zero. |
928 | int max_bits = p->width * BN_BITS2; |
929 | int bits = max_bits; |
930 | if (bits == 0) { |
931 | // x**0 mod 1 is still zero. |
932 | if (BN_abs_is_word(m, 1)) { |
933 | BN_zero(rr); |
934 | return 1; |
935 | } |
936 | return BN_one(rr); |
937 | } |
938 | |
939 | // Allocate a montgomery context if it was not supplied by the caller. |
940 | if (mont == NULL) { |
941 | new_mont = BN_MONT_CTX_new_consttime(m, ctx); |
942 | if (new_mont == NULL) { |
943 | goto err; |
944 | } |
945 | mont = new_mont; |
946 | } |
947 | |
948 | // Use the width in |mont->N|, rather than the copy in |m|. The assembly |
949 | // implementation assumes it can use |top| to size R. |
950 | int top = mont->N.width; |
951 | |
952 | #if defined(OPENSSL_BN_ASM_MONT5) || defined(RSAZ_ENABLED) |
953 | // Share one large stack-allocated buffer between the RSAZ and non-RSAZ code |
954 | // paths. If we were to use separate static buffers for each then there is |
955 | // some chance that both large buffers would be allocated on the stack, |
956 | // causing the stack space requirement to be truly huge (~10KB). |
957 | alignas(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH) BN_ULONG |
958 | storage[MOD_EXP_CTIME_STORAGE_LEN]; |
959 | #endif |
960 | #if defined(RSAZ_ENABLED) |
961 | // If the size of the operands allow it, perform the optimized RSAZ |
962 | // exponentiation. For further information see crypto/fipsmodule/bn/rsaz_exp.c |
963 | // and accompanying assembly modules. |
964 | if (a->width == 16 && p->width == 16 && BN_num_bits(m) == 1024 && |
965 | rsaz_avx2_preferred()) { |
966 | if (!bn_wexpand(rr, 16)) { |
967 | goto err; |
968 | } |
969 | RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0], |
970 | storage); |
971 | rr->width = 16; |
972 | rr->neg = 0; |
973 | ret = 1; |
974 | goto err; |
975 | } |
976 | #endif |
977 | |
978 | // Get the window size to use with size of p. |
979 | window = BN_window_bits_for_ctime_exponent_size(bits); |
980 | #if defined(OPENSSL_BN_ASM_MONT5) |
981 | if (window >= 5) { |
982 | window = 5; // ~5% improvement for RSA2048 sign, and even for RSA4096 |
983 | // reserve space for mont->N.d[] copy |
984 | powerbufLen += top * sizeof(mont->N.d[0]); |
985 | } |
986 | #endif |
987 | |
988 | // Allocate a buffer large enough to hold all of the pre-computed |
989 | // powers of am, am itself and tmp. |
990 | numPowers = 1 << window; |
991 | powerbufLen += |
992 | sizeof(m->d[0]) * |
993 | (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers)); |
994 | |
995 | #if defined(OPENSSL_BN_ASM_MONT5) |
996 | if ((size_t)powerbufLen <= sizeof(storage)) { |
997 | powerbuf = storage; |
998 | } |
999 | // |storage| is more than large enough to handle 1024-bit inputs. |
1000 | assert(powerbuf != NULL || top * BN_BITS2 > 1024); |
1001 | #endif |
1002 | if (powerbuf == NULL) { |
1003 | powerbufFree = |
1004 | OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); |
1005 | if (powerbufFree == NULL) { |
1006 | goto err; |
1007 | } |
1008 | powerbuf = (BN_ULONG *)MOD_EXP_CTIME_ALIGN(powerbufFree); |
1009 | } |
1010 | OPENSSL_memset(powerbuf, 0, powerbufLen); |
1011 | |
1012 | // lay down tmp and am right after powers table |
1013 | tmp.d = powerbuf + top * numPowers; |
1014 | am.d = tmp.d + top; |
1015 | tmp.width = am.width = 0; |
1016 | tmp.dmax = am.dmax = top; |
1017 | tmp.neg = am.neg = 0; |
1018 | tmp.flags = am.flags = BN_FLG_STATIC_DATA; |
1019 | |
1020 | if (!bn_one_to_montgomery(&tmp, mont, ctx)) { |
1021 | goto err; |
1022 | } |
1023 | |
1024 | // prepare a^1 in Montgomery domain |
1025 | assert(!a->neg); |
1026 | assert(BN_ucmp(a, m) < 0); |
1027 | if (!BN_to_montgomery(&am, a, mont, ctx)) { |
1028 | goto err; |
1029 | } |
1030 | |
1031 | #if defined(OPENSSL_BN_ASM_MONT5) |
1032 | // This optimization uses ideas from http://eprint.iacr.org/2011/239, |
1033 | // specifically optimization of cache-timing attack countermeasures |
1034 | // and pre-computation optimization. |
1035 | |
1036 | // Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as |
1037 | // 512-bit RSA is hardly relevant, we omit it to spare size... |
1038 | if (window == 5 && top > 1) { |
1039 | const BN_ULONG *n0 = mont->n0; |
1040 | BN_ULONG *np; |
1041 | |
1042 | // BN_to_montgomery can contaminate words above .top |
1043 | // [in BN_DEBUG[_DEBUG] build]... |
1044 | for (i = am.width; i < top; i++) { |
1045 | am.d[i] = 0; |
1046 | } |
1047 | for (i = tmp.width; i < top; i++) { |
1048 | tmp.d[i] = 0; |
1049 | } |
1050 | |
1051 | // copy mont->N.d[] to improve cache locality |
1052 | for (np = am.d + top, i = 0; i < top; i++) { |
1053 | np[i] = mont->N.d[i]; |
1054 | } |
1055 | |
1056 | bn_scatter5(tmp.d, top, powerbuf, 0); |
1057 | bn_scatter5(am.d, am.width, powerbuf, 1); |
1058 | bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); |
1059 | bn_scatter5(tmp.d, top, powerbuf, 2); |
1060 | |
1061 | // same as above, but uses squaring for 1/2 of operations |
1062 | for (i = 4; i < 32; i *= 2) { |
1063 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1064 | bn_scatter5(tmp.d, top, powerbuf, i); |
1065 | } |
1066 | for (i = 3; i < 8; i += 2) { |
1067 | int j; |
1068 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); |
1069 | bn_scatter5(tmp.d, top, powerbuf, i); |
1070 | for (j = 2 * i; j < 32; j *= 2) { |
1071 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1072 | bn_scatter5(tmp.d, top, powerbuf, j); |
1073 | } |
1074 | } |
1075 | for (; i < 16; i += 2) { |
1076 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); |
1077 | bn_scatter5(tmp.d, top, powerbuf, i); |
1078 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1079 | bn_scatter5(tmp.d, top, powerbuf, 2 * i); |
1080 | } |
1081 | for (; i < 32; i += 2) { |
1082 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); |
1083 | bn_scatter5(tmp.d, top, powerbuf, i); |
1084 | } |
1085 | |
1086 | bits--; |
1087 | for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) { |
1088 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
1089 | } |
1090 | bn_gather5(tmp.d, top, powerbuf, wvalue); |
1091 | |
1092 | // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit |
1093 | // that has not been read yet.) |
1094 | assert(bits >= -1 && (bits == -1 || bits % 5 == 4)); |
1095 | |
1096 | // Scan the exponent one window at a time starting from the most |
1097 | // significant bits. |
1098 | if (top & 7) { |
1099 | while (bits >= 0) { |
1100 | for (wvalue = 0, i = 0; i < 5; i++, bits--) { |
1101 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
1102 | } |
1103 | |
1104 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1105 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1106 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1107 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1108 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
1109 | bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); |
1110 | } |
1111 | } else { |
1112 | const uint8_t *p_bytes = (const uint8_t *)p->d; |
1113 | assert(bits < max_bits); |
1114 | // |p = 0| has been handled as a special case, so |max_bits| is at least |
1115 | // one word. |
1116 | assert(max_bits >= 64); |
1117 | |
1118 | // If the first bit to be read lands in the last byte, unroll the first |
1119 | // iteration to avoid reading past the bounds of |p->d|. (After the first |
1120 | // iteration, we are guaranteed to be past the last byte.) Note |bits| |
1121 | // here is the top bit, inclusive. |
1122 | if (bits - 4 >= max_bits - 8) { |
1123 | // Read five bits from |bits-4| through |bits|, inclusive. |
1124 | wvalue = p_bytes[p->width * BN_BYTES - 1]; |
1125 | wvalue >>= (bits - 4) & 7; |
1126 | wvalue &= 0x1f; |
1127 | bits -= 5; |
1128 | bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); |
1129 | } |
1130 | while (bits >= 0) { |
1131 | // Read five bits from |bits-4| through |bits|, inclusive. |
1132 | int first_bit = bits - 4; |
1133 | uint16_t val; |
1134 | OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val)); |
1135 | val >>= first_bit & 7; |
1136 | val &= 0x1f; |
1137 | bits -= 5; |
1138 | bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val); |
1139 | } |
1140 | } |
1141 | |
1142 | ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); |
1143 | tmp.width = top; |
1144 | if (ret) { |
1145 | if (!BN_copy(rr, &tmp)) { |
1146 | ret = 0; |
1147 | } |
1148 | goto err; // non-zero ret means it's not error |
1149 | } |
1150 | } else |
1151 | #endif |
1152 | { |
1153 | copy_to_prebuf(&tmp, top, powerbuf, 0, window); |
1154 | copy_to_prebuf(&am, top, powerbuf, 1, window); |
1155 | |
1156 | // If the window size is greater than 1, then calculate |
1157 | // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) |
1158 | // (even powers could instead be computed as (a^(i/2))^2 |
1159 | // to use the slight performance advantage of sqr over mul). |
1160 | if (window > 1) { |
1161 | if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) { |
1162 | goto err; |
1163 | } |
1164 | |
1165 | copy_to_prebuf(&tmp, top, powerbuf, 2, window); |
1166 | |
1167 | for (i = 3; i < numPowers; i++) { |
1168 | // Calculate a^i = a^(i-1) * a |
1169 | if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) { |
1170 | goto err; |
1171 | } |
1172 | |
1173 | copy_to_prebuf(&tmp, top, powerbuf, i, window); |
1174 | } |
1175 | } |
1176 | |
1177 | bits--; |
1178 | for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) { |
1179 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
1180 | } |
1181 | if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) { |
1182 | goto err; |
1183 | } |
1184 | |
1185 | // Scan the exponent one window at a time starting from the most |
1186 | // significant bits. |
1187 | while (bits >= 0) { |
1188 | wvalue = 0; // The 'value' of the window |
1189 | |
1190 | // Scan the window, squaring the result as we go |
1191 | for (i = 0; i < window; i++, bits--) { |
1192 | if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) { |
1193 | goto err; |
1194 | } |
1195 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
1196 | } |
1197 | |
1198 | // Fetch the appropriate pre-computed value from the pre-buf |
1199 | if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) { |
1200 | goto err; |
1201 | } |
1202 | |
1203 | // Multiply the result into the intermediate result |
1204 | if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) { |
1205 | goto err; |
1206 | } |
1207 | } |
1208 | } |
1209 | |
1210 | // Convert the final result from montgomery to standard format |
1211 | if (!BN_from_montgomery(rr, &tmp, mont, ctx)) { |
1212 | goto err; |
1213 | } |
1214 | ret = 1; |
1215 | |
1216 | err: |
1217 | BN_MONT_CTX_free(new_mont); |
1218 | if (powerbuf != NULL && powerbufFree == NULL) { |
1219 | OPENSSL_cleanse(powerbuf, powerbufLen); |
1220 | } |
1221 | OPENSSL_free(powerbufFree); |
1222 | return (ret); |
1223 | } |
1224 | |
1225 | int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, |
1226 | const BIGNUM *m, BN_CTX *ctx, |
1227 | const BN_MONT_CTX *mont) { |
1228 | BIGNUM a_bignum; |
1229 | BN_init(&a_bignum); |
1230 | |
1231 | int ret = 0; |
1232 | |
1233 | // BN_mod_exp_mont requires reduced inputs. |
1234 | if (bn_minimal_width(m) == 1) { |
1235 | a %= m->d[0]; |
1236 | } |
1237 | |
1238 | if (!BN_set_word(&a_bignum, a)) { |
1239 | OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); |
1240 | goto err; |
1241 | } |
1242 | |
1243 | ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont); |
1244 | |
1245 | err: |
1246 | BN_free(&a_bignum); |
1247 | |
1248 | return ret; |
1249 | } |
1250 | |
1251 | #define TABLE_SIZE 32 |
1252 | |
1253 | int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, |
1254 | const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, |
1255 | BN_CTX *ctx, const BN_MONT_CTX *mont) { |
1256 | BIGNUM tmp; |
1257 | BN_init(&tmp); |
1258 | |
1259 | int ret = 0; |
1260 | BN_MONT_CTX *new_mont = NULL; |
1261 | |
1262 | // Allocate a montgomery context if it was not supplied by the caller. |
1263 | if (mont == NULL) { |
1264 | new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); |
1265 | if (new_mont == NULL) { |
1266 | goto err; |
1267 | } |
1268 | mont = new_mont; |
1269 | } |
1270 | |
1271 | // BN_mod_mul_montgomery removes one Montgomery factor, so passing one |
1272 | // Montgomery-encoded and one non-Montgomery-encoded value gives a |
1273 | // non-Montgomery-encoded result. |
1274 | if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) || |
1275 | !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) || |
1276 | !BN_to_montgomery(rr, rr, mont, ctx) || |
1277 | !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) { |
1278 | goto err; |
1279 | } |
1280 | |
1281 | ret = 1; |
1282 | |
1283 | err: |
1284 | BN_MONT_CTX_free(new_mont); |
1285 | BN_free(&tmp); |
1286 | |
1287 | return ret; |
1288 | } |
1289 | |