1 | /* crypto/bn/bn_mul.c */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. |
4 | * |
5 | * This package is an SSL implementation written |
6 | * by Eric Young (eay@cryptsoft.com). |
7 | * The implementation was written so as to conform with Netscapes SSL. |
8 | * |
9 | * This library is free for commercial and non-commercial use as long as |
10 | * the following conditions are aheared to. The following conditions |
11 | * apply to all code found in this distribution, be it the RC4, RSA, |
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 | * included with this distribution is covered by the same copyright terms |
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 | * |
16 | * Copyright remains Eric Young's, and as such any Copyright notices in |
17 | * the code are not to be removed. |
18 | * If this package is used in a product, Eric Young should be given attribution |
19 | * as the author of the parts of the library used. |
20 | * This can be in the form of a textual message at program startup or |
21 | * in documentation (online or textual) provided with the package. |
22 | * |
23 | * Redistribution and use in source and binary forms, with or without |
24 | * modification, are permitted provided that the following conditions |
25 | * are met: |
26 | * 1. Redistributions of source code must retain the copyright |
27 | * notice, this list of conditions and the following disclaimer. |
28 | * 2. Redistributions in binary form must reproduce the above copyright |
29 | * notice, this list of conditions and the following disclaimer in the |
30 | * documentation and/or other materials provided with the distribution. |
31 | * 3. All advertising materials mentioning features or use of this software |
32 | * must display the following acknowledgement: |
33 | * "This product includes cryptographic software written by |
34 | * Eric Young (eay@cryptsoft.com)" |
35 | * The word 'cryptographic' can be left out if the rouines from the library |
36 | * being used are not cryptographic related :-). |
37 | * 4. If you include any Windows specific code (or a derivative thereof) from |
38 | * the apps directory (application code) you must include an acknowledgement: |
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 | * |
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 | * SUCH DAMAGE. |
52 | * |
53 | * The licence and distribution terms for any publically available version or |
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be |
55 | * copied and put under another distribution licence |
56 | * [including the GNU Public Licence.] |
57 | */ |
58 | |
59 | #ifndef BN_DEBUG |
60 | # undef NDEBUG /* avoid conflicting definitions */ |
61 | # define NDEBUG |
62 | #endif |
63 | |
64 | #include <stdio.h> |
65 | #include <assert.h> |
66 | #include "../bn/bn_lcl.h" |
67 | |
68 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) |
69 | /* |
70 | * Here follows specialised variants of bn_add_words() and bn_sub_words(). |
71 | * They have the property performing operations on arrays of different sizes. |
72 | * The sizes of those arrays is expressed through cl, which is the common |
73 | * length ( basicall, min(len(a),len(b)) ), and dl, which is the delta |
74 | * between the two lengths, calculated as len(a)-len(b). All lengths are the |
75 | * number of BN_ULONGs... For the operations that require a result array as |
76 | * parameter, it must have the length cl+abs(dl). These functions should |
77 | * probably end up in bn_asm.c as soon as there are assembler counterparts |
78 | * for the systems that use assembler files. |
79 | */ |
80 | |
81 | BN_ULONG bn_sub_part_words(BN_ULONG *r, |
82 | const BN_ULONG *a, const BN_ULONG *b, |
83 | int cl, int dl) |
84 | { |
85 | BN_ULONG c, t; |
86 | |
87 | assert(cl >= 0); |
88 | c = bn_sub_words(r, a, b, cl); |
89 | |
90 | if (dl == 0) |
91 | return c; |
92 | |
93 | r += cl; |
94 | a += cl; |
95 | b += cl; |
96 | |
97 | if (dl < 0) { |
98 | # ifdef BN_COUNT |
99 | fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n" , cl, |
100 | dl, c); |
101 | # endif |
102 | for (;;) { |
103 | t = b[0]; |
104 | r[0] = (0 - t - c) & BN_MASK2; |
105 | if (t != 0) |
106 | c = 1; |
107 | if (++dl >= 0) |
108 | break; |
109 | |
110 | t = b[1]; |
111 | r[1] = (0 - t - c) & BN_MASK2; |
112 | if (t != 0) |
113 | c = 1; |
114 | if (++dl >= 0) |
115 | break; |
116 | |
117 | t = b[2]; |
118 | r[2] = (0 - t - c) & BN_MASK2; |
119 | if (t != 0) |
120 | c = 1; |
121 | if (++dl >= 0) |
122 | break; |
123 | |
124 | t = b[3]; |
125 | r[3] = (0 - t - c) & BN_MASK2; |
126 | if (t != 0) |
127 | c = 1; |
128 | if (++dl >= 0) |
129 | break; |
130 | |
131 | b += 4; |
132 | r += 4; |
133 | } |
134 | } else { |
135 | int save_dl = dl; |
136 | # ifdef BN_COUNT |
137 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n" , cl, |
138 | dl, c); |
139 | # endif |
140 | while (c) { |
141 | t = a[0]; |
142 | r[0] = (t - c) & BN_MASK2; |
143 | if (t != 0) |
144 | c = 0; |
145 | if (--dl <= 0) |
146 | break; |
147 | |
148 | t = a[1]; |
149 | r[1] = (t - c) & BN_MASK2; |
150 | if (t != 0) |
151 | c = 0; |
152 | if (--dl <= 0) |
153 | break; |
154 | |
155 | t = a[2]; |
156 | r[2] = (t - c) & BN_MASK2; |
157 | if (t != 0) |
158 | c = 0; |
159 | if (--dl <= 0) |
160 | break; |
161 | |
162 | t = a[3]; |
163 | r[3] = (t - c) & BN_MASK2; |
164 | if (t != 0) |
165 | c = 0; |
166 | if (--dl <= 0) |
167 | break; |
168 | |
169 | save_dl = dl; |
170 | a += 4; |
171 | r += 4; |
172 | } |
173 | if (dl > 0) { |
174 | # ifdef BN_COUNT |
175 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n" , |
176 | cl, dl); |
177 | # endif |
178 | if (save_dl > dl) { |
179 | switch (save_dl - dl) { |
180 | case 1: |
181 | r[1] = a[1]; |
182 | if (--dl <= 0) |
183 | break; |
184 | case 2: |
185 | r[2] = a[2]; |
186 | if (--dl <= 0) |
187 | break; |
188 | case 3: |
189 | r[3] = a[3]; |
190 | if (--dl <= 0) |
191 | break; |
192 | } |
193 | a += 4; |
194 | r += 4; |
195 | } |
196 | } |
197 | if (dl > 0) { |
198 | # ifdef BN_COUNT |
199 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n" , |
200 | cl, dl); |
201 | # endif |
202 | for (;;) { |
203 | r[0] = a[0]; |
204 | if (--dl <= 0) |
205 | break; |
206 | r[1] = a[1]; |
207 | if (--dl <= 0) |
208 | break; |
209 | r[2] = a[2]; |
210 | if (--dl <= 0) |
211 | break; |
212 | r[3] = a[3]; |
213 | if (--dl <= 0) |
214 | break; |
215 | |
216 | a += 4; |
217 | r += 4; |
218 | } |
219 | } |
220 | } |
221 | return c; |
222 | } |
223 | #endif |
224 | |
225 | BN_ULONG bn_add_part_words(BN_ULONG *r, |
226 | const BN_ULONG *a, const BN_ULONG *b, |
227 | int cl, int dl) |
228 | { |
229 | BN_ULONG c, l, t; |
230 | |
231 | assert(cl >= 0); |
232 | c = bn_add_words(r, a, b, cl); |
233 | |
234 | if (dl == 0) |
235 | return c; |
236 | |
237 | r += cl; |
238 | a += cl; |
239 | b += cl; |
240 | |
241 | if (dl < 0) { |
242 | int save_dl = dl; |
243 | #ifdef BN_COUNT |
244 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n" , cl, |
245 | dl, c); |
246 | #endif |
247 | while (c) { |
248 | l = (c + b[0]) & BN_MASK2; |
249 | c = (l < c); |
250 | r[0] = l; |
251 | if (++dl >= 0) |
252 | break; |
253 | |
254 | l = (c + b[1]) & BN_MASK2; |
255 | c = (l < c); |
256 | r[1] = l; |
257 | if (++dl >= 0) |
258 | break; |
259 | |
260 | l = (c + b[2]) & BN_MASK2; |
261 | c = (l < c); |
262 | r[2] = l; |
263 | if (++dl >= 0) |
264 | break; |
265 | |
266 | l = (c + b[3]) & BN_MASK2; |
267 | c = (l < c); |
268 | r[3] = l; |
269 | if (++dl >= 0) |
270 | break; |
271 | |
272 | save_dl = dl; |
273 | b += 4; |
274 | r += 4; |
275 | } |
276 | if (dl < 0) { |
277 | #ifdef BN_COUNT |
278 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n" , |
279 | cl, dl); |
280 | #endif |
281 | if (save_dl < dl) { |
282 | switch (dl - save_dl) { |
283 | case 1: |
284 | r[1] = b[1]; |
285 | if (++dl >= 0) |
286 | break; |
287 | case 2: |
288 | r[2] = b[2]; |
289 | if (++dl >= 0) |
290 | break; |
291 | case 3: |
292 | r[3] = b[3]; |
293 | if (++dl >= 0) |
294 | break; |
295 | } |
296 | b += 4; |
297 | r += 4; |
298 | } |
299 | } |
300 | if (dl < 0) { |
301 | #ifdef BN_COUNT |
302 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n" , |
303 | cl, dl); |
304 | #endif |
305 | for (;;) { |
306 | r[0] = b[0]; |
307 | if (++dl >= 0) |
308 | break; |
309 | r[1] = b[1]; |
310 | if (++dl >= 0) |
311 | break; |
312 | r[2] = b[2]; |
313 | if (++dl >= 0) |
314 | break; |
315 | r[3] = b[3]; |
316 | if (++dl >= 0) |
317 | break; |
318 | |
319 | b += 4; |
320 | r += 4; |
321 | } |
322 | } |
323 | } else { |
324 | int save_dl = dl; |
325 | #ifdef BN_COUNT |
326 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n" , cl, dl); |
327 | #endif |
328 | while (c) { |
329 | t = (a[0] + c) & BN_MASK2; |
330 | c = (t < c); |
331 | r[0] = t; |
332 | if (--dl <= 0) |
333 | break; |
334 | |
335 | t = (a[1] + c) & BN_MASK2; |
336 | c = (t < c); |
337 | r[1] = t; |
338 | if (--dl <= 0) |
339 | break; |
340 | |
341 | t = (a[2] + c) & BN_MASK2; |
342 | c = (t < c); |
343 | r[2] = t; |
344 | if (--dl <= 0) |
345 | break; |
346 | |
347 | t = (a[3] + c) & BN_MASK2; |
348 | c = (t < c); |
349 | r[3] = t; |
350 | if (--dl <= 0) |
351 | break; |
352 | |
353 | save_dl = dl; |
354 | a += 4; |
355 | r += 4; |
356 | } |
357 | #ifdef BN_COUNT |
358 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n" , cl, |
359 | dl); |
360 | #endif |
361 | if (dl > 0) { |
362 | if (save_dl > dl) { |
363 | switch (save_dl - dl) { |
364 | case 1: |
365 | r[1] = a[1]; |
366 | if (--dl <= 0) |
367 | break; |
368 | case 2: |
369 | r[2] = a[2]; |
370 | if (--dl <= 0) |
371 | break; |
372 | case 3: |
373 | r[3] = a[3]; |
374 | if (--dl <= 0) |
375 | break; |
376 | } |
377 | a += 4; |
378 | r += 4; |
379 | } |
380 | } |
381 | if (dl > 0) { |
382 | #ifdef BN_COUNT |
383 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n" , |
384 | cl, dl); |
385 | #endif |
386 | for (;;) { |
387 | r[0] = a[0]; |
388 | if (--dl <= 0) |
389 | break; |
390 | r[1] = a[1]; |
391 | if (--dl <= 0) |
392 | break; |
393 | r[2] = a[2]; |
394 | if (--dl <= 0) |
395 | break; |
396 | r[3] = a[3]; |
397 | if (--dl <= 0) |
398 | break; |
399 | |
400 | a += 4; |
401 | r += 4; |
402 | } |
403 | } |
404 | } |
405 | return c; |
406 | } |
407 | |
408 | #ifdef BN_RECURSION |
409 | /* |
410 | * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of |
411 | * Computer Programming, Vol. 2) |
412 | */ |
413 | |
414 | /*- |
415 | * r is 2*n2 words in size, |
416 | * a and b are both n2 words in size. |
417 | * n2 must be a power of 2. |
418 | * We multiply and return the result. |
419 | * t must be 2*n2 words in size |
420 | * We calculate |
421 | * a[0]*b[0] |
422 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) |
423 | * a[1]*b[1] |
424 | */ |
425 | /* dnX may not be positive, but n2/2+dnX has to be */ |
426 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, |
427 | int dna, int dnb, BN_ULONG *t) |
428 | { |
429 | int n = n2 / 2, c1, c2; |
430 | int tna = n + dna, tnb = n + dnb; |
431 | unsigned int neg, zero; |
432 | BN_ULONG ln, lo, *p; |
433 | |
434 | # ifdef BN_COUNT |
435 | fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n" , n2, dna, n2, dnb); |
436 | # endif |
437 | # ifdef BN_MUL_COMBA |
438 | # if 0 |
439 | if (n2 == 4) { |
440 | bn_mul_comba4(r, a, b); |
441 | return; |
442 | } |
443 | # endif |
444 | /* |
445 | * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete |
446 | * [steve] |
447 | */ |
448 | if (n2 == 8 && dna == 0 && dnb == 0) { |
449 | bn_mul_comba8(r, a, b); |
450 | return; |
451 | } |
452 | # endif /* BN_MUL_COMBA */ |
453 | /* Else do normal multiply */ |
454 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { |
455 | bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); |
456 | if ((dna + dnb) < 0) |
457 | memset(&r[2 * n2 + dna + dnb], 0, |
458 | sizeof(BN_ULONG) * -(dna + dnb)); |
459 | return; |
460 | } |
461 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
462 | c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); |
463 | c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); |
464 | zero = neg = 0; |
465 | switch (c1 * 3 + c2) { |
466 | case -4: |
467 | bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ |
468 | bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ |
469 | break; |
470 | case -3: |
471 | zero = 1; |
472 | break; |
473 | case -2: |
474 | bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ |
475 | bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ |
476 | neg = 1; |
477 | break; |
478 | case -1: |
479 | case 0: |
480 | case 1: |
481 | zero = 1; |
482 | break; |
483 | case 2: |
484 | bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ |
485 | bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ |
486 | neg = 1; |
487 | break; |
488 | case 3: |
489 | zero = 1; |
490 | break; |
491 | case 4: |
492 | bn_sub_part_words(t, a, &(a[n]), tna, n - tna); |
493 | bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); |
494 | break; |
495 | } |
496 | |
497 | # ifdef BN_MUL_COMBA |
498 | if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take |
499 | * extra args to do this well */ |
500 | if (!zero) |
501 | bn_mul_comba4(&(t[n2]), t, &(t[n])); |
502 | else |
503 | memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); |
504 | |
505 | bn_mul_comba4(r, a, b); |
506 | bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); |
507 | } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could |
508 | * take extra args to do |
509 | * this well */ |
510 | if (!zero) |
511 | bn_mul_comba8(&(t[n2]), t, &(t[n])); |
512 | else |
513 | memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); |
514 | |
515 | bn_mul_comba8(r, a, b); |
516 | bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); |
517 | } else |
518 | # endif /* BN_MUL_COMBA */ |
519 | { |
520 | p = &(t[n2 * 2]); |
521 | if (!zero) |
522 | bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); |
523 | else |
524 | memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); |
525 | bn_mul_recursive(r, a, b, n, 0, 0, p); |
526 | bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); |
527 | } |
528 | |
529 | /*- |
530 | * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign |
531 | * r[10] holds (a[0]*b[0]) |
532 | * r[32] holds (b[1]*b[1]) |
533 | */ |
534 | |
535 | c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); |
536 | |
537 | if (neg) { /* if t[32] is negative */ |
538 | c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); |
539 | } else { |
540 | /* Might have a carry */ |
541 | c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); |
542 | } |
543 | |
544 | /*- |
545 | * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) |
546 | * r[10] holds (a[0]*b[0]) |
547 | * r[32] holds (b[1]*b[1]) |
548 | * c1 holds the carry bits |
549 | */ |
550 | c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); |
551 | if (c1) { |
552 | p = &(r[n + n2]); |
553 | lo = *p; |
554 | ln = (lo + c1) & BN_MASK2; |
555 | *p = ln; |
556 | |
557 | /* |
558 | * The overflow will stop before we over write words we should not |
559 | * overwrite |
560 | */ |
561 | if (ln < (BN_ULONG)c1) { |
562 | do { |
563 | p++; |
564 | lo = *p; |
565 | ln = (lo + 1) & BN_MASK2; |
566 | *p = ln; |
567 | } while (ln == 0); |
568 | } |
569 | } |
570 | } |
571 | |
572 | /* |
573 | * n+tn is the word length t needs to be n*4 is size, as does r |
574 | */ |
575 | /* tnX may not be negative but less than n */ |
576 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, |
577 | int tna, int tnb, BN_ULONG *t) |
578 | { |
579 | int i, j, n2 = n * 2; |
580 | int c1, c2, neg; |
581 | BN_ULONG ln, lo, *p; |
582 | |
583 | # ifdef BN_COUNT |
584 | fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n" , |
585 | n, tna, n, tnb); |
586 | # endif |
587 | if (n < 8) { |
588 | bn_mul_normal(r, a, n + tna, b, n + tnb); |
589 | return; |
590 | } |
591 | |
592 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
593 | c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); |
594 | c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); |
595 | neg = 0; |
596 | switch (c1 * 3 + c2) { |
597 | case -4: |
598 | bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ |
599 | bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ |
600 | break; |
601 | case -3: |
602 | /* break; */ |
603 | case -2: |
604 | bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ |
605 | bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ |
606 | neg = 1; |
607 | break; |
608 | case -1: |
609 | case 0: |
610 | case 1: |
611 | /* break; */ |
612 | case 2: |
613 | bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ |
614 | bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ |
615 | neg = 1; |
616 | break; |
617 | case 3: |
618 | /* break; */ |
619 | case 4: |
620 | bn_sub_part_words(t, a, &(a[n]), tna, n - tna); |
621 | bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); |
622 | break; |
623 | } |
624 | /* |
625 | * The zero case isn't yet implemented here. The speedup would probably |
626 | * be negligible. |
627 | */ |
628 | # if 0 |
629 | if (n == 4) { |
630 | bn_mul_comba4(&(t[n2]), t, &(t[n])); |
631 | bn_mul_comba4(r, a, b); |
632 | bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); |
633 | memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2)); |
634 | } else |
635 | # endif |
636 | if (n == 8) { |
637 | bn_mul_comba8(&(t[n2]), t, &(t[n])); |
638 | bn_mul_comba8(r, a, b); |
639 | bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); |
640 | memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb)); |
641 | } else { |
642 | p = &(t[n2 * 2]); |
643 | bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); |
644 | bn_mul_recursive(r, a, b, n, 0, 0, p); |
645 | i = n / 2; |
646 | /* |
647 | * If there is only a bottom half to the number, just do it |
648 | */ |
649 | if (tna > tnb) |
650 | j = tna - i; |
651 | else |
652 | j = tnb - i; |
653 | if (j == 0) { |
654 | bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), |
655 | i, tna - i, tnb - i, p); |
656 | memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2)); |
657 | } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */ |
658 | bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), |
659 | i, tna - i, tnb - i, p); |
660 | memset(&(r[n2 + tna + tnb]), 0, |
661 | sizeof(BN_ULONG) * (n2 - tna - tnb)); |
662 | } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ |
663 | |
664 | memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); |
665 | if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL |
666 | && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { |
667 | bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); |
668 | } else { |
669 | for (;;) { |
670 | i /= 2; |
671 | /* |
672 | * these simplified conditions work exclusively because |
673 | * difference between tna and tnb is 1 or 0 |
674 | */ |
675 | if (i < tna || i < tnb) { |
676 | bn_mul_part_recursive(&(r[n2]), |
677 | &(a[n]), &(b[n]), |
678 | i, tna - i, tnb - i, p); |
679 | break; |
680 | } else if (i == tna || i == tnb) { |
681 | bn_mul_recursive(&(r[n2]), |
682 | &(a[n]), &(b[n]), |
683 | i, tna - i, tnb - i, p); |
684 | break; |
685 | } |
686 | } |
687 | } |
688 | } |
689 | } |
690 | |
691 | /*- |
692 | * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign |
693 | * r[10] holds (a[0]*b[0]) |
694 | * r[32] holds (b[1]*b[1]) |
695 | */ |
696 | |
697 | c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); |
698 | |
699 | if (neg) { /* if t[32] is negative */ |
700 | c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); |
701 | } else { |
702 | /* Might have a carry */ |
703 | c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); |
704 | } |
705 | |
706 | /*- |
707 | * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) |
708 | * r[10] holds (a[0]*b[0]) |
709 | * r[32] holds (b[1]*b[1]) |
710 | * c1 holds the carry bits |
711 | */ |
712 | c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); |
713 | if (c1) { |
714 | p = &(r[n + n2]); |
715 | lo = *p; |
716 | ln = (lo + c1) & BN_MASK2; |
717 | *p = ln; |
718 | |
719 | /* |
720 | * The overflow will stop before we over write words we should not |
721 | * overwrite |
722 | */ |
723 | if (ln < (BN_ULONG)c1) { |
724 | do { |
725 | p++; |
726 | lo = *p; |
727 | ln = (lo + 1) & BN_MASK2; |
728 | *p = ln; |
729 | } while (ln == 0); |
730 | } |
731 | } |
732 | } |
733 | |
734 | /*- |
735 | * a and b must be the same size, which is n2. |
736 | * r needs to be n2 words and t needs to be n2*2 |
737 | */ |
738 | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, |
739 | BN_ULONG *t) |
740 | { |
741 | int n = n2 / 2; |
742 | |
743 | # ifdef BN_COUNT |
744 | fprintf(stderr, " bn_mul_low_recursive %d * %d\n" , n2, n2); |
745 | # endif |
746 | |
747 | bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); |
748 | if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { |
749 | bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); |
750 | bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); |
751 | bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2])); |
752 | bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); |
753 | } else { |
754 | bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n); |
755 | bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n); |
756 | bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); |
757 | bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); |
758 | } |
759 | } |
760 | |
761 | /*- |
762 | * a and b must be the same size, which is n2. |
763 | * r needs to be n2 words and t needs to be n2*2 |
764 | * l is the low words of the output. |
765 | * t needs to be n2*3 |
766 | */ |
767 | void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, |
768 | BN_ULONG *t) |
769 | { |
770 | int i, n; |
771 | int c1, c2; |
772 | int neg, oneg, zero; |
773 | BN_ULONG ll, lc, *lp, *mp; |
774 | |
775 | # ifdef BN_COUNT |
776 | fprintf(stderr, " bn_mul_high %d * %d\n" , n2, n2); |
777 | # endif |
778 | n = n2 / 2; |
779 | |
780 | /* Calculate (al-ah)*(bh-bl) */ |
781 | neg = zero = 0; |
782 | c1 = bn_cmp_words(&(a[0]), &(a[n]), n); |
783 | c2 = bn_cmp_words(&(b[n]), &(b[0]), n); |
784 | switch (c1 * 3 + c2) { |
785 | case -4: |
786 | bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); |
787 | bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); |
788 | break; |
789 | case -3: |
790 | zero = 1; |
791 | break; |
792 | case -2: |
793 | bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); |
794 | bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); |
795 | neg = 1; |
796 | break; |
797 | case -1: |
798 | case 0: |
799 | case 1: |
800 | zero = 1; |
801 | break; |
802 | case 2: |
803 | bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); |
804 | bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); |
805 | neg = 1; |
806 | break; |
807 | case 3: |
808 | zero = 1; |
809 | break; |
810 | case 4: |
811 | bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); |
812 | bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); |
813 | break; |
814 | } |
815 | |
816 | oneg = neg; |
817 | /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ |
818 | /* r[10] = (a[1]*b[1]) */ |
819 | # ifdef BN_MUL_COMBA |
820 | if (n == 8) { |
821 | bn_mul_comba8(&(t[0]), &(r[0]), &(r[n])); |
822 | bn_mul_comba8(r, &(a[n]), &(b[n])); |
823 | } else |
824 | # endif |
825 | { |
826 | bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2])); |
827 | bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2])); |
828 | } |
829 | |
830 | /*- |
831 | * s0 == low(al*bl) |
832 | * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) |
833 | * We know s0 and s1 so the only unknown is high(al*bl) |
834 | * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) |
835 | * high(al*bl) == s1 - (r[0]+l[0]+t[0]) |
836 | */ |
837 | if (l != NULL) { |
838 | lp = &(t[n2 + n]); |
839 | c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n)); |
840 | } else { |
841 | c1 = 0; |
842 | lp = &(r[0]); |
843 | } |
844 | |
845 | if (neg) |
846 | neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n)); |
847 | else { |
848 | bn_add_words(&(t[n2]), lp, &(t[0]), n); |
849 | neg = 0; |
850 | } |
851 | |
852 | if (l != NULL) { |
853 | bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n); |
854 | } else { |
855 | lp = &(t[n2 + n]); |
856 | mp = &(t[n2]); |
857 | for (i = 0; i < n; i++) |
858 | lp[i] = ((~mp[i]) + 1) & BN_MASK2; |
859 | } |
860 | |
861 | /*- |
862 | * s[0] = low(al*bl) |
863 | * t[3] = high(al*bl) |
864 | * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign |
865 | * r[10] = (a[1]*b[1]) |
866 | */ |
867 | /*- |
868 | * R[10] = al*bl |
869 | * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) |
870 | * R[32] = ah*bh |
871 | */ |
872 | /*- |
873 | * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) |
874 | * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) |
875 | * R[3]=r[1]+(carry/borrow) |
876 | */ |
877 | if (l != NULL) { |
878 | lp = &(t[n2]); |
879 | c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n)); |
880 | } else { |
881 | lp = &(t[n2 + n]); |
882 | c1 = 0; |
883 | } |
884 | c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n)); |
885 | if (oneg) |
886 | c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n)); |
887 | else |
888 | c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n)); |
889 | |
890 | c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n)); |
891 | c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n)); |
892 | if (oneg) |
893 | c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n)); |
894 | else |
895 | c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n)); |
896 | |
897 | if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */ |
898 | i = 0; |
899 | if (c1 > 0) { |
900 | lc = c1; |
901 | do { |
902 | ll = (r[i] + lc) & BN_MASK2; |
903 | r[i++] = ll; |
904 | lc = (lc > ll); |
905 | } while (lc); |
906 | } else { |
907 | lc = -c1; |
908 | do { |
909 | ll = r[i]; |
910 | r[i++] = (ll - lc) & BN_MASK2; |
911 | lc = (lc > ll); |
912 | } while (lc); |
913 | } |
914 | } |
915 | if (c2 != 0) { /* Add starting at r[1] */ |
916 | i = n; |
917 | if (c2 > 0) { |
918 | lc = c2; |
919 | do { |
920 | ll = (r[i] + lc) & BN_MASK2; |
921 | r[i++] = ll; |
922 | lc = (lc > ll); |
923 | } while (lc); |
924 | } else { |
925 | lc = -c2; |
926 | do { |
927 | ll = r[i]; |
928 | r[i++] = (ll - lc) & BN_MASK2; |
929 | lc = (lc > ll); |
930 | } while (lc); |
931 | } |
932 | } |
933 | } |
934 | #endif /* BN_RECURSION */ |
935 | |
936 | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) |
937 | { |
938 | int ret = 0; |
939 | int top, al, bl; |
940 | BIGNUM *rr; |
941 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
942 | int i; |
943 | #endif |
944 | #ifdef BN_RECURSION |
945 | BIGNUM *t = NULL; |
946 | int j = 0, k; |
947 | #endif |
948 | |
949 | #ifdef BN_COUNT |
950 | fprintf(stderr, "BN_mul %d * %d\n" , a->top, b->top); |
951 | #endif |
952 | |
953 | bn_check_top(a); |
954 | bn_check_top(b); |
955 | bn_check_top(r); |
956 | |
957 | al = a->top; |
958 | bl = b->top; |
959 | |
960 | if ((al == 0) || (bl == 0)) { |
961 | BN_zero(r); |
962 | return (1); |
963 | } |
964 | top = al + bl; |
965 | |
966 | BN_CTX_start(ctx); |
967 | if ((r == a) || (r == b)) { |
968 | if ((rr = BN_CTX_get(ctx)) == NULL) |
969 | goto err; |
970 | } else |
971 | rr = r; |
972 | rr->neg = a->neg ^ b->neg; |
973 | |
974 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
975 | i = al - bl; |
976 | #endif |
977 | #ifdef BN_MUL_COMBA |
978 | if (i == 0) { |
979 | # if 0 |
980 | if (al == 4) { |
981 | if (bn_wexpand(rr, 8) == NULL) |
982 | goto err; |
983 | rr->top = 8; |
984 | bn_mul_comba4(rr->d, a->d, b->d); |
985 | goto end; |
986 | } |
987 | # endif |
988 | if (al == 8) { |
989 | if (bn_wexpand(rr, 16) == NULL) |
990 | goto err; |
991 | rr->top = 16; |
992 | bn_mul_comba8(rr->d, a->d, b->d); |
993 | goto end; |
994 | } |
995 | } |
996 | #endif /* BN_MUL_COMBA */ |
997 | #ifdef BN_RECURSION |
998 | if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { |
999 | if (i >= -1 && i <= 1) { |
1000 | /* |
1001 | * Find out the power of two lower or equal to the longest of the |
1002 | * two numbers |
1003 | */ |
1004 | if (i >= 0) { |
1005 | j = BN_num_bits_word((BN_ULONG)al); |
1006 | } |
1007 | if (i == -1) { |
1008 | j = BN_num_bits_word((BN_ULONG)bl); |
1009 | } |
1010 | j = 1 << (j - 1); |
1011 | assert(j <= al || j <= bl); |
1012 | k = j + j; |
1013 | t = BN_CTX_get(ctx); |
1014 | if (t == NULL) |
1015 | goto err; |
1016 | if (al > j || bl > j) { |
1017 | if (bn_wexpand(t, k * 4) == NULL) |
1018 | goto err; |
1019 | if (bn_wexpand(rr, k * 4) == NULL) |
1020 | goto err; |
1021 | bn_mul_part_recursive(rr->d, a->d, b->d, |
1022 | j, al - j, bl - j, t->d); |
1023 | } else { /* al <= j || bl <= j */ |
1024 | |
1025 | if (bn_wexpand(t, k * 2) == NULL) |
1026 | goto err; |
1027 | if (bn_wexpand(rr, k * 2) == NULL) |
1028 | goto err; |
1029 | bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d); |
1030 | } |
1031 | rr->top = top; |
1032 | goto end; |
1033 | } |
1034 | # if 0 |
1035 | if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) { |
1036 | BIGNUM *tmp_bn = (BIGNUM *)b; |
1037 | if (bn_wexpand(tmp_bn, al) == NULL) |
1038 | goto err; |
1039 | tmp_bn->d[bl] = 0; |
1040 | bl++; |
1041 | i--; |
1042 | } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { |
1043 | BIGNUM *tmp_bn = (BIGNUM *)a; |
1044 | if (bn_wexpand(tmp_bn, bl) == NULL) |
1045 | goto err; |
1046 | tmp_bn->d[al] = 0; |
1047 | al++; |
1048 | i++; |
1049 | } |
1050 | if (i == 0) { |
1051 | /* symmetric and > 4 */ |
1052 | /* 16 or larger */ |
1053 | j = BN_num_bits_word((BN_ULONG)al); |
1054 | j = 1 << (j - 1); |
1055 | k = j + j; |
1056 | t = BN_CTX_get(ctx); |
1057 | if (al == j) { /* exact multiple */ |
1058 | if (bn_wexpand(t, k * 2) == NULL) |
1059 | goto err; |
1060 | if (bn_wexpand(rr, k * 2) == NULL) |
1061 | goto err; |
1062 | bn_mul_recursive(rr->d, a->d, b->d, al, t->d); |
1063 | } else { |
1064 | if (bn_wexpand(t, k * 4) == NULL) |
1065 | goto err; |
1066 | if (bn_wexpand(rr, k * 4) == NULL) |
1067 | goto err; |
1068 | bn_mul_part_recursive(rr->d, a->d, b->d, al - j, j, t->d); |
1069 | } |
1070 | rr->top = top; |
1071 | goto end; |
1072 | } |
1073 | # endif |
1074 | } |
1075 | #endif /* BN_RECURSION */ |
1076 | if (bn_wexpand(rr, top) == NULL) |
1077 | goto err; |
1078 | rr->top = top; |
1079 | bn_mul_normal(rr->d, a->d, al, b->d, bl); |
1080 | |
1081 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
1082 | end: |
1083 | #endif |
1084 | bn_correct_top(rr); |
1085 | if (r != rr && BN_copy(r, rr) == NULL) |
1086 | goto err; |
1087 | |
1088 | ret = 1; |
1089 | err: |
1090 | bn_check_top(r); |
1091 | BN_CTX_end(ctx); |
1092 | return (ret); |
1093 | } |
1094 | |
1095 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) |
1096 | { |
1097 | BN_ULONG *rr; |
1098 | |
1099 | #ifdef BN_COUNT |
1100 | fprintf(stderr, " bn_mul_normal %d * %d\n" , na, nb); |
1101 | #endif |
1102 | |
1103 | if (na < nb) { |
1104 | int itmp; |
1105 | BN_ULONG *ltmp; |
1106 | |
1107 | itmp = na; |
1108 | na = nb; |
1109 | nb = itmp; |
1110 | ltmp = a; |
1111 | a = b; |
1112 | b = ltmp; |
1113 | |
1114 | } |
1115 | rr = &(r[na]); |
1116 | if (nb <= 0) { |
1117 | (void)bn_mul_words(r, a, na, 0); |
1118 | return; |
1119 | } else |
1120 | rr[0] = bn_mul_words(r, a, na, b[0]); |
1121 | |
1122 | for (;;) { |
1123 | if (--nb <= 0) |
1124 | return; |
1125 | rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); |
1126 | if (--nb <= 0) |
1127 | return; |
1128 | rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); |
1129 | if (--nb <= 0) |
1130 | return; |
1131 | rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); |
1132 | if (--nb <= 0) |
1133 | return; |
1134 | rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); |
1135 | rr += 4; |
1136 | r += 4; |
1137 | b += 4; |
1138 | } |
1139 | } |
1140 | |
1141 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
1142 | { |
1143 | #ifdef BN_COUNT |
1144 | fprintf(stderr, " bn_mul_low_normal %d * %d\n" , n, n); |
1145 | #endif |
1146 | bn_mul_words(r, a, n, b[0]); |
1147 | |
1148 | for (;;) { |
1149 | if (--n <= 0) |
1150 | return; |
1151 | bn_mul_add_words(&(r[1]), a, n, b[1]); |
1152 | if (--n <= 0) |
1153 | return; |
1154 | bn_mul_add_words(&(r[2]), a, n, b[2]); |
1155 | if (--n <= 0) |
1156 | return; |
1157 | bn_mul_add_words(&(r[3]), a, n, b[3]); |
1158 | if (--n <= 0) |
1159 | return; |
1160 | bn_mul_add_words(&(r[4]), a, n, b[4]); |
1161 | r += 4; |
1162 | b += 4; |
1163 | } |
1164 | } |
1165 | |