1 | /* crypto/bn/bn_asm.c */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. |
4 | * |
5 | * This package is an SSL implementation written |
6 | * by Eric Young (eay@cryptsoft.com). |
7 | * The implementation was written so as to conform with Netscapes SSL. |
8 | * |
9 | * This library is free for commercial and non-commercial use as long as |
10 | * the following conditions are aheared to. The following conditions |
11 | * apply to all code found in this distribution, be it the RC4, RSA, |
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 | * included with this distribution is covered by the same copyright terms |
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 | * |
16 | * Copyright remains Eric Young's, and as such any Copyright notices in |
17 | * the code are not to be removed. |
18 | * If this package is used in a product, Eric Young should be given attribution |
19 | * as the author of the parts of the library used. |
20 | * This can be in the form of a textual message at program startup or |
21 | * in documentation (online or textual) provided with the package. |
22 | * |
23 | * Redistribution and use in source and binary forms, with or without |
24 | * modification, are permitted provided that the following conditions |
25 | * are met: |
26 | * 1. Redistributions of source code must retain the copyright |
27 | * notice, this list of conditions and the following disclaimer. |
28 | * 2. Redistributions in binary form must reproduce the above copyright |
29 | * notice, this list of conditions and the following disclaimer in the |
30 | * documentation and/or other materials provided with the distribution. |
31 | * 3. All advertising materials mentioning features or use of this software |
32 | * must display the following acknowledgement: |
33 | * "This product includes cryptographic software written by |
34 | * Eric Young (eay@cryptsoft.com)" |
35 | * The word 'cryptographic' can be left out if the rouines from the library |
36 | * being used are not cryptographic related :-). |
37 | * 4. If you include any Windows specific code (or a derivative thereof) from |
38 | * the apps directory (application code) you must include an acknowledgement: |
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 | * |
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 | * SUCH DAMAGE. |
52 | * |
53 | * The licence and distribution terms for any publically available version or |
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be |
55 | * copied and put under another distribution licence |
56 | * [including the GNU Public Licence.] |
57 | */ |
58 | |
59 | #ifndef BN_DEBUG |
60 | # undef NDEBUG /* avoid conflicting definitions */ |
61 | # define NDEBUG |
62 | #endif |
63 | |
64 | #include <stdio.h> |
65 | #include <assert.h> |
66 | #include "../bn/bn_lcl.h" |
67 | |
68 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
69 | |
70 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, |
71 | BN_ULONG w) |
72 | { |
73 | BN_ULONG c1 = 0; |
74 | |
75 | assert(num >= 0); |
76 | if (num <= 0) |
77 | return (c1); |
78 | |
79 | # ifndef OPENSSL_SMALL_FOOTPRINT |
80 | while (num & ~3) { |
81 | mul_add(rp[0], ap[0], w, c1); |
82 | mul_add(rp[1], ap[1], w, c1); |
83 | mul_add(rp[2], ap[2], w, c1); |
84 | mul_add(rp[3], ap[3], w, c1); |
85 | ap += 4; |
86 | rp += 4; |
87 | num -= 4; |
88 | } |
89 | # endif |
90 | while (num) { |
91 | mul_add(rp[0], ap[0], w, c1); |
92 | ap++; |
93 | rp++; |
94 | num--; |
95 | } |
96 | |
97 | return (c1); |
98 | } |
99 | |
100 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
101 | { |
102 | BN_ULONG c1 = 0; |
103 | |
104 | assert(num >= 0); |
105 | if (num <= 0) |
106 | return (c1); |
107 | |
108 | # ifndef OPENSSL_SMALL_FOOTPRINT |
109 | while (num & ~3) { |
110 | mul(rp[0], ap[0], w, c1); |
111 | mul(rp[1], ap[1], w, c1); |
112 | mul(rp[2], ap[2], w, c1); |
113 | mul(rp[3], ap[3], w, c1); |
114 | ap += 4; |
115 | rp += 4; |
116 | num -= 4; |
117 | } |
118 | # endif |
119 | while (num) { |
120 | mul(rp[0], ap[0], w, c1); |
121 | ap++; |
122 | rp++; |
123 | num--; |
124 | } |
125 | return (c1); |
126 | } |
127 | |
128 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
129 | { |
130 | assert(n >= 0); |
131 | if (n <= 0) |
132 | return; |
133 | |
134 | # ifndef OPENSSL_SMALL_FOOTPRINT |
135 | while (n & ~3) { |
136 | sqr(r[0], r[1], a[0]); |
137 | sqr(r[2], r[3], a[1]); |
138 | sqr(r[4], r[5], a[2]); |
139 | sqr(r[6], r[7], a[3]); |
140 | a += 4; |
141 | r += 8; |
142 | n -= 4; |
143 | } |
144 | # endif |
145 | while (n) { |
146 | sqr(r[0], r[1], a[0]); |
147 | a++; |
148 | r += 2; |
149 | n--; |
150 | } |
151 | } |
152 | |
153 | #else /* !(defined(BN_LLONG) || |
154 | * defined(BN_UMULT_HIGH)) */ |
155 | |
156 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, |
157 | BN_ULONG w) |
158 | { |
159 | BN_ULONG c = 0; |
160 | BN_ULONG bl, bh; |
161 | |
162 | assert(num >= 0); |
163 | if (num <= 0) |
164 | return ((BN_ULONG)0); |
165 | |
166 | bl = LBITS(w); |
167 | bh = HBITS(w); |
168 | |
169 | # ifndef OPENSSL_SMALL_FOOTPRINT |
170 | while (num & ~3) { |
171 | mul_add(rp[0], ap[0], bl, bh, c); |
172 | mul_add(rp[1], ap[1], bl, bh, c); |
173 | mul_add(rp[2], ap[2], bl, bh, c); |
174 | mul_add(rp[3], ap[3], bl, bh, c); |
175 | ap += 4; |
176 | rp += 4; |
177 | num -= 4; |
178 | } |
179 | # endif |
180 | while (num) { |
181 | mul_add(rp[0], ap[0], bl, bh, c); |
182 | ap++; |
183 | rp++; |
184 | num--; |
185 | } |
186 | return (c); |
187 | } |
188 | |
189 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
190 | { |
191 | BN_ULONG carry = 0; |
192 | BN_ULONG bl, bh; |
193 | |
194 | assert(num >= 0); |
195 | if (num <= 0) |
196 | return ((BN_ULONG)0); |
197 | |
198 | bl = LBITS(w); |
199 | bh = HBITS(w); |
200 | |
201 | # ifndef OPENSSL_SMALL_FOOTPRINT |
202 | while (num & ~3) { |
203 | mul(rp[0], ap[0], bl, bh, carry); |
204 | mul(rp[1], ap[1], bl, bh, carry); |
205 | mul(rp[2], ap[2], bl, bh, carry); |
206 | mul(rp[3], ap[3], bl, bh, carry); |
207 | ap += 4; |
208 | rp += 4; |
209 | num -= 4; |
210 | } |
211 | # endif |
212 | while (num) { |
213 | mul(rp[0], ap[0], bl, bh, carry); |
214 | ap++; |
215 | rp++; |
216 | num--; |
217 | } |
218 | return (carry); |
219 | } |
220 | |
221 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
222 | { |
223 | assert(n >= 0); |
224 | if (n <= 0) |
225 | return; |
226 | |
227 | # ifndef OPENSSL_SMALL_FOOTPRINT |
228 | while (n & ~3) { |
229 | sqr64(r[0], r[1], a[0]); |
230 | sqr64(r[2], r[3], a[1]); |
231 | sqr64(r[4], r[5], a[2]); |
232 | sqr64(r[6], r[7], a[3]); |
233 | a += 4; |
234 | r += 8; |
235 | n -= 4; |
236 | } |
237 | # endif |
238 | while (n) { |
239 | sqr64(r[0], r[1], a[0]); |
240 | a++; |
241 | r += 2; |
242 | n--; |
243 | } |
244 | } |
245 | |
246 | #endif /* !(defined(BN_LLONG) || |
247 | * defined(BN_UMULT_HIGH)) */ |
248 | |
249 | #if defined(BN_LLONG) && defined(BN_DIV2W) |
250 | |
251 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
252 | { |
253 | return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d)); |
254 | } |
255 | |
256 | #else |
257 | |
258 | /* Divide h,l by d and return the result. */ |
259 | /* I need to test this some more :-( */ |
260 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
261 | { |
262 | BN_ULONG dh, dl, q, ret = 0, th, tl, t; |
263 | int i, count = 2; |
264 | |
265 | if (d == 0) |
266 | return (BN_MASK2); |
267 | |
268 | i = BN_num_bits_word(d); |
269 | assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); |
270 | |
271 | i = BN_BITS2 - i; |
272 | if (h >= d) |
273 | h -= d; |
274 | |
275 | if (i) { |
276 | d <<= i; |
277 | h = (h << i) | (l >> (BN_BITS2 - i)); |
278 | l <<= i; |
279 | } |
280 | dh = (d & BN_MASK2h) >> BN_BITS4; |
281 | dl = (d & BN_MASK2l); |
282 | for (;;) { |
283 | if ((h >> BN_BITS4) == dh) |
284 | q = BN_MASK2l; |
285 | else |
286 | q = h / dh; |
287 | |
288 | th = q * dh; |
289 | tl = dl * q; |
290 | for (;;) { |
291 | t = h - th; |
292 | if ((t & BN_MASK2h) || |
293 | ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) |
294 | break; |
295 | q--; |
296 | th -= dh; |
297 | tl -= dl; |
298 | } |
299 | t = (tl >> BN_BITS4); |
300 | tl = (tl << BN_BITS4) & BN_MASK2h; |
301 | th += t; |
302 | |
303 | if (l < tl) |
304 | th++; |
305 | l -= tl; |
306 | if (h < th) { |
307 | h += d; |
308 | q--; |
309 | } |
310 | h -= th; |
311 | |
312 | if (--count == 0) |
313 | break; |
314 | |
315 | ret = q << BN_BITS4; |
316 | h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2; |
317 | l = (l & BN_MASK2l) << BN_BITS4; |
318 | } |
319 | ret |= q; |
320 | return (ret); |
321 | } |
322 | #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ |
323 | |
324 | #ifdef BN_LLONG |
325 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
326 | int n) |
327 | { |
328 | BN_ULLONG ll = 0; |
329 | |
330 | assert(n >= 0); |
331 | if (n <= 0) |
332 | return ((BN_ULONG)0); |
333 | |
334 | # ifndef OPENSSL_SMALL_FOOTPRINT |
335 | while (n & ~3) { |
336 | ll += (BN_ULLONG) a[0] + b[0]; |
337 | r[0] = (BN_ULONG)ll & BN_MASK2; |
338 | ll >>= BN_BITS2; |
339 | ll += (BN_ULLONG) a[1] + b[1]; |
340 | r[1] = (BN_ULONG)ll & BN_MASK2; |
341 | ll >>= BN_BITS2; |
342 | ll += (BN_ULLONG) a[2] + b[2]; |
343 | r[2] = (BN_ULONG)ll & BN_MASK2; |
344 | ll >>= BN_BITS2; |
345 | ll += (BN_ULLONG) a[3] + b[3]; |
346 | r[3] = (BN_ULONG)ll & BN_MASK2; |
347 | ll >>= BN_BITS2; |
348 | a += 4; |
349 | b += 4; |
350 | r += 4; |
351 | n -= 4; |
352 | } |
353 | # endif |
354 | while (n) { |
355 | ll += (BN_ULLONG) a[0] + b[0]; |
356 | r[0] = (BN_ULONG)ll & BN_MASK2; |
357 | ll >>= BN_BITS2; |
358 | a++; |
359 | b++; |
360 | r++; |
361 | n--; |
362 | } |
363 | return ((BN_ULONG)ll); |
364 | } |
365 | #else /* !BN_LLONG */ |
366 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
367 | int n) |
368 | { |
369 | BN_ULONG c, l, t; |
370 | |
371 | assert(n >= 0); |
372 | if (n <= 0) |
373 | return ((BN_ULONG)0); |
374 | |
375 | c = 0; |
376 | # ifndef OPENSSL_SMALL_FOOTPRINT |
377 | while (n & ~3) { |
378 | t = a[0]; |
379 | t = (t + c) & BN_MASK2; |
380 | c = (t < c); |
381 | l = (t + b[0]) & BN_MASK2; |
382 | c += (l < t); |
383 | r[0] = l; |
384 | t = a[1]; |
385 | t = (t + c) & BN_MASK2; |
386 | c = (t < c); |
387 | l = (t + b[1]) & BN_MASK2; |
388 | c += (l < t); |
389 | r[1] = l; |
390 | t = a[2]; |
391 | t = (t + c) & BN_MASK2; |
392 | c = (t < c); |
393 | l = (t + b[2]) & BN_MASK2; |
394 | c += (l < t); |
395 | r[2] = l; |
396 | t = a[3]; |
397 | t = (t + c) & BN_MASK2; |
398 | c = (t < c); |
399 | l = (t + b[3]) & BN_MASK2; |
400 | c += (l < t); |
401 | r[3] = l; |
402 | a += 4; |
403 | b += 4; |
404 | r += 4; |
405 | n -= 4; |
406 | } |
407 | # endif |
408 | while (n) { |
409 | t = a[0]; |
410 | t = (t + c) & BN_MASK2; |
411 | c = (t < c); |
412 | l = (t + b[0]) & BN_MASK2; |
413 | c += (l < t); |
414 | r[0] = l; |
415 | a++; |
416 | b++; |
417 | r++; |
418 | n--; |
419 | } |
420 | return ((BN_ULONG)c); |
421 | } |
422 | #endif /* !BN_LLONG */ |
423 | |
424 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
425 | int n) |
426 | { |
427 | BN_ULONG t1, t2; |
428 | int c = 0; |
429 | |
430 | assert(n >= 0); |
431 | if (n <= 0) |
432 | return ((BN_ULONG)0); |
433 | |
434 | #ifndef OPENSSL_SMALL_FOOTPRINT |
435 | while (n & ~3) { |
436 | t1 = a[0]; |
437 | t2 = b[0]; |
438 | r[0] = (t1 - t2 - c) & BN_MASK2; |
439 | if (t1 != t2) |
440 | c = (t1 < t2); |
441 | t1 = a[1]; |
442 | t2 = b[1]; |
443 | r[1] = (t1 - t2 - c) & BN_MASK2; |
444 | if (t1 != t2) |
445 | c = (t1 < t2); |
446 | t1 = a[2]; |
447 | t2 = b[2]; |
448 | r[2] = (t1 - t2 - c) & BN_MASK2; |
449 | if (t1 != t2) |
450 | c = (t1 < t2); |
451 | t1 = a[3]; |
452 | t2 = b[3]; |
453 | r[3] = (t1 - t2 - c) & BN_MASK2; |
454 | if (t1 != t2) |
455 | c = (t1 < t2); |
456 | a += 4; |
457 | b += 4; |
458 | r += 4; |
459 | n -= 4; |
460 | } |
461 | #endif |
462 | while (n) { |
463 | t1 = a[0]; |
464 | t2 = b[0]; |
465 | r[0] = (t1 - t2 - c) & BN_MASK2; |
466 | if (t1 != t2) |
467 | c = (t1 < t2); |
468 | a++; |
469 | b++; |
470 | r++; |
471 | n--; |
472 | } |
473 | return (c); |
474 | } |
475 | |
476 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
477 | |
478 | # undef bn_mul_comba8 |
479 | # undef bn_mul_comba4 |
480 | # undef bn_sqr_comba8 |
481 | # undef bn_sqr_comba4 |
482 | |
483 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ |
484 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ |
485 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
486 | /* |
487 | * sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number |
488 | * c=(c2,c1,c0) |
489 | */ |
490 | |
491 | # ifdef BN_LLONG |
492 | /* |
493 | * Keep in mind that additions to multiplication result can not |
494 | * overflow, because its high half cannot be all-ones. |
495 | */ |
496 | # define mul_add_c(a,b,c0,c1,c2) do { \ |
497 | BN_ULONG hi; \ |
498 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
499 | t += c0; /* no carry */ \ |
500 | c0 = (BN_ULONG)Lw(t); \ |
501 | hi = (BN_ULONG)Hw(t); \ |
502 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
503 | } while(0) |
504 | |
505 | # define mul_add_c2(a,b,c0,c1,c2) do { \ |
506 | BN_ULONG hi; \ |
507 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
508 | BN_ULLONG tt = t+c0; /* no carry */ \ |
509 | c0 = (BN_ULONG)Lw(tt); \ |
510 | hi = (BN_ULONG)Hw(tt); \ |
511 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
512 | t += c0; /* no carry */ \ |
513 | c0 = (BN_ULONG)Lw(t); \ |
514 | hi = (BN_ULONG)Hw(t); \ |
515 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
516 | } while(0) |
517 | |
518 | # define sqr_add_c(a,i,c0,c1,c2) do { \ |
519 | BN_ULONG hi; \ |
520 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ |
521 | t += c0; /* no carry */ \ |
522 | c0 = (BN_ULONG)Lw(t); \ |
523 | hi = (BN_ULONG)Hw(t); \ |
524 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
525 | } while(0) |
526 | |
527 | # define sqr_add_c2(a,i,j,c0,c1,c2) \ |
528 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
529 | |
530 | # elif defined(BN_UMULT_LOHI) |
531 | /* |
532 | * Keep in mind that additions to hi can not overflow, because |
533 | * the high word of a multiplication result cannot be all-ones. |
534 | */ |
535 | # define mul_add_c(a,b,c0,c1,c2) do { \ |
536 | BN_ULONG ta = (a), tb = (b); \ |
537 | BN_ULONG lo, hi; \ |
538 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
539 | c0 += lo; hi += (c0<lo)?1:0; \ |
540 | c1 += hi; c2 += (c1<hi)?1:0; \ |
541 | } while(0) |
542 | |
543 | # define mul_add_c2(a,b,c0,c1,c2) do { \ |
544 | BN_ULONG ta = (a), tb = (b); \ |
545 | BN_ULONG lo, hi, tt; \ |
546 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
547 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
548 | c1 += tt; c2 += (c1<tt)?1:0; \ |
549 | c0 += lo; hi += (c0<lo)?1:0; \ |
550 | c1 += hi; c2 += (c1<hi)?1:0; \ |
551 | } while(0) |
552 | |
553 | # define sqr_add_c(a,i,c0,c1,c2) do { \ |
554 | BN_ULONG ta = (a)[i]; \ |
555 | BN_ULONG lo, hi; \ |
556 | BN_UMULT_LOHI(lo,hi,ta,ta); \ |
557 | c0 += lo; hi += (c0<lo)?1:0; \ |
558 | c1 += hi; c2 += (c1<hi)?1:0; \ |
559 | } while(0) |
560 | |
561 | # define sqr_add_c2(a,i,j,c0,c1,c2) \ |
562 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
563 | |
564 | # elif defined(BN_UMULT_HIGH) |
565 | /* |
566 | * Keep in mind that additions to hi can not overflow, because |
567 | * the high word of a multiplication result cannot be all-ones. |
568 | */ |
569 | # define mul_add_c(a,b,c0,c1,c2) do { \ |
570 | BN_ULONG ta = (a), tb = (b); \ |
571 | BN_ULONG lo = ta * tb; \ |
572 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
573 | c0 += lo; hi += (c0<lo)?1:0; \ |
574 | c1 += hi; c2 += (c1<hi)?1:0; \ |
575 | } while(0) |
576 | |
577 | # define mul_add_c2(a,b,c0,c1,c2) do { \ |
578 | BN_ULONG ta = (a), tb = (b), tt; \ |
579 | BN_ULONG lo = ta * tb; \ |
580 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
581 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ |
582 | c1 += tt; c2 += (c1<tt)?1:0; \ |
583 | c0 += lo; hi += (c0<lo)?1:0; \ |
584 | c1 += hi; c2 += (c1<hi)?1:0; \ |
585 | } while(0) |
586 | |
587 | # define sqr_add_c(a,i,c0,c1,c2) do { \ |
588 | BN_ULONG ta = (a)[i]; \ |
589 | BN_ULONG lo = ta * ta; \ |
590 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ |
591 | c0 += lo; hi += (c0<lo)?1:0; \ |
592 | c1 += hi; c2 += (c1<hi)?1:0; \ |
593 | } while(0) |
594 | |
595 | # define sqr_add_c2(a,i,j,c0,c1,c2) \ |
596 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
597 | |
598 | # else /* !BN_LLONG */ |
599 | /* |
600 | * Keep in mind that additions to hi can not overflow, because |
601 | * the high word of a multiplication result cannot be all-ones. |
602 | */ |
603 | # define mul_add_c(a,b,c0,c1,c2) do { \ |
604 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
605 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
606 | mul64(lo,hi,bl,bh); \ |
607 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
608 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
609 | } while(0) |
610 | |
611 | # define mul_add_c2(a,b,c0,c1,c2) do { \ |
612 | BN_ULONG tt; \ |
613 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
614 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
615 | mul64(lo,hi,bl,bh); \ |
616 | tt = hi; \ |
617 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ |
618 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ |
619 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
620 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
621 | } while(0) |
622 | |
623 | # define sqr_add_c(a,i,c0,c1,c2) do { \ |
624 | BN_ULONG lo, hi; \ |
625 | sqr64(lo,hi,(a)[i]); \ |
626 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
627 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
628 | } while(0) |
629 | |
630 | # define sqr_add_c2(a,i,j,c0,c1,c2) \ |
631 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
632 | # endif /* !BN_LLONG */ |
633 | |
634 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
635 | { |
636 | BN_ULONG c1, c2, c3; |
637 | |
638 | c1 = 0; |
639 | c2 = 0; |
640 | c3 = 0; |
641 | mul_add_c(a[0], b[0], c1, c2, c3); |
642 | r[0] = c1; |
643 | c1 = 0; |
644 | mul_add_c(a[0], b[1], c2, c3, c1); |
645 | mul_add_c(a[1], b[0], c2, c3, c1); |
646 | r[1] = c2; |
647 | c2 = 0; |
648 | mul_add_c(a[2], b[0], c3, c1, c2); |
649 | mul_add_c(a[1], b[1], c3, c1, c2); |
650 | mul_add_c(a[0], b[2], c3, c1, c2); |
651 | r[2] = c3; |
652 | c3 = 0; |
653 | mul_add_c(a[0], b[3], c1, c2, c3); |
654 | mul_add_c(a[1], b[2], c1, c2, c3); |
655 | mul_add_c(a[2], b[1], c1, c2, c3); |
656 | mul_add_c(a[3], b[0], c1, c2, c3); |
657 | r[3] = c1; |
658 | c1 = 0; |
659 | mul_add_c(a[4], b[0], c2, c3, c1); |
660 | mul_add_c(a[3], b[1], c2, c3, c1); |
661 | mul_add_c(a[2], b[2], c2, c3, c1); |
662 | mul_add_c(a[1], b[3], c2, c3, c1); |
663 | mul_add_c(a[0], b[4], c2, c3, c1); |
664 | r[4] = c2; |
665 | c2 = 0; |
666 | mul_add_c(a[0], b[5], c3, c1, c2); |
667 | mul_add_c(a[1], b[4], c3, c1, c2); |
668 | mul_add_c(a[2], b[3], c3, c1, c2); |
669 | mul_add_c(a[3], b[2], c3, c1, c2); |
670 | mul_add_c(a[4], b[1], c3, c1, c2); |
671 | mul_add_c(a[5], b[0], c3, c1, c2); |
672 | r[5] = c3; |
673 | c3 = 0; |
674 | mul_add_c(a[6], b[0], c1, c2, c3); |
675 | mul_add_c(a[5], b[1], c1, c2, c3); |
676 | mul_add_c(a[4], b[2], c1, c2, c3); |
677 | mul_add_c(a[3], b[3], c1, c2, c3); |
678 | mul_add_c(a[2], b[4], c1, c2, c3); |
679 | mul_add_c(a[1], b[5], c1, c2, c3); |
680 | mul_add_c(a[0], b[6], c1, c2, c3); |
681 | r[6] = c1; |
682 | c1 = 0; |
683 | mul_add_c(a[0], b[7], c2, c3, c1); |
684 | mul_add_c(a[1], b[6], c2, c3, c1); |
685 | mul_add_c(a[2], b[5], c2, c3, c1); |
686 | mul_add_c(a[3], b[4], c2, c3, c1); |
687 | mul_add_c(a[4], b[3], c2, c3, c1); |
688 | mul_add_c(a[5], b[2], c2, c3, c1); |
689 | mul_add_c(a[6], b[1], c2, c3, c1); |
690 | mul_add_c(a[7], b[0], c2, c3, c1); |
691 | r[7] = c2; |
692 | c2 = 0; |
693 | mul_add_c(a[7], b[1], c3, c1, c2); |
694 | mul_add_c(a[6], b[2], c3, c1, c2); |
695 | mul_add_c(a[5], b[3], c3, c1, c2); |
696 | mul_add_c(a[4], b[4], c3, c1, c2); |
697 | mul_add_c(a[3], b[5], c3, c1, c2); |
698 | mul_add_c(a[2], b[6], c3, c1, c2); |
699 | mul_add_c(a[1], b[7], c3, c1, c2); |
700 | r[8] = c3; |
701 | c3 = 0; |
702 | mul_add_c(a[2], b[7], c1, c2, c3); |
703 | mul_add_c(a[3], b[6], c1, c2, c3); |
704 | mul_add_c(a[4], b[5], c1, c2, c3); |
705 | mul_add_c(a[5], b[4], c1, c2, c3); |
706 | mul_add_c(a[6], b[3], c1, c2, c3); |
707 | mul_add_c(a[7], b[2], c1, c2, c3); |
708 | r[9] = c1; |
709 | c1 = 0; |
710 | mul_add_c(a[7], b[3], c2, c3, c1); |
711 | mul_add_c(a[6], b[4], c2, c3, c1); |
712 | mul_add_c(a[5], b[5], c2, c3, c1); |
713 | mul_add_c(a[4], b[6], c2, c3, c1); |
714 | mul_add_c(a[3], b[7], c2, c3, c1); |
715 | r[10] = c2; |
716 | c2 = 0; |
717 | mul_add_c(a[4], b[7], c3, c1, c2); |
718 | mul_add_c(a[5], b[6], c3, c1, c2); |
719 | mul_add_c(a[6], b[5], c3, c1, c2); |
720 | mul_add_c(a[7], b[4], c3, c1, c2); |
721 | r[11] = c3; |
722 | c3 = 0; |
723 | mul_add_c(a[7], b[5], c1, c2, c3); |
724 | mul_add_c(a[6], b[6], c1, c2, c3); |
725 | mul_add_c(a[5], b[7], c1, c2, c3); |
726 | r[12] = c1; |
727 | c1 = 0; |
728 | mul_add_c(a[6], b[7], c2, c3, c1); |
729 | mul_add_c(a[7], b[6], c2, c3, c1); |
730 | r[13] = c2; |
731 | c2 = 0; |
732 | mul_add_c(a[7], b[7], c3, c1, c2); |
733 | r[14] = c3; |
734 | r[15] = c1; |
735 | } |
736 | |
737 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
738 | { |
739 | BN_ULONG c1, c2, c3; |
740 | |
741 | c1 = 0; |
742 | c2 = 0; |
743 | c3 = 0; |
744 | mul_add_c(a[0], b[0], c1, c2, c3); |
745 | r[0] = c1; |
746 | c1 = 0; |
747 | mul_add_c(a[0], b[1], c2, c3, c1); |
748 | mul_add_c(a[1], b[0], c2, c3, c1); |
749 | r[1] = c2; |
750 | c2 = 0; |
751 | mul_add_c(a[2], b[0], c3, c1, c2); |
752 | mul_add_c(a[1], b[1], c3, c1, c2); |
753 | mul_add_c(a[0], b[2], c3, c1, c2); |
754 | r[2] = c3; |
755 | c3 = 0; |
756 | mul_add_c(a[0], b[3], c1, c2, c3); |
757 | mul_add_c(a[1], b[2], c1, c2, c3); |
758 | mul_add_c(a[2], b[1], c1, c2, c3); |
759 | mul_add_c(a[3], b[0], c1, c2, c3); |
760 | r[3] = c1; |
761 | c1 = 0; |
762 | mul_add_c(a[3], b[1], c2, c3, c1); |
763 | mul_add_c(a[2], b[2], c2, c3, c1); |
764 | mul_add_c(a[1], b[3], c2, c3, c1); |
765 | r[4] = c2; |
766 | c2 = 0; |
767 | mul_add_c(a[2], b[3], c3, c1, c2); |
768 | mul_add_c(a[3], b[2], c3, c1, c2); |
769 | r[5] = c3; |
770 | c3 = 0; |
771 | mul_add_c(a[3], b[3], c1, c2, c3); |
772 | r[6] = c1; |
773 | r[7] = c2; |
774 | } |
775 | |
776 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
777 | { |
778 | BN_ULONG c1, c2, c3; |
779 | |
780 | c1 = 0; |
781 | c2 = 0; |
782 | c3 = 0; |
783 | sqr_add_c(a, 0, c1, c2, c3); |
784 | r[0] = c1; |
785 | c1 = 0; |
786 | sqr_add_c2(a, 1, 0, c2, c3, c1); |
787 | r[1] = c2; |
788 | c2 = 0; |
789 | sqr_add_c(a, 1, c3, c1, c2); |
790 | sqr_add_c2(a, 2, 0, c3, c1, c2); |
791 | r[2] = c3; |
792 | c3 = 0; |
793 | sqr_add_c2(a, 3, 0, c1, c2, c3); |
794 | sqr_add_c2(a, 2, 1, c1, c2, c3); |
795 | r[3] = c1; |
796 | c1 = 0; |
797 | sqr_add_c(a, 2, c2, c3, c1); |
798 | sqr_add_c2(a, 3, 1, c2, c3, c1); |
799 | sqr_add_c2(a, 4, 0, c2, c3, c1); |
800 | r[4] = c2; |
801 | c2 = 0; |
802 | sqr_add_c2(a, 5, 0, c3, c1, c2); |
803 | sqr_add_c2(a, 4, 1, c3, c1, c2); |
804 | sqr_add_c2(a, 3, 2, c3, c1, c2); |
805 | r[5] = c3; |
806 | c3 = 0; |
807 | sqr_add_c(a, 3, c1, c2, c3); |
808 | sqr_add_c2(a, 4, 2, c1, c2, c3); |
809 | sqr_add_c2(a, 5, 1, c1, c2, c3); |
810 | sqr_add_c2(a, 6, 0, c1, c2, c3); |
811 | r[6] = c1; |
812 | c1 = 0; |
813 | sqr_add_c2(a, 7, 0, c2, c3, c1); |
814 | sqr_add_c2(a, 6, 1, c2, c3, c1); |
815 | sqr_add_c2(a, 5, 2, c2, c3, c1); |
816 | sqr_add_c2(a, 4, 3, c2, c3, c1); |
817 | r[7] = c2; |
818 | c2 = 0; |
819 | sqr_add_c(a, 4, c3, c1, c2); |
820 | sqr_add_c2(a, 5, 3, c3, c1, c2); |
821 | sqr_add_c2(a, 6, 2, c3, c1, c2); |
822 | sqr_add_c2(a, 7, 1, c3, c1, c2); |
823 | r[8] = c3; |
824 | c3 = 0; |
825 | sqr_add_c2(a, 7, 2, c1, c2, c3); |
826 | sqr_add_c2(a, 6, 3, c1, c2, c3); |
827 | sqr_add_c2(a, 5, 4, c1, c2, c3); |
828 | r[9] = c1; |
829 | c1 = 0; |
830 | sqr_add_c(a, 5, c2, c3, c1); |
831 | sqr_add_c2(a, 6, 4, c2, c3, c1); |
832 | sqr_add_c2(a, 7, 3, c2, c3, c1); |
833 | r[10] = c2; |
834 | c2 = 0; |
835 | sqr_add_c2(a, 7, 4, c3, c1, c2); |
836 | sqr_add_c2(a, 6, 5, c3, c1, c2); |
837 | r[11] = c3; |
838 | c3 = 0; |
839 | sqr_add_c(a, 6, c1, c2, c3); |
840 | sqr_add_c2(a, 7, 5, c1, c2, c3); |
841 | r[12] = c1; |
842 | c1 = 0; |
843 | sqr_add_c2(a, 7, 6, c2, c3, c1); |
844 | r[13] = c2; |
845 | c2 = 0; |
846 | sqr_add_c(a, 7, c3, c1, c2); |
847 | r[14] = c3; |
848 | r[15] = c1; |
849 | } |
850 | |
851 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
852 | { |
853 | BN_ULONG c1, c2, c3; |
854 | |
855 | c1 = 0; |
856 | c2 = 0; |
857 | c3 = 0; |
858 | sqr_add_c(a, 0, c1, c2, c3); |
859 | r[0] = c1; |
860 | c1 = 0; |
861 | sqr_add_c2(a, 1, 0, c2, c3, c1); |
862 | r[1] = c2; |
863 | c2 = 0; |
864 | sqr_add_c(a, 1, c3, c1, c2); |
865 | sqr_add_c2(a, 2, 0, c3, c1, c2); |
866 | r[2] = c3; |
867 | c3 = 0; |
868 | sqr_add_c2(a, 3, 0, c1, c2, c3); |
869 | sqr_add_c2(a, 2, 1, c1, c2, c3); |
870 | r[3] = c1; |
871 | c1 = 0; |
872 | sqr_add_c(a, 2, c2, c3, c1); |
873 | sqr_add_c2(a, 3, 1, c2, c3, c1); |
874 | r[4] = c2; |
875 | c2 = 0; |
876 | sqr_add_c2(a, 3, 2, c3, c1, c2); |
877 | r[5] = c3; |
878 | c3 = 0; |
879 | sqr_add_c(a, 3, c1, c2, c3); |
880 | r[6] = c1; |
881 | r[7] = c2; |
882 | } |
883 | |
884 | # ifdef OPENSSL_NO_ASM |
885 | # ifdef OPENSSL_BN_ASM_MONT |
886 | # include <alloca.h> |
887 | /* |
888 | * This is essentially reference implementation, which may or may not |
889 | * result in performance improvement. E.g. on IA-32 this routine was |
890 | * observed to give 40% faster rsa1024 private key operations and 10% |
891 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only |
892 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a |
893 | * reference implementation, one to be used as starting point for |
894 | * platform-specific assembler. Mentioned numbers apply to compiler |
895 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and |
896 | * can vary not only from platform to platform, but even for compiler |
897 | * versions. Assembler vs. assembler improvement coefficients can |
898 | * [and are known to] differ and are to be documented elsewhere. |
899 | */ |
900 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
901 | const BN_ULONG *np, const BN_ULONG *n0p, int num) |
902 | { |
903 | BN_ULONG c0, c1, ml, *tp, n0; |
904 | # ifdef mul64 |
905 | BN_ULONG mh; |
906 | # endif |
907 | volatile BN_ULONG *vp; |
908 | int i = 0, j; |
909 | |
910 | # if 0 /* template for platform-specific |
911 | * implementation */ |
912 | if (ap == bp) |
913 | return bn_sqr_mont(rp, ap, np, n0p, num); |
914 | # endif |
915 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); |
916 | |
917 | n0 = *n0p; |
918 | |
919 | c0 = 0; |
920 | ml = bp[0]; |
921 | # ifdef mul64 |
922 | mh = HBITS(ml); |
923 | ml = LBITS(ml); |
924 | for (j = 0; j < num; ++j) |
925 | mul(tp[j], ap[j], ml, mh, c0); |
926 | # else |
927 | for (j = 0; j < num; ++j) |
928 | mul(tp[j], ap[j], ml, c0); |
929 | # endif |
930 | |
931 | tp[num] = c0; |
932 | tp[num + 1] = 0; |
933 | goto enter; |
934 | |
935 | for (i = 0; i < num; i++) { |
936 | c0 = 0; |
937 | ml = bp[i]; |
938 | # ifdef mul64 |
939 | mh = HBITS(ml); |
940 | ml = LBITS(ml); |
941 | for (j = 0; j < num; ++j) |
942 | mul_add(tp[j], ap[j], ml, mh, c0); |
943 | # else |
944 | for (j = 0; j < num; ++j) |
945 | mul_add(tp[j], ap[j], ml, c0); |
946 | # endif |
947 | c1 = (tp[num] + c0) & BN_MASK2; |
948 | tp[num] = c1; |
949 | tp[num + 1] = (c1 < c0 ? 1 : 0); |
950 | enter: |
951 | c1 = tp[0]; |
952 | ml = (c1 * n0) & BN_MASK2; |
953 | c0 = 0; |
954 | # ifdef mul64 |
955 | mh = HBITS(ml); |
956 | ml = LBITS(ml); |
957 | mul_add(c1, np[0], ml, mh, c0); |
958 | # else |
959 | mul_add(c1, ml, np[0], c0); |
960 | # endif |
961 | for (j = 1; j < num; j++) { |
962 | c1 = tp[j]; |
963 | # ifdef mul64 |
964 | mul_add(c1, np[j], ml, mh, c0); |
965 | # else |
966 | mul_add(c1, ml, np[j], c0); |
967 | # endif |
968 | tp[j - 1] = c1 & BN_MASK2; |
969 | } |
970 | c1 = (tp[num] + c0) & BN_MASK2; |
971 | tp[num - 1] = c1; |
972 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); |
973 | } |
974 | |
975 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { |
976 | c0 = bn_sub_words(rp, tp, np, num); |
977 | if (tp[num] != 0 || c0 == 0) { |
978 | for (i = 0; i < num + 2; i++) |
979 | vp[i] = 0; |
980 | return 1; |
981 | } |
982 | } |
983 | for (i = 0; i < num; i++) |
984 | rp[i] = tp[i], vp[i] = 0; |
985 | vp[num] = 0; |
986 | vp[num + 1] = 0; |
987 | return 1; |
988 | } |
989 | # else |
990 | /* |
991 | * Return value of 0 indicates that multiplication/convolution was not |
992 | * performed to signal the caller to fall down to alternative/original |
993 | * code-path. |
994 | */ |
995 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
996 | const BN_ULONG *np, const BN_ULONG *n0, int num) |
997 | { |
998 | return 0; |
999 | } |
1000 | # endif /* OPENSSL_BN_ASM_MONT */ |
1001 | # endif |
1002 | |
1003 | #else /* !BN_MUL_COMBA */ |
1004 | |
1005 | /* hmm... is it faster just to do a multiply? */ |
1006 | # undef bn_sqr_comba4 |
1007 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
1008 | { |
1009 | BN_ULONG t[8]; |
1010 | bn_sqr_normal(r, a, 4, t); |
1011 | } |
1012 | |
1013 | # undef bn_sqr_comba8 |
1014 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
1015 | { |
1016 | BN_ULONG t[16]; |
1017 | bn_sqr_normal(r, a, 8, t); |
1018 | } |
1019 | |
1020 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
1021 | { |
1022 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); |
1023 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); |
1024 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); |
1025 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); |
1026 | } |
1027 | |
1028 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
1029 | { |
1030 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); |
1031 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); |
1032 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); |
1033 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); |
1034 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); |
1035 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); |
1036 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); |
1037 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); |
1038 | } |
1039 | |
1040 | # ifdef OPENSSL_NO_ASM |
1041 | # ifdef OPENSSL_BN_ASM_MONT |
1042 | # include <alloca.h> |
1043 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
1044 | const BN_ULONG *np, const BN_ULONG *n0p, int num) |
1045 | { |
1046 | BN_ULONG c0, c1, *tp, n0 = *n0p; |
1047 | volatile BN_ULONG *vp; |
1048 | int i = 0, j; |
1049 | |
1050 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); |
1051 | |
1052 | for (i = 0; i <= num; i++) |
1053 | tp[i] = 0; |
1054 | |
1055 | for (i = 0; i < num; i++) { |
1056 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); |
1057 | c1 = (tp[num] + c0) & BN_MASK2; |
1058 | tp[num] = c1; |
1059 | tp[num + 1] = (c1 < c0 ? 1 : 0); |
1060 | |
1061 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); |
1062 | c1 = (tp[num] + c0) & BN_MASK2; |
1063 | tp[num] = c1; |
1064 | tp[num + 1] += (c1 < c0 ? 1 : 0); |
1065 | for (j = 0; j <= num; j++) |
1066 | tp[j] = tp[j + 1]; |
1067 | } |
1068 | |
1069 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { |
1070 | c0 = bn_sub_words(rp, tp, np, num); |
1071 | if (tp[num] != 0 || c0 == 0) { |
1072 | for (i = 0; i < num + 2; i++) |
1073 | vp[i] = 0; |
1074 | return 1; |
1075 | } |
1076 | } |
1077 | for (i = 0; i < num; i++) |
1078 | rp[i] = tp[i], vp[i] = 0; |
1079 | vp[num] = 0; |
1080 | vp[num + 1] = 0; |
1081 | return 1; |
1082 | } |
1083 | # else |
1084 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
1085 | const BN_ULONG *np, const BN_ULONG *n0, int num) |
1086 | { |
1087 | return 0; |
1088 | } |
1089 | # endif /* OPENSSL_BN_ASM_MONT */ |
1090 | # endif |
1091 | |
1092 | #endif /* !BN_MUL_COMBA */ |
1093 | |