montgomery_inv.c source code [engine/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c]

1	/ Copyright 2016 Brian Smith.*
2	*
3	* Permission to use, copy, modify, and/or distribute this software for any
4	* purpose with or without fee is hereby granted, provided that the above
5	* copyright notice and this permission notice appear in all copies.
6	*
7	* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8	* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9	* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10	* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12	* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13	* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15	#include <openssl/bn.h>
16
17	#include <assert.h>
18
19	#include "internal.h"
20	#include "../../internal.h"
21
22
23	static uint64_t bn_neg_inv_mod_r_u64(uint64_t n);
24
25	OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == `1` \|\| BN_MONT_CTX_N0_LIMBS == `2`,
26	"BN_MONT_CTX_N0_LIMBS value is invalid");
27	OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS ==
28	sizeof(uint64_t),
29	"uint64_t is insufficient precision for n0");
30
31	// LG_LITTLE_R is log_2(r).
32	#define LG_LITTLE_R (BN_MONT_CTX_N0_LIMBS * BN_BITS2)
33
34	uint64_t bn_mont_n0(const BIGNUM *n) {
35	// These conditions are checked by the caller, \|BN_MONT_CTX_set\| or
36	// \|BN_MONT_CTX_new_consttime\|.
37	assert(!BN_is_zero(n));
38	assert(!BN_is_negative(n));
39	assert(BN_is_odd(n));
40
41	// r == 2(BN_MONT_CTX_N0_LIMBS BN_BITS2) and LG_LITTLE_R == lg(r). This*
42	// ensures that we can do integer division by \|r\| by simply ignoring
43	// \|BN_MONT_CTX_N0_LIMBS\| limbs. Similarly, we can calculate values modulo
44	// \|r\| by just looking at the lowest \|BN_MONT_CTX_N0_LIMBS\| limbs. This is
45	// what makes Montgomery multiplication efficient.
46	//
47	// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
48	// with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
49	// multi-limb Montgomery multiplication of \|a b (mod n)\|, given the*
50	// unreduced product \|t == a b\|, we repeatedly calculate:*
51	//
52	// t1 := t % r \|t1\| is \|t\|'s lowest limb (see previous paragraph).
53	// t2 := t1n0n
54	// t3 := t + t2
55	// t := t3 / r copy all limbs of \|t3\| except the lowest to \|t\|.
56	//
57	// In the last step, it would only make sense to ignore the lowest limb of
58	// \|t3\| if it were zero. The middle steps ensure that this is the case:
59	//
60	// t3 == 0 (mod r)
61	// t + t2 == 0 (mod r)
62	// t + t1n0n == 0 (mod r)
63	// t1n0n == -t (mod r)
64	// tn0n == -t (mod r)
65	// n0n == -1 (mod r)*
66	// n0 == -1/n (mod r)
67	//
68	// Thus, in each iteration of the loop, we multiply by the constant factor
69	// \|n0\|, the negative inverse of n (mod r).
70
71	// n_mod_r = n % r. As explained above, this is done by taking the lowest
72	// \|BN_MONT_CTX_N0_LIMBS\| limbs of \|n\|.
73	uint64_t n_mod_r = n->d[`0`];
74	#if BN_MONT_CTX_N0_LIMBS == 2
75	if (n->width > `1`) {
76	n_mod_r \|= (uint64_t)n->d[`1`] << BN_BITS2;
77	}
78	#endif
79
80	return bn_neg_inv_mod_r_u64(n_mod_r);
81	}
82
83	// bn_neg_inv_r_mod_n_u64 calculates the -1/n mod r; i.e. it calculates \|v\|
84	// such that ur - vn == 1. \|r\| is the constant defined in \|bn_mont_n0\|. \|n\|
85	// must be odd.
86	//
87	// This is derived from \|xbinGCD\| in Henry S. Warren, Jr.'s "Montgomery
88	// Multiplication" (http://www.hackersdelight.org/MontgomeryMultiplication.pdf).
89	// It is very similar to the MODULAR-INVERSE function in Stephen R. Dussé's and
90	// Burton S. Kaliski Jr.'s "A Cryptographic Library for the Motorola DSP56000"
91	// (http://link.springer.com/chapter/10.1007%2F3-540-46877-3_21).
92	//
93	// This is inspired by Joppe W. Bos's "Constant Time Modular Inversion"
94	// (http://www.joppebos.com/files/CTInversion.pdf) so that the inversion is
95	// constant-time with respect to \|n\|. We assume uint64_t additions,
96	// subtractions, shifts, and bitwise operations are all constant time, which
97	// may be a large leap of faith on 32-bit targets. We avoid division and
98	// multiplication, which tend to be the most problematic in terms of timing
99	// leaks.
100	//
101	// Most GCD implementations return values such that \|ur + vn == 1\|, so the
102	// caller would have to negate the resultant \|v\| for the purpose of Montgomery
103	// multiplication. This implementation does the negation implicitly by doing
104	// the computations as a difference instead of a sum.
105	static uint64_t bn_neg_inv_mod_r_u64(uint64_t n) {
106	assert(n % `2` == `1`);
107
108	// alpha == 2(lg r - 1) == r / 2.
109	static const uint64_t alpha = UINT64_C(`1`) << (LG_LITTLE_R - `1`);
110
111	const uint64_t beta = n;
112
113	uint64_t u = `1`;
114	uint64_t v = `0`;
115
116	// The invariant maintained from here on is:
117	// 2(lg r - i) == u2alpha - vbeta.*
118	for (size_t i = `0`; i < LG_LITTLE_R; ++i) {
119	#if BN_BITS2 == 64 && defined(BN_ULLONG)
120	assert((BN_ULLONG)(`1`) << (LG_LITTLE_R - i) ==
121	((BN_ULLONG)u * `2` * alpha) - ((BN_ULLONG)v * beta));
122	#endif
123
124	// Delete a common factor of 2 in u and v if \|u\| is even. Otherwise, set
125	// \|u = (u + beta) / 2\| and \|v = (v / 2) + alpha\|.
126
127	uint64_t u_is_odd = UINT64_C(`0`) - (u & `1`); // Either 0xff..ff or 0.
128
129	// The addition can overflow, so use Dietz's method for it.
130	//
131	// Dietz calculates (x+y)/2 by (x⊕y)>>1 + x&y. This is valid for all
132	// (unsigned) x and y, even when x+y overflows. Evidence for 32-bit values
133	// (embedded in 64 bits to so that overflow can be ignored):
134	//
135	// (declare-fun x () (_ BitVec 64))
136	// (declare-fun y () (_ BitVec 64))
137	// (assert (let (
138	// (one (_ bv1 64))
139	// (thirtyTwo (_ bv32 64)))
140	// (and
141	// (bvult x (bvshl one thirtyTwo))
142	// (bvult y (bvshl one thirtyTwo))
143	// (not (=
144	// (bvadd (bvlshr (bvxor x y) one) (bvand x y))
145	// (bvlshr (bvadd x y) one)))
146	// )))
147	// (check-sat)
148	uint64_t beta_if_u_is_odd = beta & u_is_odd; // Either \|beta\| or 0.
149	u = ((u ^ beta_if_u_is_odd) >> `1`) + (u & beta_if_u_is_odd);
150
151	uint64_t alpha_if_u_is_odd = alpha & u_is_odd; // Either \|alpha\| or 0.
152	v = (v >> `1`) + alpha_if_u_is_odd;
153	}
154
155	// The invariant now shows that ur - vn == 1 since r == 2 alpha.*
156	#if BN_BITS2 == 64 && defined(BN_ULLONG)
157	assert(`1` == ((BN_ULLONG)u * `2` * alpha) - ((BN_ULLONG)v * beta));
158	#endif
159
160	return v;
161	}
162
163	int bn_mod_exp_base_2_consttime(BIGNUM r, unsigned* p, const BIGNUM *n,
164	BN_CTX *ctx) {
165	assert(!BN_is_zero(n));
166	assert(!BN_is_negative(n));
167	assert(BN_is_odd(n));
168
169	BN_zero(r);
170
171	unsigned n_bits = BN_num_bits(n);
172	assert(n_bits != `0`);
173	assert(p > n_bits);
174	if (n_bits == `1`) {
175	return `1`;
176	}
177
178	// Set \|r\| to the larger power of two smaller than \|n\|, then shift with
179	// reductions the rest of the way.
180	if (!BN_set_bit(r, n_bits - `1`) \|\|
181	!bn_mod_lshift_consttime(r, r, p - (n_bits - `1`), n, ctx)) {
182	return `0`;
183	}
184
185	return `1`;
186	}
187

Browse the source code of engine/third_party/boringssl/src/crypto/fipsmodule/bn/montgomery_inv.c