ecp_nistp224.c source code [ClickHouse/contrib/openssl/crypto/ec/ecp_nistp224.c]

1	/*
2	* Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
3	*
4	* Licensed under the Apache License 2.0 (the "License"). You may not use
5	* this file except in compliance with the License. You can obtain a copy
6	* in the file LICENSE in the source distribution or at
7	* https://www.openssl.org/source/license.html
8	*/
9
/* Copyright 2011 Google Inc.
11	*
12	* Licensed under the Apache License, Version 2.0 (the "License");
13	*
14	* you may not use this file except in compliance with the License.
15	* You may obtain a copy of the License at
16	*
17	* http://www.apache.org/licenses/LICENSE-2.0
18	*
19	* Unless required by applicable law or agreed to in writing, software
20	* distributed under the License is distributed on an "AS IS" BASIS,
21	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22	* See the License for the specific language governing permissions and
23	* limitations under the License.
24	*/
25
26	/*
27	* A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28	*
29	* Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30	* and Adam Langley's public domain 64-bit C implementation of curve25519
31	*/
32
33	#include <openssl/opensslconf.h>
34	#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35	NON_EMPTY_TRANSLATION_UNIT
36	#else
37
38	# include <stdint.h>
39	# include <string.h>
40	# include <openssl/err.h>
41	# include "ec_local.h"
42
/*
 * 128-bit unsigned integer type used for wide limbs.  This relies on the
 * compiler-provided __uint128_t and is guarded by __SIZEOF_INT128__;
 * compilation fails with an explicit error when it is not available.
 */
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  /* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Your compiler doesn't appear to support 128-bit integer types"
# endif

/* Short aliases for the fixed-width byte and 64-bit word types. */
typedef uint8_t u8;
typedef uint64_t u64;
53
54	/****************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */
73
74	typedef uint64_t limb;
75	typedef uint128_t widelimb;
76
77	typedef limb felem[`4`];
78	typedef widelimb widefelem[`7`];
79
80	/*
81	* Field element represented as a byte array. 28*8 = 224 bits is also the
82	* group order size for the elliptic curve, and we also use this type for
83	* scalars for point multiplication.
84	*/
85	typedef u8 felem_bytearray[`28`];
86
87	static const felem_bytearray nistp224_curve_params[`5`] = {
88	{`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, / p /
89	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0x00`, `0x00`, `0x00`, `0x00`,
90	`0x00`, `0x00`, `0x00`, `0x00`, `0x00`, `0x00`, `0x00`, `0x01`},
91	{`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, / a /
92	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFE`, `0xFF`, `0xFF`, `0xFF`, `0xFF`,
93	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFE`},
94	{`0xB4`, `0x05`, `0x0A`, `0x85`, `0x0C`, `0x04`, `0xB3`, `0xAB`, `0xF5`, `0x41`, / b /
95	`0x32`, `0x56`, `0x50`, `0x44`, `0xB0`, `0xB7`, `0xD7`, `0xBF`, `0xD8`, `0xBA`,
96	`0x27`, `0x0B`, `0x39`, `0x43`, `0x23`, `0x55`, `0xFF`, `0xB4`},
97	{`0xB7`, `0x0E`, `0x0C`, `0xBD`, `0x6B`, `0xB4`, `0xBF`, `0x7F`, `0x32`, `0x13`, / x /
98	`0x90`, `0xB9`, `0x4A`, `0x03`, `0xC1`, `0xD3`, `0x56`, `0xC2`, `0x11`, `0x22`,
99	`0x34`, `0x32`, `0x80`, `0xD6`, `0x11`, `0x5C`, `0x1D`, `0x21`},
100	{`0xbd`, `0x37`, `0x63`, `0x88`, `0xb5`, `0xf7`, `0x23`, `0xfb`, `0x4c`, `0x22`, / y /
101	`0xdf`, `0xe6`, `0xcd`, `0x43`, `0x75`, `0xa0`, `0x5a`, `0x07`, `0x47`, `0x64`,
102	`0x44`, `0xd5`, `0x81`, `0x99`, `0x85`, `0x00`, `0x7e`, `0x34`}
103	};
104
105	/-*
106	* Precomputed multiples of the standard generator
107	* Points are given in coordinates (X, Y, Z) where Z normally is 1
108	* (0 for the point at infinity).
109	* For each field element, slice a_0 is word 0, etc.
110	*
111	* The table has 2 * 16 elements, starting with the following:
112	* index \| bits \| point
113	* ------+---------+------------------------------
114	* 0 \| 0 0 0 0 \| 0G
115	* 1 \| 0 0 0 1 \| 1G
116	* 2 \| 0 0 1 0 \| 2^56G
117	* 3 \| 0 0 1 1 \| (2^56 + 1)G
118	* 4 \| 0 1 0 0 \| 2^112G
119	* 5 \| 0 1 0 1 \| (2^112 + 1)G
120	* 6 \| 0 1 1 0 \| (2^112 + 2^56)G
121	* 7 \| 0 1 1 1 \| (2^112 + 2^56 + 1)G
122	* 8 \| 1 0 0 0 \| 2^168G
123	* 9 \| 1 0 0 1 \| (2^168 + 1)G
124	* 10 \| 1 0 1 0 \| (2^168 + 2^56)G
125	* 11 \| 1 0 1 1 \| (2^168 + 2^56 + 1)G
126	* 12 \| 1 1 0 0 \| (2^168 + 2^112)G
127	* 13 \| 1 1 0 1 \| (2^168 + 2^112 + 1)G
128	* 14 \| 1 1 1 0 \| (2^168 + 2^112 + 2^56)G
129	* 15 \| 1 1 1 1 \| (2^168 + 2^112 + 2^56 + 1)G
130	* followed by a copy of this with each element multiplied by 2^28.
131	*
132	* The reason for this is so that we can clock bits into four different
133	* locations when doing simple scalar multiplies against the base point,
134	* and then another four locations using the second 16 elements.
135	*/
136	static const felem gmul[`2`][`16`][`3`] = {
137	{{{`0`, `0`, `0`, `0`},
138	{`0`, `0`, `0`, `0`},
139	{`0`, `0`, `0`, `0`}},
140	{{`0x3280d6115c1d21`, `0xc1d356c2112234`, `0x7f321390b94a03`, `0xb70e0cbd6bb4bf`},
141	{`0xd5819985007e34`, `0x75a05a07476444`, `0xfb4c22dfe6cd43`, `0xbd376388b5f723`},
142	{`1`, `0`, `0`, `0`}},
143	{{`0xfd9675666ebbe9`, `0xbca7664d40ce5e`, `0x2242df8d8a2a43`, `0x1f49bbb0f99bc5`},
144	{`0x29e0b892dc9c43`, `0xece8608436e662`, `0xdc858f185310d0`, `0x9812dd4eb8d321`},
145	{`1`, `0`, `0`, `0`}},
146	{{`0x6d3e678d5d8eb8`, `0x559eed1cb362f1`, `0x16e9a3bbce8a3f`, `0xeedcccd8c2a748`},
147	{`0xf19f90ed50266d`, `0xabf2b4bf65f9df`, `0x313865468fafec`, `0x5cb379ba910a17`},
148	{`1`, `0`, `0`, `0`}},
149	{{`0x0641966cab26e3`, `0x91fb2991fab0a0`, `0xefec27a4e13a0b`, `0x0499aa8a5f8ebe`},
150	{`0x7510407766af5d`, `0x84d929610d5450`, `0x81d77aae82f706`, `0x6916f6d4338c5b`},
151	{`1`, `0`, `0`, `0`}},
152	{{`0xea95ac3b1f15c6`, `0x086000905e82d4`, `0xdd323ae4d1c8b1`, `0x932b56be7685a3`},
153	{`0x9ef93dea25dbbf`, `0x41665960f390f0`, `0xfdec76dbe2a8a7`, `0x523e80f019062a`},
154	{`1`, `0`, `0`, `0`}},
155	{{`0x822fdd26732c73`, `0xa01c83531b5d0f`, `0x363f37347c1ba4`, `0xc391b45c84725c`},
156	{`0xbbd5e1b2d6ad24`, `0xddfbcde19dfaec`, `0xc393da7e222a7f`, `0x1efb7890ede244`},
157	{`1`, `0`, `0`, `0`}},
158	{{`0x4c9e90ca217da1`, `0xd11beca79159bb`, `0xff8d33c2c98b7c`, `0x2610b39409f849`},
159	{`0x44d1352ac64da0`, `0xcdbb7b2c46b4fb`, `0x966c079b753c89`, `0xfe67e4e820b112`},
160	{`1`, `0`, `0`, `0`}},
161	{{`0xe28cae2df5312d`, `0xc71b61d16f5c6e`, `0x79b7619a3e7c4c`, `0x05c73240899b47`},
162	{`0x9f7f6382c73e3a`, `0x18615165c56bda`, `0x641fab2116fd56`, `0x72855882b08394`},
163	{`1`, `0`, `0`, `0`}},
164	{{`0x0469182f161c09`, `0x74a98ca8d00fb5`, `0xb89da93489a3e0`, `0x41c98768fb0c1d`},
165	{`0xe5ea05fb32da81`, `0x3dce9ffbca6855`, `0x1cfe2d3fbf59e6`, `0x0e5e03408738a7`},
166	{`1`, `0`, `0`, `0`}},
167	{{`0xdab22b2333e87f`, `0x4430137a5dd2f6`, `0xe03ab9f738beb8`, `0xcb0c5d0dc34f24`},
168	{`0x764a7df0c8fda5`, `0x185ba5c3fa2044`, `0x9281d688bcbe50`, `0xc40331df893881`},
169	{`1`, `0`, `0`, `0`}},
170	{{`0xb89530796f0f60`, `0xade92bd26909a3`, `0x1a0c83fb4884da`, `0x1765bf22a5a984`},
171	{`0x772a9ee75db09e`, `0x23bc6c67cec16f`, `0x4c1edba8b14e2f`, `0xe2a215d9611369`},
172	{`1`, `0`, `0`, `0`}},
173	{{`0x571e509fb5efb3`, `0xade88696410552`, `0xc8ae85fada74fe`, `0x6c7e4be83bbde3`},
174	{`0xff9f51160f4652`, `0xb47ce2495a6539`, `0xa2946c53b582f4`, `0x286d2db3ee9a60`},
175	{`1`, `0`, `0`, `0`}},
176	{{`0x40bbd5081a44af`, `0x0995183b13926c`, `0xbcefba6f47f6d0`, `0x215619e9cc0057`},
177	{`0x8bc94d3b0df45e`, `0xf11c54a3694f6f`, `0x8631b93cdfe8b5`, `0xe7e3f4b0982db9`},
178	{`1`, `0`, `0`, `0`}},
179	{{`0xb17048ab3e1c7b`, `0xac38f36ff8a1d8`, `0x1c29819435d2c6`, `0xc813132f4c07e9`},
180	{`0x2891425503b11f`, `0x08781030579fea`, `0xf5426ba5cc9674`, `0x1e28ebf18562bc`},
181	{`1`, `0`, `0`, `0`}},
182	{{`0x9f31997cc864eb`, `0x06cd91d28b5e4c`, `0xff17036691a973`, `0xf1aef351497c58`},
183	{`0xdd1f2d600564ff`, `0xdead073b1402db`, `0x74a684435bd693`, `0xeea7471f962558`},
184	{`1`, `0`, `0`, `0`}}},
185	{{{`0`, `0`, `0`, `0`},
186	{`0`, `0`, `0`, `0`},
187	{`0`, `0`, `0`, `0`}},
188	{{`0x9665266dddf554`, `0x9613d78b60ef2d`, `0xce27a34cdba417`, `0xd35ab74d6afc31`},
189	{`0x85ccdd22deb15e`, `0x2137e5783a6aab`, `0xa141cffd8c93c6`, `0x355a1830e90f2d`},
190	{`1`, `0`, `0`, `0`}},
191	{{`0x1a494eadaade65`, `0xd6da4da77fe53c`, `0xe7992996abec86`, `0x65c3553c6090e3`},
192	{`0xfa610b1fb09346`, `0xf1c6540b8a4aaf`, `0xc51a13ccd3cbab`, `0x02995b1b18c28a`},
193	{`1`, `0`, `0`, `0`}},
194	{{`0x7874568e7295ef`, `0x86b419fbe38d04`, `0xdc0690a7550d9a`, `0xd3966a44beac33`},
195	{`0x2b7280ec29132f`, `0xbeaa3b6a032df3`, `0xdc7dd88ae41200`, `0xd25e2513e3a100`},
196	{`1`, `0`, `0`, `0`}},
197	{{`0x924857eb2efafd`, `0xac2bce41223190`, `0x8edaa1445553fc`, `0x825800fd3562d5`},
198	{`0x8d79148ea96621`, `0x23a01c3dd9ed8d`, `0xaf8b219f9416b5`, `0xd8db0cc277daea`},
199	{`1`, `0`, `0`, `0`}},
200	{{`0x76a9c3b1a700f0`, `0xe9acd29bc7e691`, `0x69212d1a6b0327`, `0x6322e97fe154be`},
201	{`0x469fc5465d62aa`, `0x8d41ed18883b05`, `0x1f8eae66c52b88`, `0xe4fcbe9325be51`},
202	{`1`, `0`, `0`, `0`}},
203	{{`0x825fdf583cac16`, `0x020b857c7b023a`, `0x683c17744b0165`, `0x14ffd0a2daf2f1`},
204	{`0x323b36184218f9`, `0x4944ec4e3b47d4`, `0xc15b3080841acf`, `0x0bced4b01a28bb`},
205	{`1`, `0`, `0`, `0`}},
206	{{`0x92ac22230df5c4`, `0x52f33b4063eda8`, `0xcb3f19870c0c93`, `0x40064f2ba65233`},
207	{`0xfe16f0924f8992`, `0x012da25af5b517`, `0x1a57bb24f723a6`, `0x06f8bc76760def`},
208	{`1`, `0`, `0`, `0`}},
209	{{`0x4a7084f7817cb9`, `0xbcab0738ee9a78`, `0x3ec11e11d9c326`, `0xdc0fe90e0f1aae`},
210	{`0xcf639ea5f98390`, `0x5c350aa22ffb74`, `0x9afae98a4047b7`, `0x956ec2d617fc45`},
211	{`1`, `0`, `0`, `0`}},
212	{{`0x4306d648c1be6a`, `0x9247cd8bc9a462`, `0xf5595e377d2f2e`, `0xbd1c3caff1a52e`},
213	{`0x045e14472409d0`, `0x29f3e17078f773`, `0x745a602b2d4f7d`, `0x191837685cdfbb`},
214	{`1`, `0`, `0`, `0`}},
215	{{`0x5b6ee254a8cb79`, `0x4953433f5e7026`, `0xe21faeb1d1def4`, `0xc4c225785c09de`},
216	{`0x307ce7bba1e518`, `0x31b125b1036db8`, `0x47e91868839e8f`, `0xc765866e33b9f3`},
217	{`1`, `0`, `0`, `0`}},
218	{{`0x3bfece24f96906`, `0x4794da641e5093`, `0xde5df64f95db26`, `0x297ecd89714b05`},
219	{`0x701bd3ebb2c3aa`, `0x7073b4f53cb1d5`, `0x13c5665658af16`, `0x9895089d66fe58`},
220	{`1`, `0`, `0`, `0`}},
221	{{`0x0fef05f78c4790`, `0x2d773633b05d2e`, `0x94229c3a951c94`, `0xbbbd70df4911bb`},
222	{`0xb2c6963d2c1168`, `0x105f47a72b0d73`, `0x9fdf6111614080`, `0x7b7e94b39e67b0`},
223	{`1`, `0`, `0`, `0`}},
224	{{`0xad1a7d6efbe2b3`, `0xf012482c0da69d`, `0x6b3bdf12438345`, `0x40d7558d7aa4d9`},
225	{`0x8a09fffb5c6d3d`, `0x9a356e5d9ffd38`, `0x5973f15f4f9b1c`, `0xdcd5f59f63c3ea`},
226	{`1`, `0`, `0`, `0`}},
227	{{`0xacf39f4c5ca7ab`, `0x4c8071cc5fd737`, `0xc64e3602cd1184`, `0x0acd4644c9abba`},
228	{`0x6c011a36d8bf6e`, `0xfecd87ba24e32a`, `0x19f6f56574fad8`, `0x050b204ced9405`},
229	{`1`, `0`, `0`, `0`}},
230	{{`0xed4f1cae7d9a96`, `0x5ceef7ad94c40a`, `0x778e4a3bf3ef9b`, `0x7405783dc3b55e`},
231	{`0x32477c61b6e8c6`, `0xb46a97570f018b`, `0x91176d0a7e95d1`, `0x3df90fbc4c7d0e`},
232	{`1`, `0`, `0`, `0`}}}
233	};
234
235	/ Precomputation for the group generator. /
236	struct nistp224_pre_comp_st {
237	felem g_pre_comp[`2`][`16`][`3`];
238	CRYPTO_REF_COUNT references;
239	CRYPTO_RWLOCK *lock;
240	};
241
242	const EC_METHOD EC_GFp_nistp224_method(void*)
243	{
244	static const EC_METHOD ret = {
245	EC_FLAGS_DEFAULT_OCT,
246	NID_X9_62_prime_field,
247	ec_GFp_nistp224_group_init,
248	ec_GFp_simple_group_finish,
249	ec_GFp_simple_group_clear_finish,
250	ec_GFp_nist_group_copy,
251	ec_GFp_nistp224_group_set_curve,
252	ec_GFp_simple_group_get_curve,
253	ec_GFp_simple_group_get_degree,
254	ec_group_simple_order_bits,
255	ec_GFp_simple_group_check_discriminant,
256	ec_GFp_simple_point_init,
257	ec_GFp_simple_point_finish,
258	ec_GFp_simple_point_clear_finish,
259	ec_GFp_simple_point_copy,
260	ec_GFp_simple_point_set_to_infinity,
261	ec_GFp_simple_set_Jprojective_coordinates_GFp,
262	ec_GFp_simple_get_Jprojective_coordinates_GFp,
263	ec_GFp_simple_point_set_affine_coordinates,
264	ec_GFp_nistp224_point_get_affine_coordinates,
265	`0` / point_set_compressed_coordinates / ,
266	`0` / point2oct / ,
267	`0` / oct2point / ,
268	ec_GFp_simple_add,
269	ec_GFp_simple_dbl,
270	ec_GFp_simple_invert,
271	ec_GFp_simple_is_at_infinity,
272	ec_GFp_simple_is_on_curve,
273	ec_GFp_simple_cmp,
274	ec_GFp_simple_make_affine,
275	ec_GFp_simple_points_make_affine,
276	ec_GFp_nistp224_points_mul,
277	ec_GFp_nistp224_precompute_mult,
278	ec_GFp_nistp224_have_precompute_mult,
279	ec_GFp_nist_field_mul,
280	ec_GFp_nist_field_sqr,
281	`0` / field_div / ,
282	ec_GFp_simple_field_inv,
283	`0` / field_encode / ,
284	`0` / field_decode / ,
285	`0`, / field_set_to_one /
286	ec_key_simple_priv2oct,
287	ec_key_simple_oct2priv,
288	`0`, / set private /
289	ec_key_simple_generate_key,
290	ec_key_simple_check_key,
291	ec_key_simple_generate_public_key,
292	`0`, / keycopy /
293	`0`, / keyfinish /
294	ecdh_simple_compute_key,
295	ecdsa_simple_sign_setup,
296	ecdsa_simple_sign_sig,
297	ecdsa_simple_verify_sig,
298	`0`, / field_inverse_mod_ord /
299	`0`, / blind_coordinates /
300	`0`, / ladder_pre /
301	`0`, / ladder_step /
302	`0` / ladder_post /
303	};
304
305	return &ret;
306	}
307
308	/*
309	* Helper functions to convert field elements to/from internal representation
310	*/
311	static void bin28_to_felem(felem out, const u8 in[`28`])
312	{
313	out[`0`] = ((const* uint64_t *)(in)) & `0x00ffffffffffffff`;
314	out[`1`] = (((const* uint64_t *)(in + `7`))) & `0x00ffffffffffffff`;
315	out[`2`] = (((const* uint64_t *)(in + `14`))) & `0x00ffffffffffffff`;
316	out[`3`] = (((const* uint64_t *)(in+`20`))) >> `8`;
317	}
318
319	static void felem_to_bin28(u8 out[`28`], const felem in)
320	{
321	unsigned i;
322	for (i = `0`; i < `7`; ++i) {
323	out[i] = in[`0`] >> (`8` * i);
324	out[i + `7`] = in[`1`] >> (`8` * i);
325	out[i + `14`] = in[`2`] >> (`8` * i);
326	out[i + `21`] = in[`3`] >> (`8` * i);
327	}
328	}
329
330	/ From OpenSSL BIGNUM to internal representation /
331	static int BN_to_felem(felem out, const BIGNUM *bn)
332	{
333	felem_bytearray b_out;
334	int num_bytes;
335
336	if (BN_is_negative(bn)) {
337	ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
338	return `0`;
339	}
340	num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341	if (num_bytes < `0`) {
342	ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
343	return `0`;
344	}
345	bin28_to_felem(out, b_out);
346	return `1`;
347	}
348
349	/ From internal representation to OpenSSL BIGNUM /
350	static BIGNUM felem_to_BN(BIGNUM out, const felem in)
351	{
352	felem_bytearray b_out;
353	felem_to_bin28(b_out, in);
354	return BN_lebin2bn(b_out, sizeof(b_out), out);
355	}
356
357	/****************************************************************************/
/*-
359	* FIELD OPERATIONS
360	*
361	* Field operations, using the internal representation of field elements.
362	* NB! These operations are specific to our point multiplication and cannot be
363	* expected to be correct in general - e.g., multiplication with a large scalar
364	* will cause an overflow.
365	*
366	*/
367
368	static void felem_one(felem out)
369	{
370	out[`0`] = `1`;
371	out[`1`] = `0`;
372	out[`2`] = `0`;
373	out[`3`] = `0`;
374	}
375
376	static void felem_assign(felem out, const felem in)
377	{
378	out[`0`] = in[`0`];
379	out[`1`] = in[`1`];
380	out[`2`] = in[`2`];
381	out[`3`] = in[`3`];
382	}
383
384	/ Sum two field elements: out += in /
385	static void felem_sum(felem out, const felem in)
386	{
387	out[`0`] += in[`0`];
388	out[`1`] += in[`1`];
389	out[`2`] += in[`2`];
390	out[`3`] += in[`3`];
391	}
392
393	/ Subtract field elements: out -= in /
394	/ Assumes in[i] < 2^57 /
395	static void felem_diff(felem out, const felem in)
396	{
397	static const limb two58p2 = (((limb) `1`) << `58`) + (((limb) `1`) << `2`);
398	static const limb two58m2 = (((limb) `1`) << `58`) - (((limb) `1`) << `2`);
399	static const limb two58m42m2 = (((limb) `1`) << `58`) -
400	(((limb) `1`) << `42`) - (((limb) `1`) << `2`);
401
402	/ Add 0 mod 2^224-2^96+1 to ensure out > in /
403	out[`0`] += two58p2;
404	out[`1`] += two58m42m2;
405	out[`2`] += two58m2;
406	out[`3`] += two58m2;
407
408	out[`0`] -= in[`0`];
409	out[`1`] -= in[`1`];
410	out[`2`] -= in[`2`];
411	out[`3`] -= in[`3`];
412	}
413
414	/ Subtract in unreduced 128-bit mode: out -= in /
415	/ Assumes in[i] < 2^119 /
416	static void widefelem_diff(widefelem out, const widefelem in)
417	{
418	static const widelimb two120 = ((widelimb) `1`) << `120`;
419	static const widelimb two120m64 = (((widelimb) `1`) << `120`) -
420	(((widelimb) `1`) << `64`);
421	static const widelimb two120m104m64 = (((widelimb) `1`) << `120`) -
422	(((widelimb) `1`) << `104`) - (((widelimb) `1`) << `64`);
423
424	/ Add 0 mod 2^224-2^96+1 to ensure out > in /
425	out[`0`] += two120;
426	out[`1`] += two120m64;
427	out[`2`] += two120m64;
428	out[`3`] += two120;
429	out[`4`] += two120m104m64;
430	out[`5`] += two120m64;
431	out[`6`] += two120m64;
432
433	out[`0`] -= in[`0`];
434	out[`1`] -= in[`1`];
435	out[`2`] -= in[`2`];
436	out[`3`] -= in[`3`];
437	out[`4`] -= in[`4`];
438	out[`5`] -= in[`5`];
439	out[`6`] -= in[`6`];
440	}
441
442	/ Subtract in mixed mode: out128 -= in64 /
443	/ in[i] < 2^63 /
444	static void felem_diff_128_64(widefelem out, const felem in)
445	{
446	static const widelimb two64p8 = (((widelimb) `1`) << `64`) +
447	(((widelimb) `1`) << `8`);
448	static const widelimb two64m8 = (((widelimb) `1`) << `64`) -
449	(((widelimb) `1`) << `8`);
450	static const widelimb two64m48m8 = (((widelimb) `1`) << `64`) -
451	(((widelimb) `1`) << `48`) - (((widelimb) `1`) << `8`);
452
453	/ Add 0 mod 2^224-2^96+1 to ensure out > in /
454	out[`0`] += two64p8;
455	out[`1`] += two64m48m8;
456	out[`2`] += two64m8;
457	out[`3`] += two64m8;
458
459	out[`0`] -= in[`0`];
460	out[`1`] -= in[`1`];
461	out[`2`] -= in[`2`];
462	out[`3`] -= in[`3`];
463	}
464
465	/*
466	* Multiply a field element by a scalar: out = out * scalar The scalars we
467	* actually use are small, so results fit without overflow
468	*/
469	static void felem_scalar(felem out, const limb scalar)
470	{
471	out[`0`] *= scalar;
472	out[`1`] *= scalar;
473	out[`2`] *= scalar;
474	out[`3`] *= scalar;
475	}
476
477	/*
478	* Multiply an unreduced field element by a scalar: out = out * scalar The
479	* scalars we actually use are small, so results fit without overflow
480	*/
481	static void widefelem_scalar(widefelem out, const widelimb scalar)
482	{
483	out[`0`] *= scalar;
484	out[`1`] *= scalar;
485	out[`2`] *= scalar;
486	out[`3`] *= scalar;
487	out[`4`] *= scalar;
488	out[`5`] *= scalar;
489	out[`6`] *= scalar;
490	}
491
492	/ Square a field element: out = in^2 /
493	static void felem_square(widefelem out, const felem in)
494	{
495	limb tmp0, tmp1, tmp2;
496	tmp0 = `2` * in[`0`];
497	tmp1 = `2` * in[`1`];
498	tmp2 = `2` * in[`2`];
499	out[`0`] = ((widelimb) in[`0`]) * in[`0`];
500	out[`1`] = ((widelimb) in[`0`]) * tmp1;
501	out[`2`] = ((widelimb) in[`0`]) * tmp2 + ((widelimb) in[`1`]) * in[`1`];
502	out[`3`] = ((widelimb) in[`3`]) * tmp0 + ((widelimb) in[`1`]) * tmp2;
503	out[`4`] = ((widelimb) in[`3`]) * tmp1 + ((widelimb) in[`2`]) * in[`2`];
504	out[`5`] = ((widelimb) in[`3`]) * tmp2;
505	out[`6`] = ((widelimb) in[`3`]) * in[`3`];
506	}
507
508	/ Multiply two field elements: out = in1 * in2 /
509	static void felem_mul(widefelem out, const felem in1, const felem in2)
510	{
511	out[`0`] = ((widelimb) in1[`0`]) * in2[`0`];
512	out[`1`] = ((widelimb) in1[`0`]) * in2[`1`] + ((widelimb) in1[`1`]) * in2[`0`];
513	out[`2`] = ((widelimb) in1[`0`]) * in2[`2`] + ((widelimb) in1[`1`]) * in2[`1`] +
514	((widelimb) in1[`2`]) * in2[`0`];
515	out[`3`] = ((widelimb) in1[`0`]) * in2[`3`] + ((widelimb) in1[`1`]) * in2[`2`] +
516	((widelimb) in1[`2`]) * in2[`1`] + ((widelimb) in1[`3`]) * in2[`0`];
517	out[`4`] = ((widelimb) in1[`1`]) * in2[`3`] + ((widelimb) in1[`2`]) * in2[`2`] +
518	((widelimb) in1[`3`]) * in2[`1`];
519	out[`5`] = ((widelimb) in1[`2`]) * in2[`3`] + ((widelimb) in1[`3`]) * in2[`2`];
520	out[`6`] = ((widelimb) in1[`3`]) * in2[`3`];
521	}
522
523	/-*
524	* Reduce seven 128-bit coefficients to four 64-bit coefficients.
525	* Requires in[i] < 2^126,
526	* ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527	static void felem_reduce(felem out, const widefelem in)
528	{
529	static const widelimb two127p15 = (((widelimb) `1`) << `127`) +
530	(((widelimb) `1`) << `15`);
531	static const widelimb two127m71 = (((widelimb) `1`) << `127`) -
532	(((widelimb) `1`) << `71`);
533	static const widelimb two127m71m55 = (((widelimb) `1`) << `127`) -
534	(((widelimb) `1`) << `71`) - (((widelimb) `1`) << `55`);
535	widelimb output[`5`];
536
537	/ Add 0 mod 2^224-2^96+1 to ensure all differences are positive /
538	output[`0`] = in[`0`] + two127p15;
539	output[`1`] = in[`1`] + two127m71m55;
540	output[`2`] = in[`2`] + two127m71;
541	output[`3`] = in[`3`];
542	output[`4`] = in[`4`];
543
544	/ Eliminate in[4], in[5], in[6] /
545	output[`4`] += in[`6`] >> `16`;
546	output[`3`] += (in[`6`] & `0xffff`) << `40`;
547	output[`2`] -= in[`6`];
548
549	output[`3`] += in[`5`] >> `16`;
550	output[`2`] += (in[`5`] & `0xffff`) << `40`;
551	output[`1`] -= in[`5`];
552
553	output[`2`] += output[`4`] >> `16`;
554	output[`1`] += (output[`4`] & `0xffff`) << `40`;
555	output[`0`] -= output[`4`];
556
557	/ Carry 2 -> 3 -> 4 /
558	output[`3`] += output[`2`] >> `56`;
559	output[`2`] &= `0x00ffffffffffffff`;
560
561	output[`4`] = output[`3`] >> `56`;
562	output[`3`] &= `0x00ffffffffffffff`;
563
564	/ Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 /
565
566	/ Eliminate output[4] /
567	output[`2`] += output[`4`] >> `16`;
568	/ output[2] < 2^56 + 2^56 = 2^57 /
569	output[`1`] += (output[`4`] & `0xffff`) << `40`;
570	output[`0`] -= output[`4`];
571
572	/ Carry 0 -> 1 -> 2 -> 3 /
573	output[`1`] += output[`0`] >> `56`;
574	out[`0`] = output[`0`] & `0x00ffffffffffffff`;
575
576	output[`2`] += output[`1`] >> `56`;
577	/ output[2] < 2^57 + 2^72 /
578	out[`1`] = output[`1`] & `0x00ffffffffffffff`;
579	output[`3`] += output[`2`] >> `56`;
580	/ output[3] <= 2^56 + 2^16 /
581	out[`2`] = output[`2`] & `0x00ffffffffffffff`;
582
583	/-*
584	* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585	* out[3] <= 2^56 + 2^16 (due to final carry),
586	* so out < 2*p
587	*/
588	out[`3`] = output[`3`];
589	}
590
591	static void felem_square_reduce(felem out, const felem in)
592	{
593	widefelem tmp;
594	felem_square(tmp, in);
595	felem_reduce(out, tmp);
596	}
597
598	static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599	{
600	widefelem tmp;
601	felem_mul(tmp, in1, in2);
602	felem_reduce(out, tmp);
603	}
604
605	/*
606	* Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607	* call felem_reduce first)
608	*/
609	static void felem_contract(felem out, const felem in)
610	{
611	static const int64_t two56 = ((limb) `1`) << `56`;
612	/ 0 <= in < 2p, p = 2^224 - 2^96 + 1 /*
613	/ if in > p , reduce in = in - 2^224 + 2^96 - 1 /
614	int64_t tmp[`4`], a;
615	tmp[`0`] = in[`0`];
616	tmp[`1`] = in[`1`];
617	tmp[`2`] = in[`2`];
618	tmp[`3`] = in[`3`];
619	/ Case 1: a = 1 iff in >= 2^224 /
620	a = (in[`3`] >> `56`);
621	tmp[`0`] -= a;
622	tmp[`1`] += a << `40`;
623	tmp[`3`] &= `0x00ffffffffffffff`;
624	/*
625	* Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626	* and the lower part is non-zero
627	*/
628	a = ((in[`3`] & in[`2`] & (in[`1`] \| `0x000000ffffffffff`)) + `1`) \|
629	(((int64_t) (in[`0`] + (in[`1`] & `0x000000ffffffffff`)) - `1`) >> `63`);
630	a &= `0x00ffffffffffffff`;
631	/ turn a into an all-one mask (if a = 0) or an all-zero mask /
632	a = (a - `1`) >> `63`;
633	/ subtract 2^224 - 2^96 + 1 if a is all-one /
634	tmp[`3`] &= a ^ `0xffffffffffffffff`;
635	tmp[`2`] &= a ^ `0xffffffffffffffff`;
636	tmp[`1`] &= (a ^ `0xffffffffffffffff`) \| `0x000000ffffffffff`;
637	tmp[`0`] -= `1` & a;
638
639	/*
640	* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641	* non-zero, so we only need one step
642	*/
643	a = tmp[`0`] >> `63`;
644	tmp[`0`] += two56 & a;
645	tmp[`1`] -= `1` & a;
646
647	/ carry 1 -> 2 -> 3 /
648	tmp[`2`] += tmp[`1`] >> `56`;
649	tmp[`1`] &= `0x00ffffffffffffff`;
650
651	tmp[`3`] += tmp[`2`] >> `56`;
652	tmp[`2`] &= `0x00ffffffffffffff`;
653
654	/ Now 0 <= out < p /
655	out[`0`] = tmp[`0`];
656	out[`1`] = tmp[`1`];
657	out[`2`] = tmp[`2`];
658	out[`3`] = tmp[`3`];
659	}
660
661	/*
662	* Get negative value: out = -in
663	* Requires in[i] < 2^63,
664	* ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665	*/
666	static void felem_neg(felem out, const felem in)
667	{
668	widefelem tmp;
669
670	memset(tmp, `0`, sizeof(tmp));
671	felem_diff_128_64(tmp, in);
672	felem_reduce(out, tmp);
673	}
674
675	/*
676	* Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677	* elements are reduced to in < 2^225, so we only need to check three cases:
678	* 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679	*/
680	static limb felem_is_zero(const felem in)
681	{
682	limb zero, two224m96p1, two225m97p2;
683
684	zero = in[`0`] \| in[`1`] \| in[`2`] \| in[`3`];
685	zero = (((int64_t) (zero) - `1`) >> `63`) & `1`;
686	two224m96p1 = (in[`0`] ^ `1`) \| (in[`1`] ^ `0x00ffff0000000000`)
687	\| (in[`2`] ^ `0x00ffffffffffffff`) \| (in[`3`] ^ `0x00ffffffffffffff`);
688	two224m96p1 = (((int64_t) (two224m96p1) - `1`) >> `63`) & `1`;
689	two225m97p2 = (in[`0`] ^ `2`) \| (in[`1`] ^ `0x00fffe0000000000`)
690	\| (in[`2`] ^ `0x00ffffffffffffff`) \| (in[`3`] ^ `0x01ffffffffffffff`);
691	two225m97p2 = (((int64_t) (two225m97p2) - `1`) >> `63`) & `1`;
692	return (zero \| two224m96p1 \| two225m97p2);
693	}
694
695	static int felem_is_zero_int(const void *in)
696	{
697	return (int)(felem_is_zero(in) & ((limb) `1`));
698	}
699
700	/ Invert a field element /
701	/ Computation chain copied from djb's code /
702	static void felem_inv(felem out, const felem in)
703	{
704	felem ftmp, ftmp2, ftmp3, ftmp4;
705	widefelem tmp;
706	unsigned i;
707
708	felem_square(tmp, in);
709	felem_reduce(ftmp, tmp); / 2 /
710	felem_mul(tmp, in, ftmp);
711	felem_reduce(ftmp, tmp); / 2^2 - 1 /
712	felem_square(tmp, ftmp);
713	felem_reduce(ftmp, tmp); / 2^3 - 2 /
714	felem_mul(tmp, in, ftmp);
715	felem_reduce(ftmp, tmp); / 2^3 - 1 /
716	felem_square(tmp, ftmp);
717	felem_reduce(ftmp2, tmp); / 2^4 - 2 /
718	felem_square(tmp, ftmp2);
719	felem_reduce(ftmp2, tmp); / 2^5 - 4 /
720	felem_square(tmp, ftmp2);
721	felem_reduce(ftmp2, tmp); / 2^6 - 8 /
722	felem_mul(tmp, ftmp2, ftmp);
723	felem_reduce(ftmp, tmp); / 2^6 - 1 /
724	felem_square(tmp, ftmp);
725	felem_reduce(ftmp2, tmp); / 2^7 - 2 /
726	for (i = `0`; i < `5`; ++i) { / 2^12 - 2^6 /
727	felem_square(tmp, ftmp2);
728	felem_reduce(ftmp2, tmp);
729	}
730	felem_mul(tmp, ftmp2, ftmp);
731	felem_reduce(ftmp2, tmp); / 2^12 - 1 /
732	felem_square(tmp, ftmp2);
733	felem_reduce(ftmp3, tmp); / 2^13 - 2 /
734	for (i = `0`; i < `11`; ++i) { / 2^24 - 2^12 /
735	felem_square(tmp, ftmp3);
736	felem_reduce(ftmp3, tmp);
737	}
738	felem_mul(tmp, ftmp3, ftmp2);
739	felem_reduce(ftmp2, tmp); / 2^24 - 1 /
740	felem_square(tmp, ftmp2);
741	felem_reduce(ftmp3, tmp); / 2^25 - 2 /
742	for (i = `0`; i < `23`; ++i) { / 2^48 - 2^24 /
743	felem_square(tmp, ftmp3);
744	felem_reduce(ftmp3, tmp);
745	}
746	felem_mul(tmp, ftmp3, ftmp2);
747	felem_reduce(ftmp3, tmp); / 2^48 - 1 /
748	felem_square(tmp, ftmp3);
749	felem_reduce(ftmp4, tmp); / 2^49 - 2 /
750	for (i = `0`; i < `47`; ++i) { / 2^96 - 2^48 /
751	felem_square(tmp, ftmp4);
752	felem_reduce(ftmp4, tmp);
753	}
754	felem_mul(tmp, ftmp3, ftmp4);
755	felem_reduce(ftmp3, tmp); / 2^96 - 1 /
756	felem_square(tmp, ftmp3);
757	felem_reduce(ftmp4, tmp); / 2^97 - 2 /
758	for (i = `0`; i < `23`; ++i) { / 2^120 - 2^24 /
759	felem_square(tmp, ftmp4);
760	felem_reduce(ftmp4, tmp);
761	}
762	felem_mul(tmp, ftmp2, ftmp4);
763	felem_reduce(ftmp2, tmp); / 2^120 - 1 /
764	for (i = `0`; i < `6`; ++i) { / 2^126 - 2^6 /
765	felem_square(tmp, ftmp2);
766	felem_reduce(ftmp2, tmp);
767	}
768	felem_mul(tmp, ftmp2, ftmp);
769	felem_reduce(ftmp, tmp); / 2^126 - 1 /
770	felem_square(tmp, ftmp);
771	felem_reduce(ftmp, tmp); / 2^127 - 2 /
772	felem_mul(tmp, ftmp, in);
773	felem_reduce(ftmp, tmp); / 2^127 - 1 /
774	for (i = `0`; i < `97`; ++i) { / 2^224 - 2^97 /
775	felem_square(tmp, ftmp);
776	felem_reduce(ftmp, tmp);
777	}
778	felem_mul(tmp, ftmp, ftmp3);
779	felem_reduce(out, tmp); / 2^224 - 2^96 - 1 /
780	}
781
782	/*
783	* Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784	* out to itself.
785	*/
786	static void copy_conditional(felem out, const felem in, limb icopy)
787	{
788	unsigned i;
789	/*
790	* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791	*/
792	const limb copy = -icopy;
793	for (i = `0`; i < `4`; ++i) {
794	const limb tmp = copy & (in[i] ^ out[i]);
795	out[i] ^= tmp;
796	}
797	}
798
799	/****************************************************************************/
/*-
801	* ELLIPTIC CURVE POINT OPERATIONS
802	*
803	* Points are represented in Jacobian projective coordinates:
804	* (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805	* or to the point at infinity if Z == 0.
806	*
807	*/
808
809	/-*
810	* Double an elliptic curve point:
811	* (X', Y', Z') = 2 * (X, Y, Z), where
812	* X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813	* Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814	* Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815	* Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816	* while x_out == y_in is not (maybe this works, but it's not tested).
817	*/
818	static void
819	point_double(felem x_out, felem y_out, felem z_out,
820	const felem x_in, const felem y_in, const felem z_in)
821	{
822	widefelem tmp, tmp2;
823	felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825	felem_assign(ftmp, x_in);
826	felem_assign(ftmp2, x_in);
827
828	/ delta = z^2 /
829	felem_square(tmp, z_in);
830	felem_reduce(delta, tmp);
831
832	/ gamma = y^2 /
833	felem_square(tmp, y_in);
834	felem_reduce(gamma, tmp);
835
836	/ beta = xgamma /*
837	felem_mul(tmp, x_in, gamma);
838	felem_reduce(beta, tmp);
839
840	/ alpha = 3(x-delta)(x+delta) /
841	felem_diff(ftmp, delta);
842	/ ftmp[i] < 2^57 + 2^58 + 2 < 2^59 /
843	felem_sum(ftmp2, delta);
844	/ ftmp2[i] < 2^57 + 2^57 = 2^58 /
845	felem_scalar(ftmp2, `3`);
846	/ ftmp2[i] < 3 * 2^58 < 2^60 /
847	felem_mul(tmp, ftmp, ftmp2);
848	/ tmp[i] < 2^60 * 2^59 * 4 = 2^121 /
849	felem_reduce(alpha, tmp);
850
851	/ x' = alpha^2 - 8beta /*
852	felem_square(tmp, alpha);
853	/ tmp[i] < 4 * 2^57 * 2^57 = 2^116 /
854	felem_assign(ftmp, beta);
855	felem_scalar(ftmp, `8`);
856	/ ftmp[i] < 8 * 2^57 = 2^60 /
857	felem_diff_128_64(tmp, ftmp);
858	/ tmp[i] < 2^116 + 2^64 + 8 < 2^117 /
859	felem_reduce(x_out, tmp);
860
861	/ z' = (y + z)^2 - gamma - delta /
862	felem_sum(delta, gamma);
863	/ delta[i] < 2^57 + 2^57 = 2^58 /
864	felem_assign(ftmp, y_in);
865	felem_sum(ftmp, z_in);
866	/ ftmp[i] < 2^57 + 2^57 = 2^58 /
867	felem_square(tmp, ftmp);
868	/ tmp[i] < 4 * 2^58 * 2^58 = 2^118 /
869	felem_diff_128_64(tmp, delta);
870	/ tmp[i] < 2^118 + 2^64 + 8 < 2^119 /
871	felem_reduce(z_out, tmp);
872
873	/ y' = alpha(4beta - x') - 8gamma^2 /*
874	felem_scalar(beta, `4`);
875	/ beta[i] < 4 * 2^57 = 2^59 /
876	felem_diff(beta, x_out);
877	/ beta[i] < 2^59 + 2^58 + 2 < 2^60 /
878	felem_mul(tmp, alpha, beta);
879	/ tmp[i] < 4 * 2^57 * 2^60 = 2^119 /
880	felem_square(tmp2, gamma);
881	/ tmp2[i] < 4 * 2^57 * 2^57 = 2^116 /
882	widefelem_scalar(tmp2, `8`);
883	/ tmp2[i] < 8 * 2^116 = 2^119 /
884	widefelem_diff(tmp, tmp2);
885	/ tmp[i] < 2^119 + 2^120 < 2^121 /
886	felem_reduce(y_out, tmp);
887	}
888
889	/-*
890	* Add two elliptic curve points:
891	* (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892	* X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893	* 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894	* Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895	* Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896	* Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897	*
898	* This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899	*/
900
901	/*
902	* This function is not entirely constant-time: it includes a branch for
903	* checking whether the two input points are equal, (while not equal to the
904	* point at infinity). This case never happens during single point
905	* multiplication, so there is no timing leak for ECDH or ECDSA signing.
906	*/
907	static void point_add(felem x3, felem y3, felem z3,
908	const felem x1, const felem y1, const felem z1,
909	const int mixed, const felem x2, const felem y2,
910	const felem z2)
911	{
912	felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913	widefelem tmp, tmp2;
914	limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915
916	if (!mixed) {
917	/ ftmp2 = z2^2 /
918	felem_square(tmp, z2);
919	felem_reduce(ftmp2, tmp);
920
921	/ ftmp4 = z2^3 /
922	felem_mul(tmp, ftmp2, z2);
923	felem_reduce(ftmp4, tmp);
924
925	/ ftmp4 = z2^3y1 /*
926	felem_mul(tmp2, ftmp4, y1);
927	felem_reduce(ftmp4, tmp2);
928
929	/ ftmp2 = z2^2x1 /*
930	felem_mul(tmp2, ftmp2, x1);
931	felem_reduce(ftmp2, tmp2);
932	} else {
933	/*
934	* We'll assume z2 = 1 (special case z2 = 0 is handled later)
935	*/
936
937	/ ftmp4 = z2^3y1 /*
938	felem_assign(ftmp4, y1);
939
940	/ ftmp2 = z2^2x1 /*
941	felem_assign(ftmp2, x1);
942	}
943
944	/ ftmp = z1^2 /
945	felem_square(tmp, z1);
946	felem_reduce(ftmp, tmp);
947
948	/ ftmp3 = z1^3 /
949	felem_mul(tmp, ftmp, z1);
950	felem_reduce(ftmp3, tmp);
951
952	/ tmp = z1^3y2 /*
953	felem_mul(tmp, ftmp3, y2);
954	/ tmp[i] < 4 * 2^57 * 2^57 = 2^116 /
955
956	/ ftmp3 = z1^3y2 - z2^3y1 /
957	felem_diff_128_64(tmp, ftmp4);
958	/ tmp[i] < 2^116 + 2^64 + 8 < 2^117 /
959	felem_reduce(ftmp3, tmp);
960
961	/ tmp = z1^2x2 /*
962	felem_mul(tmp, ftmp, x2);
963	/ tmp[i] < 4 * 2^57 * 2^57 = 2^116 /
964
965	/ ftmp = z1^2x2 - z2^2x1 /
966	felem_diff_128_64(tmp, ftmp2);
967	/ tmp[i] < 2^116 + 2^64 + 8 < 2^117 /
968	felem_reduce(ftmp, tmp);
969
970	/*
971	* the formulae are incorrect if the points are equal so we check for
972	* this and do doubling if this happens
973	*/
974	x_equal = felem_is_zero(ftmp);
975	y_equal = felem_is_zero(ftmp3);
976	z1_is_zero = felem_is_zero(z1);
977	z2_is_zero = felem_is_zero(z2);
978	/ In affine coordinates, (X_1, Y_1) == (X_2, Y_2) /
979	if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
980	point_double(x3, y3, z3, x1, y1, z1);
981	return;
982	}
983
984	/ ftmp5 = z1z2 /*
985	if (!mixed) {
986	felem_mul(tmp, z1, z2);
987	felem_reduce(ftmp5, tmp);
988	} else {
989	/ special case z2 = 0 is handled later /
990	felem_assign(ftmp5, z1);
991	}
992
993	/ z_out = (z1^2x2 - z2^2x1)(z1z2) /
994	felem_mul(tmp, ftmp, ftmp5);
995	felem_reduce(z_out, tmp);
996
997	/ ftmp = (z1^2x2 - z2^2x1)^2 /
998	felem_assign(ftmp5, ftmp);
999	felem_square(tmp, ftmp);
1000	felem_reduce(ftmp, tmp);
1001
1002	/ ftmp5 = (z1^2x2 - z2^2x1)^3 /
1003	felem_mul(tmp, ftmp, ftmp5);
1004	felem_reduce(ftmp5, tmp);
1005
1006	/ ftmp2 = z2^2x1(z1^2x2 - z2^2x1)^2 /
1007	felem_mul(tmp, ftmp2, ftmp);
1008	felem_reduce(ftmp2, tmp);
1009
1010	/ tmp = z2^3y1(z1^2x2 - z2^2x1)^3 /
1011	felem_mul(tmp, ftmp4, ftmp5);
1012	/ tmp[i] < 4 * 2^57 * 2^57 = 2^116 /
1013
1014	/ tmp2 = (z1^3y2 - z2^3y1)^2 /
1015	felem_square(tmp2, ftmp3);
1016	/ tmp2[i] < 4 * 2^57 * 2^57 < 2^116 /
1017
1018	/ tmp2 = (z1^3y2 - z2^3y1)^2 - (z1^2x2 - z2^2x1)^3 /
1019	felem_diff_128_64(tmp2, ftmp5);
1020	/ tmp2[i] < 2^116 + 2^64 + 8 < 2^117 /
1021
1022	/ ftmp5 = 2z2^2x1(z1^2x2 - z2^2x1)^2 /*
1023	felem_assign(ftmp5, ftmp2);
1024	felem_scalar(ftmp5, `2`);
1025	/ ftmp5[i] < 2 * 2^57 = 2^58 /
1026
1027	/-*
1028	* x_out = (z1^3y2 - z2^3y1)^2 - (z1^2x2 - z2^2x1)^3 -
1029	* 2z2^2x1(z1^2x2 - z2^2*x1)^2
1030	*/
1031	felem_diff_128_64(tmp2, ftmp5);
1032	/ tmp2[i] < 2^117 + 2^64 + 8 < 2^118 /
1033	felem_reduce(x_out, tmp2);
1034
1035	/ ftmp2 = z2^2x1(z1^2x2 - z2^2x1)^2 - x_out /
1036	felem_diff(ftmp2, x_out);
1037	/ ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 /
1038
1039	/*
1040	* tmp2 = (z1^3y2 - z2^3y1)(z2^2x1(z1^2x2 - z2^2*x1)^2 - x_out)
1041	*/
1042	felem_mul(tmp2, ftmp3, ftmp2);
1043	/ tmp2[i] < 4 * 2^57 * 2^59 = 2^118 /
1044
1045	/-*
1046	* y_out = (z1^3y2 - z2^3y1)(z2^2x1(z1^2x2 - z2^2*x1)^2 - x_out) -
1047	* z2^3y1(z1^2x2 - z2^2x1)^3
1048	*/
1049	widefelem_diff(tmp2, tmp);
1050	/ tmp2[i] < 2^118 + 2^120 < 2^121 /
1051	felem_reduce(y_out, tmp2);
1052
1053	/*
1054	* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1055	* the point at infinity, so we need to check for this separately
1056	*/
1057
1058	/*
1059	* if point 1 is at infinity, copy point 2 to output, and vice versa
1060	*/
1061	copy_conditional(x_out, x2, z1_is_zero);
1062	copy_conditional(x_out, x1, z2_is_zero);
1063	copy_conditional(y_out, y2, z1_is_zero);
1064	copy_conditional(y_out, y1, z2_is_zero);
1065	copy_conditional(z_out, z2, z1_is_zero);
1066	copy_conditional(z_out, z1, z2_is_zero);
1067	felem_assign(x3, x_out);
1068	felem_assign(y3, y_out);
1069	felem_assign(z3, z_out);
1070	}
1071
1072	/*
1073	* select_point selects the \|idx\|th point from a precomputation table and
1074	* copies it to out.
1075	* The pre_comp array argument should be size of \|size\| argument
1076	*/
1077	static void select_point(const u64 idx, unsigned int size,
1078	const felem pre_comp[][`3`], felem out[`3`])
1079	{
1080	unsigned i, j;
1081	limb *outlimbs = &out[`0`][`0`];
1082
1083	memset(out, `0`, sizeof(out) `3`);
1084	for (i = `0`; i < size; i++) {
1085	const limb *inlimbs = &pre_comp[i][`0`][`0`];
1086	u64 mask = i ^ idx;
1087	mask \|= mask >> `4`;
1088	mask \|= mask >> `2`;
1089	mask \|= mask >> `1`;
1090	mask &= `1`;
1091	mask--;
1092	for (j = `0`; j < `4` * `3`; j++)
1093	outlimbs[j] \|= inlimbs[j] & mask;
1094	}
1095	}
1096
1097	/ get_bit returns the \|i\|th bit in \|in\| /
1098	static char get_bit(const felem_bytearray in, unsigned i)
1099	{
1100	if (i >= `224`)
1101	return `0`;
1102	return (in[i >> `3`] >> (i & `7`)) & `1`;
1103	}
1104
1105	/*
1106	* Interleaved point multiplication using precomputed point multiples: The
1107	* small point multiples 0P, 1P, ..., 16*P are in pre_comp[], the scalars
1108	* in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1109	* generator, using certain (large) precomputed multiples in g_pre_comp.
1110	* Output point (X, Y, Z) is stored in x_out, y_out, z_out
1111	*/
1112	static void batch_mul(felem x_out, felem y_out, felem z_out,
1113	const felem_bytearray scalars[],
1114	const unsigned num_points, const u8 *g_scalar,
1115	const int mixed, const felem pre_comp[][`17`][`3`],
1116	const felem g_pre_comp[`2`][`16`][`3`])
1117	{
1118	int i, skip;
1119	unsigned num;
1120	unsigned gen_mul = (g_scalar != NULL);
1121	felem nq[`3`], tmp[`4`];
1122	u64 bits;
1123	u8 sign, digit;
1124
1125	/ set nq to the point at infinity /
1126	memset(nq, `0`, sizeof(nq));
1127
1128	/*
1129	* Loop over all scalars msb-to-lsb, interleaving additions of multiples
1130	* of the generator (two in each of the last 28 rounds) and additions of
1131	* other points multiples (every 5th round).
1132	*/
1133	skip = `1`; / save two point operations in the first*
1134	* round */
1135	for (i = (num_points ? `220` : `27`); i >= `0`; --i) {
1136	/ double /
1137	if (!skip)
1138	point_double(nq[`0`], nq[`1`], nq[`2`], nq[`0`], nq[`1`], nq[`2`]);
1139
1140	/ add multiples of the generator /
1141	if (gen_mul && (i <= `27`)) {
1142	/ first, look 28 bits upwards /
1143	bits = get_bit(g_scalar, i + `196`) << `3`;
1144	bits \|= get_bit(g_scalar, i + `140`) << `2`;
1145	bits \|= get_bit(g_scalar, i + `84`) << `1`;
1146	bits \|= get_bit(g_scalar, i + `28`);
1147	/ select the point to add, in constant time /
1148	select_point(bits, `16`, g_pre_comp[`1`], tmp);
1149
1150	if (!skip) {
1151	/ value 1 below is argument for "mixed" /
1152	point_add(nq[`0`], nq[`1`], nq[`2`],
1153	nq[`0`], nq[`1`], nq[`2`], `1`, tmp[`0`], tmp[`1`], tmp[`2`]);
1154	} else {
1155	memcpy(nq, tmp, `3` * sizeof(felem));
1156	skip = `0`;
1157	}
1158
1159	/ second, look at the current position /
1160	bits = get_bit(g_scalar, i + `168`) << `3`;
1161	bits \|= get_bit(g_scalar, i + `112`) << `2`;
1162	bits \|= get_bit(g_scalar, i + `56`) << `1`;
1163	bits \|= get_bit(g_scalar, i);
1164	/ select the point to add, in constant time /
1165	select_point(bits, `16`, g_pre_comp[`0`], tmp);
1166	point_add(nq[`0`], nq[`1`], nq[`2`],
1167	nq[`0`], nq[`1`], nq[`2`],
1168	`1` / mixed / , tmp[`0`], tmp[`1`], tmp[`2`]);
1169	}
1170
1171	/ do other additions every 5 doublings /
1172	if (num_points && (i % `5` == `0`)) {
1173	/ loop over all scalars /
1174	for (num = `0`; num < num_points; ++num) {
1175	bits = get_bit(scalars[num], i + `4`) << `5`;
1176	bits \|= get_bit(scalars[num], i + `3`) << `4`;
1177	bits \|= get_bit(scalars[num], i + `2`) << `3`;
1178	bits \|= get_bit(scalars[num], i + `1`) << `2`;
1179	bits \|= get_bit(scalars[num], i) << `1`;
1180	bits \|= get_bit(scalars[num], i - `1`);
1181	ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1182
1183	/ select the point to add or subtract /
1184	select_point(digit, `17`, pre_comp[num], tmp);
1185	felem_neg(tmp[`3`], tmp[`1`]); / (X, -Y, Z) is the negative*
1186	* point */
1187	copy_conditional(tmp[`1`], tmp[`3`], sign);
1188
1189	if (!skip) {
1190	point_add(nq[`0`], nq[`1`], nq[`2`],
1191	nq[`0`], nq[`1`], nq[`2`],
1192	mixed, tmp[`0`], tmp[`1`], tmp[`2`]);
1193	} else {
1194	memcpy(nq, tmp, `3` * sizeof(felem));
1195	skip = `0`;
1196	}
1197	}
1198	}
1199	}
1200	felem_assign(x_out, nq[`0`]);
1201	felem_assign(y_out, nq[`1`]);
1202	felem_assign(z_out, nq[`2`]);
1203	}
1204
1205	/****************************************************************************/
1206	/*
1207	* FUNCTIONS TO MANAGE PRECOMPUTATION
1208	*/
1209
1210	static NISTP224_PRE_COMP nistp224_pre_comp_new(void*)
1211	{
1212	NISTP224_PRE_COMP ret = OPENSSL_zalloc(sizeof(ret));
1213
1214	if (!ret) {
1215	ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1216	return ret;
1217	}
1218
1219	ret->references = `1`;
1220
1221	ret->lock = CRYPTO_THREAD_lock_new();
1222	if (ret->lock == NULL) {
1223	ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1224	OPENSSL_free(ret);
1225	return NULL;
1226	}
1227	return ret;
1228	}
1229
1230	NISTP224_PRE_COMP EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP p)
1231	{
1232	int i;
1233	if (p != NULL)
1234	CRYPTO_UP_REF(&p->references, &i, p->lock);
1235	return p;
1236	}
1237
1238	void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1239	{
1240	int i;
1241
1242	if (p == NULL)
1243	return;
1244
1245	CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1246	REF_PRINT_COUNT("EC_nistp224", x);
1247	if (i > `0`)
1248	return;
1249	REF_ASSERT_ISNT(i < `0`);
1250
1251	CRYPTO_THREAD_lock_free(p->lock);
1252	OPENSSL_free(p);
1253	}
1254
1255	/****************************************************************************/
1256	/*
1257	* OPENSSL EC_METHOD FUNCTIONS
1258	*/
1259
1260	int ec_GFp_nistp224_group_init(EC_GROUP *group)
1261	{
1262	int ret;
1263	ret = ec_GFp_simple_group_init(group);
1264	group->a_is_minus3 = `1`;
1265	return ret;
1266	}
1267
1268	int ec_GFp_nistp224_group_set_curve(EC_GROUP group, const* BIGNUM *p,
1269	const BIGNUM a, const* BIGNUM *b,
1270	BN_CTX *ctx)
1271	{
1272	int ret = `0`;
1273	BIGNUM curve_p, curve_a, *curve_b;
1274	#ifndef FIPS_MODE
1275	BN_CTX *new_ctx = NULL;
1276
1277	if (ctx == NULL)
1278	ctx = new_ctx = BN_CTX_new();
1279	#endif
1280	if (ctx == NULL)
1281	return `0`;
1282
1283	BN_CTX_start(ctx);
1284	curve_p = BN_CTX_get(ctx);
1285	curve_a = BN_CTX_get(ctx);
1286	curve_b = BN_CTX_get(ctx);
1287	if (curve_b == NULL)
1288	goto err;
1289	BN_bin2bn(nistp224_curve_params[`0`], sizeof(felem_bytearray), curve_p);
1290	BN_bin2bn(nistp224_curve_params[`1`], sizeof(felem_bytearray), curve_a);
1291	BN_bin2bn(nistp224_curve_params[`2`], sizeof(felem_bytearray), curve_b);
1292	if ((BN_cmp(curve_p, p)) \|\| (BN_cmp(curve_a, a)) \|\| (BN_cmp(curve_b, b))) {
1293	ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1294	EC_R_WRONG_CURVE_PARAMETERS);
1295	goto err;
1296	}
1297	group->field_mod_func = BN_nist_mod_224;
1298	ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1299	err:
1300	BN_CTX_end(ctx);
1301	#ifndef FIPS_MODE
1302	BN_CTX_free(new_ctx);
1303	#endif
1304	return ret;
1305	}
1306
1307	/*
1308	* Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1309	* (X/Z^2, Y/Z^3)
1310	*/
1311	int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1312	const EC_POINT *point,
1313	BIGNUM x, BIGNUM y,
1314	BN_CTX *ctx)
1315	{
1316	felem z1, z2, x_in, y_in, x_out, y_out;
1317	widefelem tmp;
1318
1319	if (EC_POINT_is_at_infinity(group, point)) {
1320	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1321	EC_R_POINT_AT_INFINITY);
1322	return `0`;
1323	}
1324	if ((!BN_to_felem(x_in, point->X)) \|\| (!BN_to_felem(y_in, point->Y)) \|\|
1325	(!BN_to_felem(z1, point->Z)))
1326	return `0`;
1327	felem_inv(z2, z1);
1328	felem_square(tmp, z2);
1329	felem_reduce(z1, tmp);
1330	felem_mul(tmp, x_in, z1);
1331	felem_reduce(x_in, tmp);
1332	felem_contract(x_out, x_in);
1333	if (x != NULL) {
1334	if (!felem_to_BN(x, x_out)) {
1335	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1336	ERR_R_BN_LIB);
1337	return `0`;
1338	}
1339	}
1340	felem_mul(tmp, z1, z2);
1341	felem_reduce(z1, tmp);
1342	felem_mul(tmp, y_in, z1);
1343	felem_reduce(y_in, tmp);
1344	felem_contract(y_out, y_in);
1345	if (y != NULL) {
1346	if (!felem_to_BN(y, y_out)) {
1347	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348	ERR_R_BN_LIB);
1349	return `0`;
1350	}
1351	}
1352	return `1`;
1353	}
1354
1355	static void make_points_affine(size_t num, felem points[ / num / ][`3`],
1356	felem tmp_felems[ / num+1 / ])
1357	{
1358	/*
1359	* Runs in constant time, unless an input is the point at infinity (which
1360	* normally shouldn't happen).
1361	*/
1362	ec_GFp_nistp_points_make_affine_internal(num,
1363	points,
1364	sizeof(felem),
1365	tmp_felems,
1366	(void ()(void* *))felem_one,
1367	felem_is_zero_int,
1368	(void ()(void* , const* void *))
1369	felem_assign,
1370	(void ()(void* , const* void *))
1371	felem_square_reduce, (void (*)
1372	(void *,
1373	const void
1374	*,
1375	const void
1376	*))
1377	felem_mul_reduce,
1378	(void ()(void* , const* void *))
1379	felem_inv,
1380	(void ()(void* , const* void *))
1381	felem_contract);
1382	}
1383
1384	/*
1385	* Computes scalargenerator + \sum scalars[i]points[i], ignoring NULL
1386	* values Result is stored in r (r can equal one of the inputs).
1387	*/
1388	int ec_GFp_nistp224_points_mul(const EC_GROUP group, EC_POINT r,
1389	const BIGNUM *scalar, size_t num,
1390	const EC_POINT *points[],
1391	const BIGNUM scalars[], BN_CTX ctx)
1392	{
1393	int ret = `0`;
1394	int j;
1395	unsigned i;
1396	int mixed = `0`;
1397	BIGNUM x, y, z, tmp_scalar;
1398	felem_bytearray g_secret;
1399	felem_bytearray *secrets = NULL;
1400	felem (*pre_comp)[`17`][`3`] = NULL;
1401	felem *tmp_felems = NULL;
1402	int num_bytes;
1403	int have_pre_comp = `0`;
1404	size_t num_points = num;
1405	felem x_in, y_in, z_in, x_out, y_out, z_out;
1406	NISTP224_PRE_COMP *pre = NULL;
1407	const felem(*g_pre_comp)[`16`][`3`] = NULL;
1408	EC_POINT *generator = NULL;
1409	const EC_POINT *p = NULL;
1410	const BIGNUM *p_scalar = NULL;
1411
1412	BN_CTX_start(ctx);
1413	x = BN_CTX_get(ctx);
1414	y = BN_CTX_get(ctx);
1415	z = BN_CTX_get(ctx);
1416	tmp_scalar = BN_CTX_get(ctx);
1417	if (tmp_scalar == NULL)
1418	goto err;
1419
1420	if (scalar != NULL) {
1421	pre = group->pre_comp.nistp224;
1422	if (pre)
1423	/ we have precomputation, try to use it /
1424	g_pre_comp = (const felem(*)[`16`][`3`])pre->g_pre_comp;
1425	else
1426	/ try to use the standard precomputation /
1427	g_pre_comp = &gmul[`0`];
1428	generator = EC_POINT_new(group);
1429	if (generator == NULL)
1430	goto err;
1431	/ get the generator from precomputation /
1432	if (!felem_to_BN(x, g_pre_comp[`0`][`1`][`0`]) \|\|
1433	!felem_to_BN(y, g_pre_comp[`0`][`1`][`1`]) \|\|
1434	!felem_to_BN(z, g_pre_comp[`0`][`1`][`2`])) {
1435	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1436	goto err;
1437	}
1438	if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1439	generator, x, y, z,
1440	ctx))
1441	goto err;
1442	if (`0` == EC_POINT_cmp(group, generator, group->generator, ctx))
1443	/ precomputation matches generator /
1444	have_pre_comp = `1`;
1445	else
1446	/*
1447	* we don't have valid precomputation: treat the generator as a
1448	* random point
1449	*/
1450	num_points = num_points + `1`;
1451	}
1452
1453	if (num_points > `0`) {
1454	if (num_points >= `3`) {
1455	/*
1456	* unless we precompute multiples for just one or two points,
1457	* converting those into affine form is time well spent
1458	*/
1459	mixed = `1`;
1460	}
1461	secrets = OPENSSL_zalloc(sizeof(secrets) num_points);
1462	pre_comp = OPENSSL_zalloc(sizeof(pre_comp) num_points);
1463	if (mixed)
1464	tmp_felems =
1465	OPENSSL_malloc(sizeof(felem) * (num_points * `17` + `1`));
1466	if ((secrets == NULL) \|\| (pre_comp == NULL)
1467	\|\| (mixed && (tmp_felems == NULL))) {
1468	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1469	goto err;
1470	}
1471
1472	/*
1473	* we treat NULL scalars as 0, and NULL points as points at infinity,
1474	* i.e., they contribute nothing to the linear combination
1475	*/
1476	for (i = `0`; i < num_points; ++i) {
1477	if (i == num) {
1478	/ the generator /
1479	p = EC_GROUP_get0_generator(group);
1480	p_scalar = scalar;
1481	} else {
1482	/ the i^th point /
1483	p = points[i];
1484	p_scalar = scalars[i];
1485	}
1486	if ((p_scalar != NULL) && (p != NULL)) {
1487	/ reduce scalar to 0 <= scalar < 2^224 /
1488	if ((BN_num_bits(p_scalar) > `224`)
1489	\|\| (BN_is_negative(p_scalar))) {
1490	/*
1491	* this is an unusual input, and we don't guarantee
1492	* constant-timeness
1493	*/
1494	if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1495	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1496	goto err;
1497	}
1498	num_bytes = BN_bn2lebinpad(tmp_scalar,
1499	secrets[i], sizeof(secrets[i]));
1500	} else {
1501	num_bytes = BN_bn2lebinpad(p_scalar,
1502	secrets[i], sizeof(secrets[i]));
1503	}
1504	if (num_bytes < `0`) {
1505	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1506	goto err;
1507	}
1508	/ precompute multiples /
1509	if ((!BN_to_felem(x_out, p->X)) \|\|
1510	(!BN_to_felem(y_out, p->Y)) \|\|
1511	(!BN_to_felem(z_out, p->Z)))
1512	goto err;
1513	felem_assign(pre_comp[i][`1`][`0`], x_out);
1514	felem_assign(pre_comp[i][`1`][`1`], y_out);
1515	felem_assign(pre_comp[i][`1`][`2`], z_out);
1516	for (j = `2`; j <= `16`; ++j) {
1517	if (j & `1`) {
1518	point_add(pre_comp[i][j][`0`], pre_comp[i][j][`1`],
1519	pre_comp[i][j][`2`], pre_comp[i][`1`][`0`],
1520	pre_comp[i][`1`][`1`], pre_comp[i][`1`][`2`], `0`,
1521	pre_comp[i][j - `1`][`0`],
1522	pre_comp[i][j - `1`][`1`],
1523	pre_comp[i][j - `1`][`2`]);
1524	} else {
1525	point_double(pre_comp[i][j][`0`], pre_comp[i][j][`1`],
1526	pre_comp[i][j][`2`], pre_comp[i][j / `2`][`0`],
1527	pre_comp[i][j / `2`][`1`],
1528	pre_comp[i][j / `2`][`2`]);
1529	}
1530	}
1531	}
1532	}
1533	if (mixed)
1534	make_points_affine(num_points * `17`, pre_comp[`0`], tmp_felems);
1535	}
1536
1537	/ the scalar for the generator /
1538	if ((scalar != NULL) && (have_pre_comp)) {
1539	memset(g_secret, `0`, sizeof(g_secret));
1540	/ reduce scalar to 0 <= scalar < 2^224 /
1541	if ((BN_num_bits(scalar) > `224`) \|\| (BN_is_negative(scalar))) {
1542	/*
1543	* this is an unusual input, and we don't guarantee
1544	* constant-timeness
1545	*/
1546	if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1547	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1548	goto err;
1549	}
1550	num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1551	} else {
1552	num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1553	}
1554	/ do the multiplication with generator precomputation /
1555	batch_mul(x_out, y_out, z_out,
1556	(const felem_bytearray(*))secrets, num_points,
1557	g_secret,
1558	mixed, (const felem(*)[`17`][`3`])pre_comp, g_pre_comp);
1559	} else {
1560	/ do the multiplication without generator precomputation /
1561	batch_mul(x_out, y_out, z_out,
1562	(const felem_bytearray(*))secrets, num_points,
1563	NULL, mixed, (const felem(*)[`17`][`3`])pre_comp, NULL);
1564	}
1565	/ reduce the output to its unique minimal representation /
1566	felem_contract(x_in, x_out);
1567	felem_contract(y_in, y_out);
1568	felem_contract(z_in, z_out);
1569	if ((!felem_to_BN(x, x_in)) \|\| (!felem_to_BN(y, y_in)) \|\|
1570	(!felem_to_BN(z, z_in))) {
1571	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1572	goto err;
1573	}
1574	ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1575
1576	err:
1577	BN_CTX_end(ctx);
1578	EC_POINT_free(generator);
1579	OPENSSL_free(secrets);
1580	OPENSSL_free(pre_comp);
1581	OPENSSL_free(tmp_felems);
1582	return ret;
1583	}
1584
1585	int ec_GFp_nistp224_precompute_mult(EC_GROUP group, BN_CTX ctx)
1586	{
1587	int ret = `0`;
1588	NISTP224_PRE_COMP *pre = NULL;
1589	int i, j;
1590	BIGNUM x, y;
1591	EC_POINT *generator = NULL;
1592	felem tmp_felems[`32`];
1593	#ifndef FIPS_MODE
1594	BN_CTX *new_ctx = NULL;
1595	#endif
1596
1597	/ throw away old precomputation /
1598	EC_pre_comp_free(group);
1599
1600	#ifndef FIPS_MODE
1601	if (ctx == NULL)
1602	ctx = new_ctx = BN_CTX_new();
1603	#endif
1604	if (ctx == NULL)
1605	return `0`;
1606
1607	BN_CTX_start(ctx);
1608	x = BN_CTX_get(ctx);
1609	y = BN_CTX_get(ctx);
1610	if (y == NULL)
1611	goto err;
1612	/ get the generator /
1613	if (group->generator == NULL)
1614	goto err;
1615	generator = EC_POINT_new(group);
1616	if (generator == NULL)
1617	goto err;
1618	BN_bin2bn(nistp224_curve_params[`3`], sizeof(felem_bytearray), x);
1619	BN_bin2bn(nistp224_curve_params[`4`], sizeof(felem_bytearray), y);
1620	if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1621	goto err;
1622	if ((pre = nistp224_pre_comp_new()) == NULL)
1623	goto err;
1624	/*
1625	* if the generator is the standard one, use built-in precomputation
1626	*/
1627	if (`0` == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1628	memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1629	goto done;
1630	}
1631	if ((!BN_to_felem(pre->g_pre_comp[`0`][`1`][`0`], group->generator->X)) \|\|
1632	(!BN_to_felem(pre->g_pre_comp[`0`][`1`][`1`], group->generator->Y)) \|\|
1633	(!BN_to_felem(pre->g_pre_comp[`0`][`1`][`2`], group->generator->Z)))
1634	goto err;
1635	/*
1636	* compute 2^56G, 2^112G, 2^168G for the first table, 2^28G, 2^84*G,
1637	* 2^140G, 2^196G for the second one
1638	*/
1639	for (i = `1`; i <= `8`; i <<= `1`) {
1640	point_double(pre->g_pre_comp[`1`][i][`0`], pre->g_pre_comp[`1`][i][`1`],
1641	pre->g_pre_comp[`1`][i][`2`], pre->g_pre_comp[`0`][i][`0`],
1642	pre->g_pre_comp[`0`][i][`1`], pre->g_pre_comp[`0`][i][`2`]);
1643	for (j = `0`; j < `27`; ++j) {
1644	point_double(pre->g_pre_comp[`1`][i][`0`], pre->g_pre_comp[`1`][i][`1`],
1645	pre->g_pre_comp[`1`][i][`2`], pre->g_pre_comp[`1`][i][`0`],
1646	pre->g_pre_comp[`1`][i][`1`], pre->g_pre_comp[`1`][i][`2`]);
1647	}
1648	if (i == `8`)
1649	break;
1650	point_double(pre->g_pre_comp[`0`][`2` * i][`0`],
1651	pre->g_pre_comp[`0`][`2` * i][`1`],
1652	pre->g_pre_comp[`0`][`2` * i][`2`], pre->g_pre_comp[`1`][i][`0`],
1653	pre->g_pre_comp[`1`][i][`1`], pre->g_pre_comp[`1`][i][`2`]);
1654	for (j = `0`; j < `27`; ++j) {
1655	point_double(pre->g_pre_comp[`0`][`2` * i][`0`],
1656	pre->g_pre_comp[`0`][`2` * i][`1`],
1657	pre->g_pre_comp[`0`][`2` * i][`2`],
1658	pre->g_pre_comp[`0`][`2` * i][`0`],
1659	pre->g_pre_comp[`0`][`2` * i][`1`],
1660	pre->g_pre_comp[`0`][`2` * i][`2`]);
1661	}
1662	}
1663	for (i = `0`; i < `2`; i++) {
1664	/ g_pre_comp[i][0] is the point at infinity /
1665	memset(pre->g_pre_comp[i][`0`], `0`, sizeof(pre->g_pre_comp[i][`0`]));
1666	/ the remaining multiples /
1667	/ 2^56G + 2^112G resp. 2^84G + 2^140G /
1668	point_add(pre->g_pre_comp[i][`6`][`0`], pre->g_pre_comp[i][`6`][`1`],
1669	pre->g_pre_comp[i][`6`][`2`], pre->g_pre_comp[i][`4`][`0`],
1670	pre->g_pre_comp[i][`4`][`1`], pre->g_pre_comp[i][`4`][`2`],
1671	`0`, pre->g_pre_comp[i][`2`][`0`], pre->g_pre_comp[i][`2`][`1`],
1672	pre->g_pre_comp[i][`2`][`2`]);
1673	/ 2^56G + 2^168G resp. 2^84G + 2^196G /
1674	point_add(pre->g_pre_comp[i][`10`][`0`], pre->g_pre_comp[i][`10`][`1`],
1675	pre->g_pre_comp[i][`10`][`2`], pre->g_pre_comp[i][`8`][`0`],
1676	pre->g_pre_comp[i][`8`][`1`], pre->g_pre_comp[i][`8`][`2`],
1677	`0`, pre->g_pre_comp[i][`2`][`0`], pre->g_pre_comp[i][`2`][`1`],
1678	pre->g_pre_comp[i][`2`][`2`]);
1679	/ 2^112G + 2^168G resp. 2^140G + 2^196G /
1680	point_add(pre->g_pre_comp[i][`12`][`0`], pre->g_pre_comp[i][`12`][`1`],
1681	pre->g_pre_comp[i][`12`][`2`], pre->g_pre_comp[i][`8`][`0`],
1682	pre->g_pre_comp[i][`8`][`1`], pre->g_pre_comp[i][`8`][`2`],
1683	`0`, pre->g_pre_comp[i][`4`][`0`], pre->g_pre_comp[i][`4`][`1`],
1684	pre->g_pre_comp[i][`4`][`2`]);
1685	/*
1686	* 2^56G + 2^112G + 2^168G resp. 2^84G + 2^140G + 2^196G
1687	*/
1688	point_add(pre->g_pre_comp[i][`14`][`0`], pre->g_pre_comp[i][`14`][`1`],
1689	pre->g_pre_comp[i][`14`][`2`], pre->g_pre_comp[i][`12`][`0`],
1690	pre->g_pre_comp[i][`12`][`1`], pre->g_pre_comp[i][`12`][`2`],
1691	`0`, pre->g_pre_comp[i][`2`][`0`], pre->g_pre_comp[i][`2`][`1`],
1692	pre->g_pre_comp[i][`2`][`2`]);
1693	for (j = `1`; j < `8`; ++j) {
1694	/ odd multiples: add G resp. 2^28G /*
1695	point_add(pre->g_pre_comp[i][`2` * j + `1`][`0`],
1696	pre->g_pre_comp[i][`2` * j + `1`][`1`],
1697	pre->g_pre_comp[i][`2` * j + `1`][`2`],
1698	pre->g_pre_comp[i][`2` * j][`0`],
1699	pre->g_pre_comp[i][`2` * j][`1`],
1700	pre->g_pre_comp[i][`2` * j][`2`], `0`,
1701	pre->g_pre_comp[i][`1`][`0`], pre->g_pre_comp[i][`1`][`1`],
1702	pre->g_pre_comp[i][`1`][`2`]);
1703	}
1704	}
1705	make_points_affine(`31`, &(pre->g_pre_comp[`0`][`1`]), tmp_felems);
1706
1707	done:
1708	SETPRECOMP(group, nistp224, pre);
1709	pre = NULL;
1710	ret = `1`;
1711	err:
1712	BN_CTX_end(ctx);
1713	EC_POINT_free(generator);
1714	#ifndef FIPS_MODE
1715	BN_CTX_free(new_ctx);
1716	#endif
1717	EC_nistp224_pre_comp_free(pre);
1718	return ret;
1719	}
1720
1721	int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1722	{
1723	return HAVEPRECOMP(group, nistp224);
1724	}
1725
1726	#endif
1727

Browse the source code of ClickHouse/contrib/openssl/crypto/ec/ecp_nistp224.c