| 1 | /* Copyright (c) 2015, Google Inc. |
| 2 | * |
| 3 | * Permission to use, copy, modify, and/or distribute this software for any |
| 4 | * purpose with or without fee is hereby granted, provided that the above |
| 5 | * copyright notice and this permission notice appear in all copies. |
| 6 | * |
| 7 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 8 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| 10 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
| 14 | |
| 15 | #include <openssl/base.h> |
| 16 | |
| 17 | #include <openssl/ec.h> |
| 18 | |
| 19 | #include "internal.h" |
| 20 | |
| 21 | |
| 22 | // This function looks at 5+1 scalar bits (5 current, 1 adjacent less |
| 23 | // significant bit), and recodes them into a signed digit for use in fast point |
| 24 | // multiplication: the use of signed rather than unsigned digits means that |
| 25 | // fewer points need to be precomputed, given that point inversion is easy (a |
| 26 | // precomputed point dP makes -dP available as well). |
| 27 | // |
| 28 | // BACKGROUND: |
| 29 | // |
| 30 | // Signed digits for multiplication were introduced by Booth ("A signed binary |
| 31 | // multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV, |
| 32 | // pt. 2 (1951), pp. 236-240), in that case for multiplication of integers. |
| 33 | // Booth's original encoding did not generally improve the density of nonzero |
| 34 | // digits over the binary representation, and was merely meant to simplify the |
| 35 | // handling of signed factors given in two's complement; but it has since been |
| 36 | // shown to be the basis of various signed-digit representations that do have |
| 37 | // further advantages, including the wNAF, using the following general |
| 38 | // approach: |
| 39 | // |
| 40 | // (1) Given a binary representation |
| 41 | // |
| 42 | // b_k ... b_2 b_1 b_0, |
| 43 | // |
| 44 | // of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1 |
| 45 | // by using bit-wise subtraction as follows: |
| 46 | // |
| 47 | // b_k b_(k-1) ... b_2 b_1 b_0 |
| 48 | // - b_k ... b_3 b_2 b_1 b_0 |
| 49 | // ----------------------------------------- |
| 50 | // s_(k+1) s_k ... s_3 s_2 s_1 s_0 |
| 51 | // |
| 52 | // A left-shift followed by subtraction of the original value yields a new |
| 53 | // representation of the same value, using signed bits s_i = b_(i-1) - b_i. |
| 54 | // This representation from Booth's paper has since appeared in the |
| 55 | // literature under a variety of different names including "reversed binary |
| 56 | // form", "alternating greedy expansion", "mutual opposite form", and |
| 57 | // "sign-alternating {+-1}-representation". |
| 58 | // |
| 59 | // An interesting property is that among the nonzero bits, values 1 and -1 |
| 60 | // strictly alternate. |
| 61 | // |
| 62 | // (2) Various window schemes can be applied to the Booth representation of |
| 63 | // integers: for example, right-to-left sliding windows yield the wNAF |
| 64 | // (a signed-digit encoding independently discovered by various researchers |
| 65 | // in the 1990s), and left-to-right sliding windows yield a left-to-right |
| 66 | // equivalent of the wNAF (independently discovered by various researchers |
| 67 | // around 2004). |
| 68 | // |
| 69 | // To prevent leaking information through side channels in point multiplication, |
| 70 | // we need to recode the given integer into a regular pattern: sliding windows |
| 71 | // as in wNAFs won't do, we need their fixed-window equivalent -- which is a few |
| 72 | // decades older: we'll be using the so-called "modified Booth encoding" due to |
| 73 | // MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49 |
| 74 | // (1961), pp. 67-91), in a radix-2^5 setting. That is, we always combine five |
| 75 | // signed bits into a signed digit: |
| 76 | // |
| 77 | // s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j) |
| 78 | // |
| 79 | // The sign-alternating property implies that the resulting digit values are |
| 80 | // integers from -16 to 16. |
| 81 | // |
| 82 | // Of course, we don't actually need to compute the signed digits s_i as an |
| 83 | // intermediate step (that's just a nice way to see how this scheme relates |
| 84 | // to the wNAF): a direct computation obtains the recoded digit from the |
| 85 | // six bits b_(5j + 4) ... b_(5j - 1). |
| 86 | // |
| 87 | // This function takes those six bits as an integer (0 .. 63), writing the |
| 88 | // recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute |
| 89 | // value, in the range 0 .. 16). Note that this integer essentially provides |
| 90 | // the input bits "shifted to the left" by one position: for example, the input |
| 91 | // to compute the least significant recoded digit, given that there's no bit |
| 92 | // b_-1, has to be b_4 b_3 b_2 b_1 b_0 0. |
| 93 | // |
| 94 | // DOUBLING CASE: |
| 95 | // |
| 96 | // Point addition formulas for short Weierstrass curves are often incomplete. |
| 97 | // Edge cases such as P + P or P + ∞ must be handled separately. This |
| 98 | // complicates constant-time requirements. P + ∞ cannot be avoided (any window |
| 99 | // may be zero) and is handled with constant-time selects. P + P (where P is not |
| 100 | // ∞) usually is not. Instead, windowing strategies are chosen to avoid this |
| 101 | // case. Whether this happens depends on the group order. |
| 102 | // |
| 103 | // Let w be the window width (in this function, w = 5). The non-trivial doubling |
| 104 | // case in single-point scalar multiplication may occur if and only if the |
| 105 | // 2^(w-1) bit of the group order is zero. |
| 106 | // |
| 107 | // Note the above only holds if the scalar is fully reduced and the group order |
| 108 | // is a prime that is much larger than 2^w. It also only holds when windows |
| 109 | // are applied from most significant to least significant, doubling between each |
| 110 | // window. It does not apply to more complex table strategies such as |
| 111 | // |EC_GFp_nistz256_method|. |
| 112 | // |
| 113 | // PROOF: |
| 114 | // |
| 115 | // Let n be the group order. Let l be the number of bits needed to represent n. |
| 116 | // Assume there exists some 0 <= k < n such that signed w-bit windowed |
| 117 | // multiplication hits the doubling case. |
| 118 | // |
| 119 | // Windowed multiplication consists of iterating over groups of s_i (defined |
| 120 | // above based on k's binary representation) from most to least significant. At |
| 121 | // iteration i (for i = ..., 3w, 2w, w, 0, starting from the most significant |
| 122 | // window), we: |
| 123 | // |
| 124 | // 1. Double the accumulator A, w times. Let A_i be the value of A at this |
| 125 | // point. |
| 126 | // |
| 127 | // 2. Set A to T_i + A_i, where T_i is a precomputed multiple of P |
| 128 | // corresponding to the window s_(i+w-1) ... s_i. |
| 129 | // |
| 130 | // Let j be the index such that A_j = T_j ≠ ∞. Looking at A_i and T_i as |
| 131 | // multiples of P, define a_i and t_i to be scalar coefficients of A_i and T_i. |
| 132 | // Thus a_j = t_j ≠ 0 (mod n). Note a_i and t_i may not be reduced mod n. t_i is |
| 133 | // the value of the w signed bits s_(i+w-1) ... s_i. a_i is computed as a_i = |
| 134 | // 2^w * (a_(i+w) + t_(i+w)). |
| 135 | // |
| 136 | // t_i is bounded by -2^(w-1) <= t_i <= 2^(w-1). Additionally, we may write it |
| 137 | // in terms of unsigned bits b_i. t_i consists of signed bits s_(i+w-1) ... s_i. |
| 138 | // This is computed as: |
| 139 | // |
| 140 | // b_(i+w-2) b_(i+w-3) ... b_i b_(i-1) |
| 141 | // - b_(i+w-1) b_(i+w-2) ... b_(i+1) b_i |
| 142 | // -------------------------------------------- |
| 143 | // t_i = s_(i+w-1) s_(i+w-2) ... s_(i+1) s_i |
| 144 | // |
| 145 | // Observe that b_(i+w-2) through b_i occur in both terms. Let x be the integer |
| 146 | // represented by that bit string, i.e. 2^(w-2)*b_(i+w-2) + ... + b_i. |
| 147 | // |
| 148 | // t_i = (2*x + b_(i-1)) - (2^(w-1)*b_(i+w-1) + x) |
| 149 | // = x - 2^(w-1)*b_(i+w-1) + b_(i-1) |
| 150 | // |
| 151 | // Or, using C notation for bit operations: |
| 152 | // |
| 153 | // t_i = (k>>i) & ((1<<(w-1)) - 1) - (k>>i) & (1<<(w-1)) + (k>>(i-1)) & 1 |
| 154 | // |
| 155 | // Note b_(i-1) is added in left-shifted by one (or doubled) from its place. |
| 156 | // This is compensated by t_(i-w)'s subtraction term. Thus, a_i may be computed |
| 157 | // by adding b_l b_(l-1) ... b_(i+1) b_i and an extra copy of b_(i-1). In C |
| 158 | // notation, this is: |
| 159 | // |
| 160 | // a_i = (k>>(i+w)) << w + ((k>>(i+w-1)) & 1) << w |
| 161 | // |
| 162 | // Observe that, while t_i may be positive or negative, a_i is bounded by |
| 163 | // 0 <= a_i < n + 2^w. Additionally, a_i can only be zero if b_(i+w-1) and up |
| 164 | // are all zero. (Note this implies a non-trivial P + (-P) is unreachable for |
| 165 | // all groups. That would imply the subsequent a_i is zero, which means all |
| 166 | // terms thus far were zero.) |
| 167 | // |
| 168 | // Returning to our doubling position, we have a_j = t_j (mod n). We now |
| 169 | // determine the value of a_j - t_j, which must be divisible by n. Our bounds on |
| 170 | // a_j and t_j imply a_j - t_j is 0 or n. If it is 0, a_j = t_j. However, 2^w |
| 171 | // divides a_j and -2^(w-1) <= t_j <= 2^(w-1), so this can only happen if |
| 172 | // a_j = t_j = 0, which is a trivial doubling. Therefore, a_j - t_j = n. |
| 173 | // |
| 174 | // Now we determine j. Suppose j > 0. w divides j, so j >= w. Then, |
| 175 | // |
| 176 | // n = a_j - t_j = (k>>(j+w)) << w + ((k>>(j+w-1)) & 1) << w - t_j |
| 177 | // <= k/2^j + 2^w - t_j |
| 178 | // < n/2^w + 2^w + 2^(w-1) |
| 179 | // |
| 180 | // n is much larger than 2^w, so this is impossible. Thus, j = 0: only the final |
| 181 | // addition may hit the doubling case. |
| 182 | // |
| 183 | // Finally, we consider bit patterns for n and k. Divide k into k_H + k_M + k_L |
| 184 | // such that k_H is the contribution from b_(l-1) .. b_w, k_M is the |
| 185 | // contribution from b_(w-1), and k_L is the contribution from b_(w-2) ... b_0. |
| 186 | // That is: |
| 187 | // |
| 188 | // - 2^w divides k_H |
| 189 | // - k_M is 0 or 2^(w-1) |
| 190 | // - 0 <= k_L < 2^(w-1) |
| 191 | // |
| 192 | // Divide n into n_H + n_M + n_L similarly. We thus have: |
| 193 | // |
| 194 | // t_0 = (k>>0) & ((1<<(w-1)) - 1) - (k>>0) & (1<<(w-1)) + (k>>(0-1)) & 1 |
| 195 | // = k & ((1<<(w-1)) - 1) - k & (1<<(w-1)) |
| 196 | // = k_L - k_M |
| 197 | // |
| 198 | // a_0 = (k>>(0+w)) << w + ((k>>(0+w-1)) & 1) << w |
| 199 | // = (k>>w) << w + ((k>>(w-1)) & 1) << w |
| 200 | // = k_H + 2*k_M |
| 201 | // |
| 202 | // n = a_0 - t_0 |
| 203 | // n_H + n_M + n_L = (k_H + 2*k_M) - (k_L - k_M) |
| 204 | // = k_H + 3*k_M - k_L |
| 205 | // |
| 206 | // k_H - k_L < k and k < n, so k_H - k_L ≠ n. Therefore k_M is not 0 and must be |
| 207 | // 2^(w-1). Now we consider k_H and n_H. We know k_H <= n_H. Suppose k_H = n_H. |
| 208 | // Then, |
| 209 | // |
| 210 | // n_M + n_L = 3*(2^(w-1)) - k_L |
| 211 | // > 3*(2^(w-1)) - 2^(w-1) |
| 212 | // = 2^w |
| 213 | // |
| 214 | // Contradiction (n_M + n_L is the bottom w bits of n). Thus k_H < n_H. Suppose |
| 215 | // k_H <= n_H - 2*2^w. Then, |
| 216 | // |
| 217 | // n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L |
| 218 | // <= n_H - 2*2^w + 3*(2^(w-1)) - k_L |
| 219 | // n_M + n_L <= -2^(w-1) - k_L |
| 220 | // |
| 221 | // Contradiction. Thus, k_H = n_H - 2^w. (Note 2^w divides n_H and k_H.) Thus, |
| 222 | // |
| 223 | // n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L |
| 224 | // = n_H - 2^w + 3*(2^(w-1)) - k_L |
| 225 | // n_M + n_L = 2^(w-1) - k_L |
| 226 | // <= 2^(w-1) |
| 227 | // |
| 228 | // Equality would mean 2^(w-1) divides n, which is impossible if n is prime. |
| 229 | // Thus n_M + n_L < 2^(w-1), so n_M is zero, proving our condition. |
| 230 | // |
| 231 | // This proof constructs k, so, to show the converse, let k_H = n_H - 2^w, |
| 232 | // k_M = 2^(w-1), k_L = 2^(w-1) - n_L. This will result in a non-trivial point |
| 233 | // doubling in the final addition and is the only such scalar. |
| 234 | // |
| 235 | // COMMON CURVES: |
| 236 | // |
| 237 | // The group orders for common curves end in the following bit patterns: |
| 238 | // |
| 239 | // P-521: ...00001001; w = 4 is okay |
| 240 | // P-384: ...01110011; w = 2, 5, 6, 7 are okay |
| 241 | // P-256: ...01010001; w = 5, 7 are okay |
| 242 | // P-224: ...00111101; w = 3, 4, 5, 6 are okay |
void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit,
                                     uint8_t in) {
  // Recodes a 6-bit window |in| (0 .. 63) into a signed digit: |*sign| receives
  // 0 for a non-negative digit and 1 for a negative one, |*digit| receives the
  // absolute value (0 .. 16). The computation is deliberately branch-free; no
  // statement here depends on |in| through control flow.

  // 0xff when bit 5 of |in| (the top bit of the 6-bit window) is set, 0x00
  // when it is clear.
  const uint8_t neg_mask = (uint8_t)(0 - (in >> 5));

  // The window reflected within the 6-bit range: 63 - in.
  const uint8_t mirrored = (uint8_t)(0x3f - in);

  // Mask-select: |mirrored| where |neg_mask| is set, |in| where it is clear.
  const uint8_t picked = (uint8_t)(in ^ (neg_mask & (in ^ mirrored)));

  // Halve, rounding up. This folds the extra low-order bit into the digit.
  const uint8_t magnitude = (uint8_t)((picked + 1) >> 1);

  *sign = neg_mask & 1;
  *digit = magnitude;
}
| 256 | |