github.com/maenmax/kairep@v0.0.0-20210218001208-55bf3df36788/src/golang.org/x/crypto/ed25519/internal/edwards25519/edwards25519.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package edwards25519 6 7 // This code is a port of the public domain, “ref10” implementation of ed25519 8 // from SUPERCOP. 9 10 // FieldElement represents an element of the field GF(2^255 - 19). An element 11 // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 12 // t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on 13 // context. 14 type FieldElement [10]int32 15 16 var zero FieldElement 17 18 func FeZero(fe *FieldElement) { 19 copy(fe[:], zero[:]) 20 } 21 22 func FeOne(fe *FieldElement) { 23 FeZero(fe) 24 fe[0] = 1 25 } 26 27 func FeAdd(dst, a, b *FieldElement) { 28 dst[0] = a[0] + b[0] 29 dst[1] = a[1] + b[1] 30 dst[2] = a[2] + b[2] 31 dst[3] = a[3] + b[3] 32 dst[4] = a[4] + b[4] 33 dst[5] = a[5] + b[5] 34 dst[6] = a[6] + b[6] 35 dst[7] = a[7] + b[7] 36 dst[8] = a[8] + b[8] 37 dst[9] = a[9] + b[9] 38 } 39 40 func FeSub(dst, a, b *FieldElement) { 41 dst[0] = a[0] - b[0] 42 dst[1] = a[1] - b[1] 43 dst[2] = a[2] - b[2] 44 dst[3] = a[3] - b[3] 45 dst[4] = a[4] - b[4] 46 dst[5] = a[5] - b[5] 47 dst[6] = a[6] - b[6] 48 dst[7] = a[7] - b[7] 49 dst[8] = a[8] - b[8] 50 dst[9] = a[9] - b[9] 51 } 52 53 func FeCopy(dst, src *FieldElement) { 54 copy(dst[:], src[:]) 55 } 56 57 // Replace (f,g) with (g,g) if b == 1; 58 // replace (f,g) with (f,g) if b == 0. 59 // 60 // Preconditions: b in {0,1}. 61 func FeCMove(f, g *FieldElement, b int32) { 62 b = -b 63 f[0] ^= b & (f[0] ^ g[0]) 64 f[1] ^= b & (f[1] ^ g[1]) 65 f[2] ^= b & (f[2] ^ g[2]) 66 f[3] ^= b & (f[3] ^ g[3]) 67 f[4] ^= b & (f[4] ^ g[4]) 68 f[5] ^= b & (f[5] ^ g[5]) 69 f[6] ^= b & (f[6] ^ g[6]) 70 f[7] ^= b & (f[7] ^ g[7]) 71 f[8] ^= b & (f[8] ^ g[8]) 72 f[9] ^= b & (f[9] ^ g[9]) 73 } 74 75 func load3(in []byte) int64 { 76 var r int64 77 r = int64(in[0]) 78 r |= int64(in[1]) << 8 79 r |= int64(in[2]) << 16 80 return r 81 } 82 83 func load4(in []byte) int64 { 84 var r int64 85 r = int64(in[0]) 86 r |= int64(in[1]) << 8 87 r |= int64(in[2]) << 16 88 r |= int64(in[3]) << 24 89 return r 90 } 91 92 func FeFromBytes(dst *FieldElement, src *[32]byte) { 93 h0 := load4(src[:]) 94 h1 := load3(src[4:]) << 6 95 h2 := load3(src[7:]) << 5 96 h3 := load3(src[10:]) << 3 97 h4 := load3(src[13:]) << 2 98 h5 := load4(src[16:]) 99 h6 := load3(src[20:]) << 7 100 h7 := load3(src[23:]) << 5 101 h8 := load3(src[26:]) << 4 102 h9 := (load3(src[29:]) & 8388607) << 2 103 104 FeCombine(dst, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) 105 } 106 107 // FeToBytes marshals h to s. 108 // Preconditions: 109 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 110 // 111 // Write p=2^255-19; q=floor(h/p). 112 // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 113 // 114 // Proof: 115 // Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 116 // Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. 117 // 118 // Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 119 // Then 0<y<1. 120 // 121 // Write r=h-pq. 122 // Have 0<=r<=p-1=2^255-20. 123 // Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 124 // 125 // Write x=r+19(2^-255)r+y. 126 // Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 127 // 128 // Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 129 // so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 130 func FeToBytes(s *[32]byte, h *FieldElement) { 131 var carry [10]int32 132 133 q := (19*h[9] + (1 << 24)) >> 25 134 q = (h[0] + q) >> 26 135 q = (h[1] + q) >> 25 136 q = (h[2] + q) >> 26 137 q = (h[3] + q) >> 25 138 q = (h[4] + q) >> 26 139 q = (h[5] + q) >> 25 140 q = (h[6] + q) >> 26 141 q = (h[7] + q) >> 25 142 q = (h[8] + q) >> 26 143 q = (h[9] + q) >> 25 144 145 // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. 146 h[0] += 19 * q 147 // Goal: Output h-2^255 q, which is between 0 and 2^255-20. 148 149 carry[0] = h[0] >> 26 150 h[1] += carry[0] 151 h[0] -= carry[0] << 26 152 carry[1] = h[1] >> 25 153 h[2] += carry[1] 154 h[1] -= carry[1] << 25 155 carry[2] = h[2] >> 26 156 h[3] += carry[2] 157 h[2] -= carry[2] << 26 158 carry[3] = h[3] >> 25 159 h[4] += carry[3] 160 h[3] -= carry[3] << 25 161 carry[4] = h[4] >> 26 162 h[5] += carry[4] 163 h[4] -= carry[4] << 26 164 carry[5] = h[5] >> 25 165 h[6] += carry[5] 166 h[5] -= carry[5] << 25 167 carry[6] = h[6] >> 26 168 h[7] += carry[6] 169 h[6] -= carry[6] << 26 170 carry[7] = h[7] >> 25 171 h[8] += carry[7] 172 h[7] -= carry[7] << 25 173 carry[8] = h[8] >> 26 174 h[9] += carry[8] 175 h[8] -= carry[8] << 26 176 carry[9] = h[9] >> 25 177 h[9] -= carry[9] << 25 178 // h10 = carry9 179 180 // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 181 // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; 182 // evidently 2^255 h10-2^255 q = 0. 183 // Goal: Output h[0]+...+2^230 h[9]. 184 185 s[0] = byte(h[0] >> 0) 186 s[1] = byte(h[0] >> 8) 187 s[2] = byte(h[0] >> 16) 188 s[3] = byte((h[0] >> 24) | (h[1] << 2)) 189 s[4] = byte(h[1] >> 6) 190 s[5] = byte(h[1] >> 14) 191 s[6] = byte((h[1] >> 22) | (h[2] << 3)) 192 s[7] = byte(h[2] >> 5) 193 s[8] = byte(h[2] >> 13) 194 s[9] = byte((h[2] >> 21) | (h[3] << 5)) 195 s[10] = byte(h[3] >> 3) 196 s[11] = byte(h[3] >> 11) 197 s[12] = byte((h[3] >> 19) | (h[4] << 6)) 198 s[13] = byte(h[4] >> 2) 199 s[14] = byte(h[4] >> 10) 200 s[15] = byte(h[4] >> 18) 201 s[16] = byte(h[5] >> 0) 202 s[17] = byte(h[5] >> 8) 203 s[18] = byte(h[5] >> 16) 204 s[19] = byte((h[5] >> 24) | (h[6] << 1)) 205 s[20] = byte(h[6] >> 7) 206 s[21] = byte(h[6] >> 15) 207 s[22] = byte((h[6] >> 23) | (h[7] << 3)) 208 s[23] = byte(h[7] >> 5) 209 s[24] = byte(h[7] >> 13) 210 s[25] = byte((h[7] >> 21) | (h[8] << 4)) 211 s[26] = byte(h[8] >> 4) 212 s[27] = byte(h[8] >> 12) 213 s[28] = byte((h[8] >> 20) | (h[9] << 6)) 214 s[29] = byte(h[9] >> 2) 215 s[30] = byte(h[9] >> 10) 216 s[31] = byte(h[9] >> 18) 217 } 218 219 func FeIsNegative(f *FieldElement) byte { 220 var s [32]byte 221 FeToBytes(&s, f) 222 return s[0] & 1 223 } 224 225 func FeIsNonZero(f *FieldElement) int32 { 226 var s [32]byte 227 FeToBytes(&s, f) 228 var x uint8 229 for _, b := range s { 230 x |= b 231 } 232 x |= x >> 4 233 x |= x >> 2 234 x |= x >> 1 235 return int32(x & 1) 236 } 237 238 // FeNeg sets h = -f 239 // 240 // Preconditions: 241 // |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 242 // 243 // Postconditions: 244 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 245 func FeNeg(h, f *FieldElement) { 246 h[0] = -f[0] 247 h[1] = -f[1] 248 h[2] = -f[2] 249 h[3] = -f[3] 250 h[4] = -f[4] 251 h[5] = -f[5] 252 h[6] = -f[6] 253 h[7] = -f[7] 254 h[8] = -f[8] 255 h[9] = -f[9] 256 } 257 258 func FeCombine(h *FieldElement, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) { 259 var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64 260 261 /* 262 |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) 263 i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 264 |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) 265 i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 266 */ 267 268 c0 = (h0 + (1 << 25)) >> 26 269 h1 += c0 270 h0 -= c0 << 26 271 c4 = (h4 + (1 << 25)) >> 26 272 h5 += c4 273 h4 -= c4 << 26 274 /* |h0| <= 2^25 */ 275 /* |h4| <= 2^25 */ 276 /* |h1| <= 1.51*2^58 */ 277 /* |h5| <= 1.51*2^58 */ 278 279 c1 = (h1 + (1 << 24)) >> 25 280 h2 += c1 281 h1 -= c1 << 25 282 c5 = (h5 + (1 << 24)) >> 25 283 h6 += c5 284 h5 -= c5 << 25 285 /* |h1| <= 2^24; from now on fits into int32 */ 286 /* |h5| <= 2^24; from now on fits into int32 */ 287 /* |h2| <= 1.21*2^59 */ 288 /* |h6| <= 1.21*2^59 */ 289 290 c2 = (h2 + (1 << 25)) >> 26 291 h3 += c2 292 h2 -= c2 << 26 293 c6 = (h6 + (1 << 25)) >> 26 294 h7 += c6 295 h6 -= c6 << 26 296 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 297 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 298 /* |h3| <= 1.51*2^58 */ 299 /* |h7| <= 1.51*2^58 */ 300 301 c3 = (h3 + (1 << 24)) >> 25 302 h4 += c3 303 h3 -= c3 << 25 304 c7 = (h7 + (1 << 24)) >> 25 305 h8 += c7 306 h7 -= c7 << 25 307 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 308 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 309 /* |h4| <= 1.52*2^33 */ 310 /* |h8| <= 1.52*2^33 */ 311 312 c4 = (h4 + (1 << 25)) >> 26 313 h5 += c4 314 h4 -= c4 << 26 315 c8 = (h8 + (1 << 25)) >> 26 316 h9 += c8 317 h8 -= c8 << 26 318 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 319 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 320 /* |h5| <= 1.01*2^24 */ 321 /* |h9| <= 1.51*2^58 */ 322 323 c9 = (h9 + (1 << 24)) >> 25 324 h0 += c9 * 19 325 h9 -= c9 << 25 326 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 327 /* |h0| <= 1.8*2^37 */ 328 329 c0 = (h0 + (1 << 25)) >> 26 330 h1 += c0 331 h0 -= c0 << 26 332 /* |h0| <= 2^25; from now on fits into int32 unchanged */ 333 /* |h1| <= 1.01*2^24 */ 334 335 h[0] = int32(h0) 336 h[1] = int32(h1) 337 h[2] = int32(h2) 338 h[3] = int32(h3) 339 h[4] = int32(h4) 340 h[5] = int32(h5) 341 h[6] = int32(h6) 342 h[7] = int32(h7) 343 h[8] = int32(h8) 344 h[9] = int32(h9) 345 } 346 347 // FeMul calculates h = f * g 348 // Can overlap h with f or g. 349 // 350 // Preconditions: 351 // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 352 // |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 353 // 354 // Postconditions: 355 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 356 // 357 // Notes on implementation strategy: 358 // 359 // Using schoolbook multiplication. 360 // Karatsuba would save a little in some cost models. 361 // 362 // Most multiplications by 2 and 19 are 32-bit precomputations; 363 // cheaper than 64-bit postcomputations. 364 // 365 // There is one remaining multiplication by 19 in the carry chain; 366 // one *19 precomputation can be merged into this, 367 // but the resulting data flow is considerably less clean. 368 // 369 // There are 12 carries below. 370 // 10 of them are 2-way parallelizable and vectorizable. 371 // Can get away with 11 carries, but then data flow is much deeper. 372 // 373 // With tighter constraints on inputs, can squeeze carries into int32. 374 func FeMul(h, f, g *FieldElement) { 375 f0 := int64(f[0]) 376 f1 := int64(f[1]) 377 f2 := int64(f[2]) 378 f3 := int64(f[3]) 379 f4 := int64(f[4]) 380 f5 := int64(f[5]) 381 f6 := int64(f[6]) 382 f7 := int64(f[7]) 383 f8 := int64(f[8]) 384 f9 := int64(f[9]) 385 386 f1_2 := int64(2 * f[1]) 387 f3_2 := int64(2 * f[3]) 388 f5_2 := int64(2 * f[5]) 389 f7_2 := int64(2 * f[7]) 390 f9_2 := int64(2 * f[9]) 391 392 g0 := int64(g[0]) 393 g1 := int64(g[1]) 394 g2 := int64(g[2]) 395 g3 := int64(g[3]) 396 g4 := int64(g[4]) 397 g5 := int64(g[5]) 398 g6 := int64(g[6]) 399 g7 := int64(g[7]) 400 g8 := int64(g[8]) 401 g9 := int64(g[9]) 402 403 g1_19 := int64(19 * g[1]) /* 1.4*2^29 */ 404 g2_19 := int64(19 * g[2]) /* 1.4*2^30; still ok */ 405 g3_19 := int64(19 * g[3]) 406 g4_19 := int64(19 * g[4]) 407 g5_19 := int64(19 * g[5]) 408 g6_19 := int64(19 * g[6]) 409 g7_19 := int64(19 * g[7]) 410 g8_19 := int64(19 * g[8]) 411 g9_19 := int64(19 * g[9]) 412 413 h0 := f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19 414 h1 := f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19 415 h2 := f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19 416 h3 := f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19 417 h4 := f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19 418 h5 := f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19 419 h6 := f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19 420 h7 := f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19 421 h8 := f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19 422 h9 := f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0 423 424 FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) 425 } 426 427 func feSquare(f *FieldElement) (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) { 428 f0 := int64(f[0]) 429 f1 := int64(f[1]) 430 f2 := int64(f[2]) 431 f3 := int64(f[3]) 432 f4 := int64(f[4]) 433 f5 := int64(f[5]) 434 f6 := int64(f[6]) 435 f7 := int64(f[7]) 436 f8 := int64(f[8]) 437 f9 := int64(f[9]) 438 f0_2 := int64(2 * f[0]) 439 f1_2 := int64(2 * f[1]) 440 f2_2 := int64(2 * f[2]) 441 f3_2 := int64(2 * f[3]) 442 f4_2 := int64(2 * f[4]) 443 f5_2 := int64(2 * f[5]) 444 f6_2 := int64(2 * f[6]) 445 f7_2 := int64(2 * f[7]) 446 f5_38 := 38 * f5 // 1.31*2^30 447 f6_19 := 19 * f6 // 1.31*2^30 448 f7_38 := 38 * f7 // 1.31*2^30 449 f8_19 := 19 * f8 // 1.31*2^30 450 f9_38 := 38 * f9 // 1.31*2^30 451 452 h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38 453 h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19 454 h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19 455 h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38 456 h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38 457 h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19 458 h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19 459 h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38 460 h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38 461 h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5 462 463 return 464 } 465 466 // FeSquare calculates h = f*f. Can overlap h with f. 467 // 468 // Preconditions: 469 // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 470 // 471 // Postconditions: 472 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 473 func FeSquare(h, f *FieldElement) { 474 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f) 475 FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) 476 } 477 478 // FeSquare2 sets h = 2 * f * f 479 // 480 // Can overlap h with f. 481 // 482 // Preconditions: 483 // |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 484 // 485 // Postconditions: 486 // |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 487 // See fe_mul.c for discussion of implementation strategy. 488 func FeSquare2(h, f *FieldElement) { 489 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f) 490 491 h0 += h0 492 h1 += h1 493 h2 += h2 494 h3 += h3 495 h4 += h4 496 h5 += h5 497 h6 += h6 498 h7 += h7 499 h8 += h8 500 h9 += h9 501 502 FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) 503 } 504 505 func FeInvert(out, z *FieldElement) { 506 var t0, t1, t2, t3 FieldElement 507 var i int 508 509 FeSquare(&t0, z) // 2^1 510 FeSquare(&t1, &t0) // 2^2 511 for i = 1; i < 2; i++ { // 2^3 512 FeSquare(&t1, &t1) 513 } 514 FeMul(&t1, z, &t1) // 2^3 + 2^0 515 FeMul(&t0, &t0, &t1) // 2^3 + 2^1 + 2^0 516 FeSquare(&t2, &t0) // 2^4 + 2^2 + 2^1 517 FeMul(&t1, &t1, &t2) // 2^4 + 2^3 + 2^2 + 2^1 + 2^0 518 FeSquare(&t2, &t1) // 5,4,3,2,1 519 for i = 1; i < 5; i++ { // 9,8,7,6,5 520 FeSquare(&t2, &t2) 521 } 522 FeMul(&t1, &t2, &t1) // 9,8,7,6,5,4,3,2,1,0 523 FeSquare(&t2, &t1) // 10..1 524 for i = 1; i < 10; i++ { // 19..10 525 FeSquare(&t2, &t2) 526 } 527 FeMul(&t2, &t2, &t1) // 19..0 528 FeSquare(&t3, &t2) // 20..1 529 for i = 1; i < 20; i++ { // 39..20 530 FeSquare(&t3, &t3) 531 } 532 FeMul(&t2, &t3, &t2) // 39..0 533 FeSquare(&t2, &t2) // 40..1 534 for i = 1; i < 10; i++ { // 49..10 535 FeSquare(&t2, &t2) 536 } 537 FeMul(&t1, &t2, &t1) // 49..0 538 FeSquare(&t2, &t1) // 50..1 539 for i = 1; i < 50; i++ { // 99..50 540 FeSquare(&t2, &t2) 541 } 542 FeMul(&t2, &t2, &t1) // 99..0 543 FeSquare(&t3, &t2) // 100..1 544 for i = 1; i < 100; i++ { // 199..100 545 FeSquare(&t3, &t3) 546 } 547 FeMul(&t2, &t3, &t2) // 199..0 548 FeSquare(&t2, &t2) // 200..1 549 for i = 1; i < 50; i++ { // 249..50 550 FeSquare(&t2, &t2) 551 } 552 FeMul(&t1, &t2, &t1) // 249..0 553 FeSquare(&t1, &t1) // 250..1 554 for i = 1; i < 5; i++ { // 254..5 555 FeSquare(&t1, &t1) 556 } 557 FeMul(out, &t1, &t0) // 254..5,3,1,0 558 } 559 560 func fePow22523(out, z *FieldElement) { 561 var t0, t1, t2 FieldElement 562 var i int 563 564 FeSquare(&t0, z) 565 for i = 1; i < 1; i++ { 566 FeSquare(&t0, &t0) 567 } 568 FeSquare(&t1, &t0) 569 for i = 1; i < 2; i++ { 570 FeSquare(&t1, &t1) 571 } 572 FeMul(&t1, z, &t1) 573 FeMul(&t0, &t0, &t1) 574 FeSquare(&t0, &t0) 575 for i = 1; i < 1; i++ { 576 FeSquare(&t0, &t0) 577 } 578 FeMul(&t0, &t1, &t0) 579 FeSquare(&t1, &t0) 580 for i = 1; i < 5; i++ { 581 FeSquare(&t1, &t1) 582 } 583 FeMul(&t0, &t1, &t0) 584 FeSquare(&t1, &t0) 585 for i = 1; i < 10; i++ { 586 FeSquare(&t1, &t1) 587 } 588 FeMul(&t1, &t1, &t0) 589 FeSquare(&t2, &t1) 590 for i = 1; i < 20; i++ { 591 FeSquare(&t2, &t2) 592 } 593 FeMul(&t1, &t2, &t1) 594 FeSquare(&t1, &t1) 595 for i = 1; i < 10; i++ { 596 FeSquare(&t1, &t1) 597 } 598 FeMul(&t0, &t1, &t0) 599 FeSquare(&t1, &t0) 600 for i = 1; i < 50; i++ { 601 FeSquare(&t1, &t1) 602 } 603 FeMul(&t1, &t1, &t0) 604 FeSquare(&t2, &t1) 605 for i = 1; i < 100; i++ { 606 FeSquare(&t2, &t2) 607 } 608 FeMul(&t1, &t2, &t1) 609 FeSquare(&t1, &t1) 610 for i = 1; i < 50; i++ { 611 FeSquare(&t1, &t1) 612 } 613 FeMul(&t0, &t1, &t0) 614 FeSquare(&t0, &t0) 615 for i = 1; i < 2; i++ { 616 FeSquare(&t0, &t0) 617 } 618 FeMul(out, &t0, z) 619 } 620 621 // Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 * 622 // y^2 where d = -121665/121666. 623 // 624 // Several representations are used: 625 // ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z 626 // ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT 627 // CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T 628 // PreComputedGroupElement: (y+x,y-x,2dxy) 629 630 type ProjectiveGroupElement struct { 631 X, Y, Z FieldElement 632 } 633 634 type ExtendedGroupElement struct { 635 X, Y, Z, T FieldElement 636 } 637 638 type CompletedGroupElement struct { 639 X, Y, Z, T FieldElement 640 } 641 642 type PreComputedGroupElement struct { 643 yPlusX, yMinusX, xy2d FieldElement 644 } 645 646 type CachedGroupElement struct { 647 yPlusX, yMinusX, Z, T2d FieldElement 648 } 649 650 func (p *ProjectiveGroupElement) Zero() { 651 FeZero(&p.X) 652 FeOne(&p.Y) 653 FeOne(&p.Z) 654 } 655 656 func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) { 657 var t0 FieldElement 658 659 FeSquare(&r.X, &p.X) 660 FeSquare(&r.Z, &p.Y) 661 FeSquare2(&r.T, &p.Z) 662 FeAdd(&r.Y, &p.X, &p.Y) 663 FeSquare(&t0, &r.Y) 664 FeAdd(&r.Y, &r.Z, &r.X) 665 FeSub(&r.Z, &r.Z, &r.X) 666 FeSub(&r.X, &t0, &r.Y) 667 FeSub(&r.T, &r.T, &r.Z) 668 } 669 670 func (p *ProjectiveGroupElement) ToBytes(s *[32]byte) { 671 var recip, x, y FieldElement 672 673 FeInvert(&recip, &p.Z) 674 FeMul(&x, &p.X, &recip) 675 FeMul(&y, &p.Y, &recip) 676 FeToBytes(s, &y) 677 s[31] ^= FeIsNegative(&x) << 7 678 } 679 680 func (p *ExtendedGroupElement) Zero() { 681 FeZero(&p.X) 682 FeOne(&p.Y) 683 FeOne(&p.Z) 684 FeZero(&p.T) 685 } 686 687 func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) { 688 var q ProjectiveGroupElement 689 p.ToProjective(&q) 690 q.Double(r) 691 } 692 693 func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) { 694 FeAdd(&r.yPlusX, &p.Y, &p.X) 695 FeSub(&r.yMinusX, &p.Y, &p.X) 696 FeCopy(&r.Z, &p.Z) 697 FeMul(&r.T2d, &p.T, &d2) 698 } 699 700 func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) { 701 FeCopy(&r.X, &p.X) 702 FeCopy(&r.Y, &p.Y) 703 FeCopy(&r.Z, &p.Z) 704 } 705 706 func (p *ExtendedGroupElement) ToBytes(s *[32]byte) { 707 var recip, x, y FieldElement 708 709 FeInvert(&recip, &p.Z) 710 FeMul(&x, &p.X, &recip) 711 FeMul(&y, &p.Y, &recip) 712 FeToBytes(s, &y) 713 s[31] ^= FeIsNegative(&x) << 7 714 } 715 716 func (p *ExtendedGroupElement) FromBytes(s *[32]byte) bool { 717 var u, v, v3, vxx, check FieldElement 718 719 FeFromBytes(&p.Y, s) 720 FeOne(&p.Z) 721 FeSquare(&u, &p.Y) 722 FeMul(&v, &u, &d) 723 FeSub(&u, &u, &p.Z) // y = y^2-1 724 FeAdd(&v, &v, &p.Z) // v = dy^2+1 725 726 FeSquare(&v3, &v) 727 FeMul(&v3, &v3, &v) // v3 = v^3 728 FeSquare(&p.X, &v3) 729 FeMul(&p.X, &p.X, &v) 730 FeMul(&p.X, &p.X, &u) // x = uv^7 731 732 fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8) 733 FeMul(&p.X, &p.X, &v3) 734 FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8) 735 736 var tmpX, tmp2 [32]byte 737 738 FeSquare(&vxx, &p.X) 739 FeMul(&vxx, &vxx, &v) 740 FeSub(&check, &vxx, &u) // vx^2-u 741 if FeIsNonZero(&check) == 1 { 742 FeAdd(&check, &vxx, &u) // vx^2+u 743 if FeIsNonZero(&check) == 1 { 744 return false 745 } 746 FeMul(&p.X, &p.X, &SqrtM1) 747 748 FeToBytes(&tmpX, &p.X) 749 for i, v := range tmpX { 750 tmp2[31-i] = v 751 } 752 } 753 754 if FeIsNegative(&p.X) != (s[31] >> 7) { 755 FeNeg(&p.X, &p.X) 756 } 757 758 FeMul(&p.T, &p.X, &p.Y) 759 return true 760 } 761 762 func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) { 763 FeMul(&r.X, &p.X, &p.T) 764 FeMul(&r.Y, &p.Y, &p.Z) 765 FeMul(&r.Z, &p.Z, &p.T) 766 } 767 768 func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) { 769 FeMul(&r.X, &p.X, &p.T) 770 FeMul(&r.Y, &p.Y, &p.Z) 771 FeMul(&r.Z, &p.Z, &p.T) 772 FeMul(&r.T, &p.X, &p.Y) 773 } 774 775 func (p *PreComputedGroupElement) Zero() { 776 FeOne(&p.yPlusX) 777 FeOne(&p.yMinusX) 778 FeZero(&p.xy2d) 779 } 780 781 func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) { 782 var t0 FieldElement 783 784 FeAdd(&r.X, &p.Y, &p.X) 785 FeSub(&r.Y, &p.Y, &p.X) 786 FeMul(&r.Z, &r.X, &q.yPlusX) 787 FeMul(&r.Y, &r.Y, &q.yMinusX) 788 FeMul(&r.T, &q.T2d, &p.T) 789 FeMul(&r.X, &p.Z, &q.Z) 790 FeAdd(&t0, &r.X, &r.X) 791 FeSub(&r.X, &r.Z, &r.Y) 792 FeAdd(&r.Y, &r.Z, &r.Y) 793 FeAdd(&r.Z, &t0, &r.T) 794 FeSub(&r.T, &t0, &r.T) 795 } 796 797 func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) { 798 var t0 FieldElement 799 800 FeAdd(&r.X, &p.Y, &p.X) 801 FeSub(&r.Y, &p.Y, &p.X) 802 FeMul(&r.Z, &r.X, &q.yMinusX) 803 FeMul(&r.Y, &r.Y, &q.yPlusX) 804 FeMul(&r.T, &q.T2d, &p.T) 805 FeMul(&r.X, &p.Z, &q.Z) 806 FeAdd(&t0, &r.X, &r.X) 807 FeSub(&r.X, &r.Z, &r.Y) 808 FeAdd(&r.Y, &r.Z, &r.Y) 809 FeSub(&r.Z, &t0, &r.T) 810 FeAdd(&r.T, &t0, &r.T) 811 } 812 813 func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) { 814 var t0 FieldElement 815 816 FeAdd(&r.X, &p.Y, &p.X) 817 FeSub(&r.Y, &p.Y, &p.X) 818 FeMul(&r.Z, &r.X, &q.yPlusX) 819 FeMul(&r.Y, &r.Y, &q.yMinusX) 820 FeMul(&r.T, &q.xy2d, &p.T) 821 FeAdd(&t0, &p.Z, &p.Z) 822 FeSub(&r.X, &r.Z, &r.Y) 823 FeAdd(&r.Y, &r.Z, &r.Y) 824 FeAdd(&r.Z, &t0, &r.T) 825 FeSub(&r.T, &t0, &r.T) 826 } 827 828 func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) { 829 var t0 FieldElement 830 831 FeAdd(&r.X, &p.Y, &p.X) 832 FeSub(&r.Y, &p.Y, &p.X) 833 FeMul(&r.Z, &r.X, &q.yMinusX) 834 FeMul(&r.Y, &r.Y, &q.yPlusX) 835 FeMul(&r.T, &q.xy2d, &p.T) 836 FeAdd(&t0, &p.Z, &p.Z) 837 FeSub(&r.X, &r.Z, &r.Y) 838 FeAdd(&r.Y, &r.Z, &r.Y) 839 FeSub(&r.Z, &t0, &r.T) 840 FeAdd(&r.T, &t0, &r.T) 841 } 842 843 func slide(r *[256]int8, a *[32]byte) { 844 for i := range r { 845 r[i] = int8(1 & (a[i>>3] >> uint(i&7))) 846 } 847 848 for i := range r { 849 if r[i] != 0 { 850 for b := 1; b <= 6 && i+b < 256; b++ { 851 if r[i+b] != 0 { 852 if r[i]+(r[i+b]<<uint(b)) <= 15 { 853 r[i] += r[i+b] << uint(b) 854 r[i+b] = 0 855 } else if r[i]-(r[i+b]<<uint(b)) >= -15 { 856 r[i] -= r[i+b] << uint(b) 857 for k := i + b; k < 256; k++ { 858 if r[k] == 0 { 859 r[k] = 1 860 break 861 } 862 r[k] = 0 863 } 864 } else { 865 break 866 } 867 } 868 } 869 } 870 } 871 } 872 873 // GeDoubleScalarMultVartime sets r = a*A + b*B 874 // where a = a[0]+256*a[1]+...+256^31 a[31]. 875 // and b = b[0]+256*b[1]+...+256^31 b[31]. 876 // B is the Ed25519 base point (x,4/5) with x positive. 877 func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *[32]byte, A *ExtendedGroupElement, b *[32]byte) { 878 var aSlide, bSlide [256]int8 879 var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A 880 var t CompletedGroupElement 881 var u, A2 ExtendedGroupElement 882 var i int 883 884 slide(&aSlide, a) 885 slide(&bSlide, b) 886 887 A.ToCached(&Ai[0]) 888 A.Double(&t) 889 t.ToExtended(&A2) 890 891 for i := 0; i < 7; i++ { 892 geAdd(&t, &A2, &Ai[i]) 893 t.ToExtended(&u) 894 u.ToCached(&Ai[i+1]) 895 } 896 897 r.Zero() 898 899 for i = 255; i >= 0; i-- { 900 if aSlide[i] != 0 || bSlide[i] != 0 { 901 break 902 } 903 } 904 905 for ; i >= 0; i-- { 906 r.Double(&t) 907 908 if aSlide[i] > 0 { 909 t.ToExtended(&u) 910 geAdd(&t, &u, &Ai[aSlide[i]/2]) 911 } else if aSlide[i] < 0 { 912 t.ToExtended(&u) 913 geSub(&t, &u, &Ai[(-aSlide[i])/2]) 914 } 915 916 if bSlide[i] > 0 { 917 t.ToExtended(&u) 918 geMixedAdd(&t, &u, &bi[bSlide[i]/2]) 919 } else if bSlide[i] < 0 { 920 t.ToExtended(&u) 921 geMixedSub(&t, &u, &bi[(-bSlide[i])/2]) 922 } 923 924 t.ToProjective(r) 925 } 926 } 927 928 // equal returns 1 if b == c and 0 otherwise, assuming that b and c are 929 // non-negative. 930 func equal(b, c int32) int32 { 931 x := uint32(b ^ c) 932 x-- 933 return int32(x >> 31) 934 } 935 936 // negative returns 1 if b < 0 and 0 otherwise. 937 func negative(b int32) int32 { 938 return (b >> 31) & 1 939 } 940 941 func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) { 942 FeCMove(&t.yPlusX, &u.yPlusX, b) 943 FeCMove(&t.yMinusX, &u.yMinusX, b) 944 FeCMove(&t.xy2d, &u.xy2d, b) 945 } 946 947 func selectPoint(t *PreComputedGroupElement, pos int32, b int32) { 948 var minusT PreComputedGroupElement 949 bNegative := negative(b) 950 bAbs := b - (((-bNegative) & b) << 1) 951 952 t.Zero() 953 for i := int32(0); i < 8; i++ { 954 PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1)) 955 } 956 FeCopy(&minusT.yPlusX, &t.yMinusX) 957 FeCopy(&minusT.yMinusX, &t.yPlusX) 958 FeNeg(&minusT.xy2d, &t.xy2d) 959 PreComputedGroupElementCMove(t, &minusT, bNegative) 960 } 961 962 // GeScalarMultBase computes h = a*B, where 963 // a = a[0]+256*a[1]+...+256^31 a[31] 964 // B is the Ed25519 base point (x,4/5) with x positive. 965 // 966 // Preconditions: 967 // a[31] <= 127 968 func GeScalarMultBase(h *ExtendedGroupElement, a *[32]byte) { 969 var e [64]int8 970 971 for i, v := range a { 972 e[2*i] = int8(v & 15) 973 e[2*i+1] = int8((v >> 4) & 15) 974 } 975 976 // each e[i] is between 0 and 15 and e[63] is between 0 and 7. 977 978 carry := int8(0) 979 for i := 0; i < 63; i++ { 980 e[i] += carry 981 carry = (e[i] + 8) >> 4 982 e[i] -= carry << 4 983 } 984 e[63] += carry 985 // each e[i] is between -8 and 8. 986 987 h.Zero() 988 var t PreComputedGroupElement 989 var r CompletedGroupElement 990 for i := int32(1); i < 64; i += 2 { 991 selectPoint(&t, i/2, int32(e[i])) 992 geMixedAdd(&r, h, &t) 993 r.ToExtended(h) 994 } 995 996 var s ProjectiveGroupElement 997 998 h.Double(&r) 999 r.ToProjective(&s) 1000 s.Double(&r) 1001 r.ToProjective(&s) 1002 s.Double(&r) 1003 r.ToProjective(&s) 1004 s.Double(&r) 1005 r.ToExtended(h) 1006 1007 for i := int32(0); i < 64; i += 2 { 1008 selectPoint(&t, i/2, int32(e[i])) 1009 geMixedAdd(&r, h, &t) 1010 r.ToExtended(h) 1011 } 1012 } 1013 1014 // The scalars are GF(2^252 + 27742317777372353535851937790883648493). 1015 1016 // Input: 1017 // a[0]+256*a[1]+...+256^31*a[31] = a 1018 // b[0]+256*b[1]+...+256^31*b[31] = b 1019 // c[0]+256*c[1]+...+256^31*c[31] = c 1020 // 1021 // Output: 1022 // s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l 1023 // where l = 2^252 + 27742317777372353535851937790883648493. 1024 func ScMulAdd(s, a, b, c *[32]byte) { 1025 a0 := 2097151 & load3(a[:]) 1026 a1 := 2097151 & (load4(a[2:]) >> 5) 1027 a2 := 2097151 & (load3(a[5:]) >> 2) 1028 a3 := 2097151 & (load4(a[7:]) >> 7) 1029 a4 := 2097151 & (load4(a[10:]) >> 4) 1030 a5 := 2097151 & (load3(a[13:]) >> 1) 1031 a6 := 2097151 & (load4(a[15:]) >> 6) 1032 a7 := 2097151 & (load3(a[18:]) >> 3) 1033 a8 := 2097151 & load3(a[21:]) 1034 a9 := 2097151 & (load4(a[23:]) >> 5) 1035 a10 := 2097151 & (load3(a[26:]) >> 2) 1036 a11 := (load4(a[28:]) >> 7) 1037 b0 := 2097151 & load3(b[:]) 1038 b1 := 2097151 & (load4(b[2:]) >> 5) 1039 b2 := 2097151 & (load3(b[5:]) >> 2) 1040 b3 := 2097151 & (load4(b[7:]) >> 7) 1041 b4 := 2097151 & (load4(b[10:]) >> 4) 1042 b5 := 2097151 & (load3(b[13:]) >> 1) 1043 b6 := 2097151 & (load4(b[15:]) >> 6) 1044 b7 := 2097151 & (load3(b[18:]) >> 3) 1045 b8 := 2097151 & load3(b[21:]) 1046 b9 := 2097151 & (load4(b[23:]) >> 5) 1047 b10 := 2097151 & (load3(b[26:]) >> 2) 1048 b11 := (load4(b[28:]) >> 7) 1049 c0 := 2097151 & load3(c[:]) 1050 c1 := 2097151 & (load4(c[2:]) >> 5) 1051 c2 := 2097151 & (load3(c[5:]) >> 2) 1052 c3 := 2097151 & (load4(c[7:]) >> 7) 1053 c4 := 2097151 & (load4(c[10:]) >> 4) 1054 c5 := 2097151 & (load3(c[13:]) >> 1) 1055 c6 := 2097151 & (load4(c[15:]) >> 6) 1056 c7 := 2097151 & (load3(c[18:]) >> 3) 1057 c8 := 2097151 & load3(c[21:]) 1058 c9 := 2097151 & (load4(c[23:]) >> 5) 1059 c10 := 2097151 & (load3(c[26:]) >> 2) 1060 c11 := (load4(c[28:]) >> 7) 1061 var carry [23]int64 1062 1063 s0 := c0 + a0*b0 1064 s1 := c1 + a0*b1 + a1*b0 1065 s2 := c2 + a0*b2 + a1*b1 + a2*b0 1066 s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0 1067 s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0 1068 s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0 1069 s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0 1070 s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0 1071 s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0 1072 s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0 1073 s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0 1074 s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0 1075 s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1 1076 s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2 1077 s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3 1078 s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4 1079 s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5 1080 s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6 1081 s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7 1082 s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8 1083 s20 := a9*b11 + a10*b10 + a11*b9 1084 s21 := a10*b11 + a11*b10 1085 s22 := a11 * b11 1086 s23 := int64(0) 1087 1088 carry[0] = (s0 + (1 << 20)) >> 21 1089 s1 += carry[0] 1090 s0 -= carry[0] << 21 1091 carry[2] = (s2 + (1 << 20)) >> 21 1092 s3 += carry[2] 1093 s2 -= carry[2] << 21 1094 carry[4] = (s4 + (1 << 20)) >> 21 1095 s5 += carry[4] 1096 s4 -= carry[4] << 21 1097 carry[6] = (s6 + (1 << 20)) >> 21 1098 s7 += carry[6] 1099 s6 -= carry[6] << 21 1100 carry[8] = (s8 + (1 << 20)) >> 21 1101 s9 += carry[8] 1102 s8 -= carry[8] << 21 1103 carry[10] = (s10 + (1 << 20)) >> 21 1104 s11 += carry[10] 1105 s10 -= carry[10] << 21 1106 carry[12] = (s12 + (1 << 20)) >> 21 1107 s13 += carry[12] 1108 s12 -= carry[12] << 21 1109 carry[14] = (s14 + (1 << 20)) >> 21 1110 s15 += carry[14] 1111 s14 -= carry[14] << 21 1112 carry[16] = (s16 + (1 << 20)) >> 21 1113 s17 += carry[16] 1114 s16 -= carry[16] << 21 1115 carry[18] = (s18 + (1 << 20)) >> 21 1116 s19 += carry[18] 1117 s18 -= carry[18] << 21 1118 carry[20] = (s20 + (1 << 20)) >> 21 1119 s21 += carry[20] 1120 s20 -= carry[20] << 21 1121 carry[22] = (s22 + (1 << 20)) >> 21 1122 s23 += carry[22] 1123 s22 -= carry[22] << 21 1124 1125 carry[1] = (s1 + (1 << 20)) >> 21 1126 s2 += carry[1] 1127 s1 -= carry[1] << 21 1128 carry[3] = (s3 + (1 << 20)) >> 21 1129 s4 += carry[3] 1130 s3 -= carry[3] << 21 1131 carry[5] = (s5 + (1 << 20)) >> 21 1132 s6 += carry[5] 1133 s5 -= carry[5] << 21 1134 carry[7] = (s7 + (1 << 20)) >> 21 1135 s8 += carry[7] 1136 s7 -= carry[7] << 21 1137 carry[9] = (s9 + (1 << 20)) >> 21 1138 s10 += carry[9] 1139 s9 -= carry[9] << 21 1140 carry[11] = (s11 + (1 << 20)) >> 21 1141 s12 += carry[11] 1142 s11 -= carry[11] << 21 1143 carry[13] = (s13 + (1 << 20)) >> 21 1144 s14 += carry[13] 1145 s13 -= carry[13] << 21 1146 carry[15] = (s15 + (1 << 20)) >> 21 1147 s16 += carry[15] 1148 s15 -= carry[15] << 21 1149 carry[17] = (s17 + (1 << 20)) >> 21 1150 s18 += carry[17] 1151 s17 -= carry[17] << 21 1152 carry[19] = (s19 + (1 << 20)) >> 21 1153 s20 += carry[19] 1154 s19 -= carry[19] << 21 1155 carry[21] = (s21 + (1 << 20)) >> 21 1156 s22 += carry[21] 1157 s21 -= carry[21] << 21 1158 1159 s11 += s23 * 666643 1160 s12 += s23 * 470296 1161 s13 += s23 * 654183 1162 s14 -= s23 * 997805 1163 s15 += s23 * 136657 1164 s16 -= s23 * 683901 1165 s23 = 0 1166 1167 s10 += s22 * 666643 1168 s11 += s22 * 470296 1169 s12 += s22 * 654183 1170 s13 -= s22 * 997805 1171 s14 += s22 * 136657 1172 s15 -= s22 * 683901 1173 s22 = 0 1174 1175 s9 += s21 * 666643 1176 s10 += s21 * 470296 1177 s11 += s21 * 654183 1178 s12 -= s21 * 997805 1179 s13 += s21 * 136657 1180 s14 -= s21 * 683901 1181 s21 = 0 1182 1183 s8 += s20 * 666643 1184 s9 += s20 * 470296 1185 s10 += s20 * 654183 1186 s11 -= s20 * 997805 1187 s12 += s20 * 136657 1188 s13 -= s20 * 683901 1189 s20 = 0 1190 1191 s7 += s19 * 666643 1192 s8 += s19 * 470296 1193 s9 += s19 * 654183 1194 s10 -= s19 * 997805 1195 s11 += s19 * 136657 1196 s12 -= s19 * 683901 1197 s19 = 0 1198 1199 s6 += s18 * 666643 1200 s7 += s18 * 470296 1201 s8 += s18 * 654183 1202 s9 -= s18 * 997805 1203 s10 += s18 * 136657 1204 s11 -= s18 * 683901 1205 s18 = 0 1206 1207 carry[6] = (s6 + (1 << 20)) >> 21 1208 s7 += carry[6] 1209 s6 -= carry[6] << 21 1210 carry[8] = (s8 + (1 << 20)) >> 21 1211 s9 += carry[8] 1212 s8 -= carry[8] << 21 1213 carry[10] = (s10 + (1 << 20)) >> 21 1214 s11 += carry[10] 1215 s10 -= carry[10] << 21 1216 carry[12] = (s12 + (1 << 20)) >> 21 1217 s13 += carry[12] 1218 s12 -= carry[12] << 21 1219 carry[14] = (s14 + (1 << 20)) >> 21 1220 s15 += carry[14] 1221 s14 -= carry[14] << 21 1222 carry[16] = (s16 + (1 << 20)) >> 21 1223 s17 += carry[16] 1224 s16 -= carry[16] << 21 1225 1226 carry[7] = (s7 + (1 << 20)) >> 21 1227 s8 += carry[7] 1228 s7 -= carry[7] << 21 1229 carry[9] = (s9 + (1 << 20)) >> 21 1230 s10 += carry[9] 1231 s9 -= carry[9] << 21 1232 carry[11] = (s11 + (1 << 20)) >> 21 1233 s12 += carry[11] 1234 s11 -= carry[11] << 21 1235 carry[13] = (s13 + (1 << 20)) >> 21 1236 s14 += carry[13] 1237 s13 -= carry[13] << 21 1238 carry[15] = (s15 + (1 << 20)) >> 21 1239 s16 += carry[15] 1240 s15 -= carry[15] << 21 1241 1242 s5 += s17 * 666643 1243 s6 += s17 * 470296 1244 s7 += s17 * 654183 1245 s8 -= s17 * 997805 1246 s9 += s17 * 136657 1247 s10 -= s17 * 683901 1248 s17 = 0 1249 1250 s4 += s16 * 666643 1251 s5 += s16 * 470296 1252 s6 += s16 * 654183 1253 s7 -= s16 * 997805 1254 s8 += s16 * 136657 1255 s9 -= s16 * 683901 1256 s16 = 0 1257 1258 s3 += s15 * 666643 1259 s4 += s15 * 470296 1260 s5 += s15 * 654183 1261 s6 -= s15 * 997805 1262 s7 += s15 * 136657 1263 s8 -= s15 * 683901 1264 s15 = 0 1265 1266 s2 += s14 * 666643 1267 s3 += s14 * 470296 1268 s4 += s14 * 654183 1269 s5 -= s14 * 997805 1270 s6 += s14 * 136657 1271 s7 -= s14 * 683901 1272 s14 = 0 1273 1274 s1 += s13 * 666643 1275 s2 += s13 * 470296 1276 s3 += s13 * 654183 1277 s4 -= s13 * 997805 1278 s5 += s13 * 136657 1279 s6 -= s13 * 683901 1280 s13 = 0 1281 1282 s0 += s12 * 666643 1283 s1 += s12 * 470296 1284 s2 += s12 * 654183 1285 s3 -= s12 * 997805 1286 s4 += s12 * 136657 1287 s5 -= s12 * 683901 1288 s12 = 0 1289 1290 carry[0] = (s0 + (1 << 20)) >> 21 1291 s1 += carry[0] 1292 s0 -= carry[0] << 21 1293 carry[2] = (s2 + (1 << 20)) >> 21 1294 s3 += carry[2] 1295 s2 -= carry[2] << 21 1296 carry[4] = (s4 + (1 << 20)) >> 21 1297 s5 += carry[4] 1298 s4 -= carry[4] << 21 1299 carry[6] = (s6 + (1 << 20)) >> 21 1300 s7 += carry[6] 1301 s6 -= carry[6] << 21 1302 carry[8] = (s8 + (1 << 20)) >> 21 1303 s9 += carry[8] 1304 s8 -= carry[8] << 21 1305 carry[10] = (s10 + (1 << 20)) >> 21 1306 s11 += carry[10] 1307 s10 -= carry[10] << 21 1308 1309 carry[1] = (s1 + (1 << 20)) >> 21 1310 s2 += carry[1] 1311 s1 -= carry[1] << 21 1312 carry[3] = (s3 + (1 << 20)) >> 21 1313 s4 += carry[3] 1314 s3 -= carry[3] << 21 1315 carry[5] = (s5 + (1 << 20)) >> 21 1316 s6 += carry[5] 1317 s5 -= carry[5] << 21 1318 carry[7] = (s7 + (1 << 20)) >> 21 1319 s8 += carry[7] 1320 s7 -= carry[7] << 21 1321 carry[9] = (s9 + (1 << 20)) >> 21 1322 s10 += carry[9] 1323 s9 -= carry[9] << 21 1324 carry[11] = (s11 + (1 << 20)) >> 21 1325 s12 += carry[11] 1326 s11 -= carry[11] << 21 1327 1328 s0 += s12 * 666643 1329 s1 += s12 * 470296 1330 s2 += s12 * 654183 1331 s3 -= s12 * 997805 1332 s4 += s12 * 136657 1333 s5 -= s12 * 683901 1334 s12 = 0 1335 1336 carry[0] = s0 >> 21 1337 s1 += carry[0] 1338 s0 -= carry[0] << 21 1339 carry[1] = s1 >> 21 1340 s2 += carry[1] 1341 s1 -= carry[1] << 21 1342 carry[2] = s2 >> 21 1343 s3 += carry[2] 1344 s2 -= carry[2] << 21 1345 carry[3] = s3 >> 21 1346 s4 += carry[3] 1347 s3 -= carry[3] << 21 1348 carry[4] = s4 >> 21 1349 s5 += carry[4] 1350 s4 -= carry[4] << 21 1351 carry[5] = s5 >> 21 1352 s6 += carry[5] 1353 s5 -= carry[5] << 21 1354 carry[6] = s6 >> 21 1355 s7 += carry[6] 1356 s6 -= carry[6] << 21 1357 carry[7] = s7 >> 21 1358 s8 += carry[7] 1359 s7 -= carry[7] << 21 1360 carry[8] = s8 >> 21 1361 s9 += carry[8] 1362 s8 -= carry[8] << 21 1363 carry[9] = s9 >> 21 1364 s10 += carry[9] 1365 s9 -= carry[9] << 21 1366 carry[10] = s10 >> 21 1367 s11 += carry[10] 1368 s10 -= carry[10] << 21 1369 carry[11] = s11 >> 21 1370 s12 += carry[11] 1371 s11 -= carry[11] << 21 1372 1373 s0 += s12 * 666643 1374 s1 += s12 * 470296 1375 s2 += s12 * 654183 1376 s3 -= s12 * 997805 1377 s4 += s12 * 136657 1378 s5 -= s12 * 683901 1379 s12 = 0 1380 1381 carry[0] = s0 >> 21 1382 s1 += carry[0] 1383 s0 -= carry[0] << 21 1384 carry[1] = s1 >> 21 1385 s2 += carry[1] 1386 s1 -= carry[1] << 21 1387 carry[2] = s2 >> 21 1388 s3 += carry[2] 1389 s2 -= carry[2] << 21 1390 carry[3] = s3 >> 21 1391 s4 += carry[3] 1392 s3 -= carry[3] << 21 1393 carry[4] = s4 >> 21 1394 s5 += carry[4] 1395 s4 -= carry[4] << 21 1396 carry[5] = s5 >> 21 1397 s6 += carry[5] 1398 s5 -= carry[5] << 21 1399 carry[6] = s6 >> 21 1400 s7 += carry[6] 1401 s6 -= carry[6] << 21 1402 carry[7] = s7 >> 21 1403 s8 += carry[7] 1404 s7 -= carry[7] << 21 1405 carry[8] = s8 >> 21 1406 s9 += carry[8] 1407 s8 -= carry[8] << 21 1408 carry[9] = s9 >> 21 1409 s10 += carry[9] 1410 s9 -= carry[9] << 21 1411 carry[10] = s10 >> 21 1412 s11 += carry[10] 1413 s10 -= carry[10] << 21 1414 1415 s[0] = byte(s0 >> 0) 1416 s[1] = byte(s0 >> 8) 1417 s[2] = byte((s0 >> 16) | (s1 << 5)) 1418 s[3] = byte(s1 >> 3) 1419 s[4] = byte(s1 >> 11) 1420 s[5] = byte((s1 >> 19) | (s2 << 2)) 1421 s[6] = byte(s2 >> 6) 1422 s[7] = byte((s2 >> 14) | (s3 << 7)) 1423 s[8] = byte(s3 >> 1) 1424 s[9] = byte(s3 >> 9) 1425 s[10] = byte((s3 >> 17) | (s4 << 4)) 1426 s[11] = byte(s4 >> 4) 1427 s[12] = byte(s4 >> 12) 1428 s[13] = byte((s4 >> 20) | (s5 << 1)) 1429 s[14] = byte(s5 >> 7) 1430 s[15] = byte((s5 >> 15) | (s6 << 6)) 1431 s[16] = byte(s6 >> 2) 1432 s[17] = byte(s6 >> 10) 1433 s[18] = byte((s6 >> 18) | (s7 << 3)) 1434 s[19] = byte(s7 >> 5) 1435 s[20] = byte(s7 >> 13) 1436 s[21] = byte(s8 >> 0) 1437 s[22] = byte(s8 >> 8) 1438 s[23] = byte((s8 >> 16) | (s9 << 5)) 1439 s[24] = byte(s9 >> 3) 1440 s[25] = byte(s9 >> 11) 1441 s[26] = byte((s9 >> 19) | (s10 << 2)) 1442 s[27] = byte(s10 >> 6) 1443 s[28] = byte((s10 >> 14) | (s11 << 7)) 1444 s[29] = byte(s11 >> 1) 1445 s[30] = byte(s11 >> 9) 1446 s[31] = byte(s11 >> 17) 1447 } 1448 1449 // Input: 1450 // s[0]+256*s[1]+...+256^63*s[63] = s 1451 // 1452 // Output: 1453 // s[0]+256*s[1]+...+256^31*s[31] = s mod l 1454 // where l = 2^252 + 27742317777372353535851937790883648493. 1455 func ScReduce(out *[32]byte, s *[64]byte) { 1456 s0 := 2097151 & load3(s[:]) 1457 s1 := 2097151 & (load4(s[2:]) >> 5) 1458 s2 := 2097151 & (load3(s[5:]) >> 2) 1459 s3 := 2097151 & (load4(s[7:]) >> 7) 1460 s4 := 2097151 & (load4(s[10:]) >> 4) 1461 s5 := 2097151 & (load3(s[13:]) >> 1) 1462 s6 := 2097151 & (load4(s[15:]) >> 6) 1463 s7 := 2097151 & (load3(s[18:]) >> 3) 1464 s8 := 2097151 & load3(s[21:]) 1465 s9 := 2097151 & (load4(s[23:]) >> 5) 1466 s10 := 2097151 & (load3(s[26:]) >> 2) 1467 s11 := 2097151 & (load4(s[28:]) >> 7) 1468 s12 := 2097151 & (load4(s[31:]) >> 4) 1469 s13 := 2097151 & (load3(s[34:]) >> 1) 1470 s14 := 2097151 & (load4(s[36:]) >> 6) 1471 s15 := 2097151 & (load3(s[39:]) >> 3) 1472 s16 := 2097151 & load3(s[42:]) 1473 s17 := 2097151 & (load4(s[44:]) >> 5) 1474 s18 := 2097151 & (load3(s[47:]) >> 2) 1475 s19 := 2097151 & (load4(s[49:]) >> 7) 1476 s20 := 2097151 & (load4(s[52:]) >> 4) 1477 s21 := 2097151 & (load3(s[55:]) >> 1) 1478 s22 := 2097151 & (load4(s[57:]) >> 6) 1479 s23 := (load4(s[60:]) >> 3) 1480 1481 s11 += s23 * 666643 1482 s12 += s23 * 470296 1483 s13 += s23 * 654183 1484 s14 -= s23 * 997805 1485 s15 += s23 * 136657 1486 s16 -= s23 * 683901 1487 s23 = 0 1488 1489 s10 += s22 * 666643 1490 s11 += s22 * 470296 1491 s12 += s22 * 654183 1492 s13 -= s22 * 997805 1493 s14 += s22 * 136657 1494 s15 -= s22 * 683901 1495 s22 = 0 1496 1497 s9 += s21 * 666643 1498 s10 += s21 * 470296 1499 s11 += s21 * 654183 1500 s12 -= s21 * 997805 1501 s13 += s21 * 136657 1502 s14 -= s21 * 683901 1503 s21 = 0 1504 1505 s8 += s20 * 666643 1506 s9 += s20 * 470296 1507 s10 += s20 * 654183 1508 s11 -= s20 * 997805 1509 s12 += s20 * 136657 1510 s13 -= s20 * 683901 1511 s20 = 0 1512 1513 s7 += s19 * 666643 1514 s8 += s19 * 470296 1515 s9 += s19 * 654183 1516 s10 -= s19 * 997805 1517 s11 += s19 * 136657 1518 s12 -= s19 * 683901 1519 s19 = 0 1520 1521 s6 += s18 * 666643 1522 s7 += s18 * 470296 1523 s8 += s18 * 654183 1524 s9 -= s18 * 997805 1525 s10 += s18 * 136657 1526 s11 -= s18 * 683901 1527 s18 = 0 1528 1529 var carry [17]int64 1530 1531 carry[6] = (s6 + (1 << 20)) >> 21 1532 s7 += carry[6] 1533 s6 -= carry[6] << 21 1534 carry[8] = (s8 + (1 << 20)) >> 21 1535 s9 += carry[8] 1536 s8 -= carry[8] << 21 1537 carry[10] = (s10 + (1 << 20)) >> 21 1538 s11 += carry[10] 1539 s10 -= carry[10] << 21 1540 carry[12] = (s12 + (1 << 20)) >> 21 1541 s13 += carry[12] 1542 s12 -= carry[12] << 21 1543 carry[14] = (s14 + (1 << 20)) >> 21 1544 s15 += carry[14] 1545 s14 -= carry[14] << 21 1546 carry[16] = (s16 + (1 << 20)) >> 21 1547 s17 += carry[16] 1548 s16 -= carry[16] << 21 1549 1550 carry[7] = (s7 + (1 << 20)) >> 21 1551 s8 += carry[7] 1552 s7 -= carry[7] << 21 1553 carry[9] = (s9 + (1 << 20)) >> 21 1554 s10 += carry[9] 1555 s9 -= carry[9] << 21 1556 carry[11] = (s11 + (1 << 20)) >> 21 1557 s12 += carry[11] 1558 s11 -= carry[11] << 21 1559 carry[13] = (s13 + (1 << 20)) >> 21 1560 s14 += carry[13] 1561 s13 -= carry[13] << 21 1562 carry[15] = (s15 + (1 << 20)) >> 21 1563 s16 += carry[15] 1564 s15 -= carry[15] << 21 1565 1566 s5 += s17 * 666643 1567 s6 += s17 * 470296 1568 s7 += s17 * 654183 1569 s8 -= s17 * 997805 1570 s9 += s17 * 136657 1571 s10 -= s17 * 683901 1572 s17 = 0 1573 1574 s4 += s16 * 666643 1575 s5 += s16 * 470296 1576 s6 += s16 * 654183 1577 s7 -= s16 * 997805 1578 s8 += s16 * 136657 1579 s9 -= s16 * 683901 1580 s16 = 0 1581 1582 s3 += s15 * 666643 1583 s4 += s15 * 470296 1584 s5 += s15 * 654183 1585 s6 -= s15 * 997805 1586 s7 += s15 * 136657 1587 s8 -= s15 * 683901 1588 s15 = 0 1589 1590 s2 += s14 * 666643 1591 s3 += s14 * 470296 1592 s4 += s14 * 654183 1593 s5 -= s14 * 997805 1594 s6 += s14 * 136657 1595 s7 -= s14 * 683901 1596 s14 = 0 1597 1598 s1 += s13 * 666643 1599 s2 += s13 * 470296 1600 s3 += s13 * 654183 1601 s4 -= s13 * 997805 1602 s5 += s13 * 136657 1603 s6 -= s13 * 683901 1604 s13 = 0 1605 1606 s0 += s12 * 666643 1607 s1 += s12 * 470296 1608 s2 += s12 * 654183 1609 s3 -= s12 * 997805 1610 s4 += s12 * 136657 1611 s5 -= s12 * 683901 1612 s12 = 0 1613 1614 carry[0] = (s0 + (1 << 20)) >> 21 1615 s1 += carry[0] 1616 s0 -= carry[0] << 21 1617 carry[2] = (s2 + (1 << 20)) >> 21 1618 s3 += carry[2] 1619 s2 -= carry[2] << 21 1620 carry[4] = (s4 + (1 << 20)) >> 21 1621 s5 += carry[4] 1622 s4 -= carry[4] << 21 1623 carry[6] = (s6 + (1 << 20)) >> 21 1624 s7 += carry[6] 1625 s6 -= carry[6] << 21 1626 carry[8] = (s8 + (1 << 20)) >> 21 1627 s9 += carry[8] 1628 s8 -= carry[8] << 21 1629 carry[10] = (s10 + (1 << 20)) >> 21 1630 s11 += carry[10] 1631 s10 -= carry[10] << 21 1632 1633 carry[1] = (s1 + (1 << 20)) >> 21 1634 s2 += carry[1] 1635 s1 -= carry[1] << 21 1636 carry[3] = (s3 + (1 << 20)) >> 21 1637 s4 += carry[3] 1638 s3 -= carry[3] << 21 1639 carry[5] = (s5 + (1 << 20)) >> 21 1640 s6 += carry[5] 1641 s5 -= carry[5] << 21 1642 carry[7] = (s7 + (1 << 20)) >> 21 1643 s8 += carry[7] 1644 s7 -= carry[7] << 21 1645 carry[9] = (s9 + (1 << 20)) >> 21 1646 s10 += carry[9] 1647 s9 -= carry[9] << 21 1648 carry[11] = (s11 + (1 << 20)) >> 21 1649 s12 += carry[11] 1650 s11 -= carry[11] << 21 1651 1652 s0 += s12 * 666643 1653 s1 += s12 * 470296 1654 s2 += s12 * 654183 1655 s3 -= s12 * 997805 1656 s4 += s12 * 136657 1657 s5 -= s12 * 683901 1658 s12 = 0 1659 1660 carry[0] = s0 >> 21 1661 s1 += carry[0] 1662 s0 -= carry[0] << 21 1663 carry[1] = s1 >> 21 1664 s2 += carry[1] 1665 s1 -= carry[1] << 21 1666 carry[2] = s2 >> 21 1667 s3 += carry[2] 1668 s2 -= carry[2] << 21 1669 carry[3] = s3 >> 21 1670 s4 += carry[3] 1671 s3 -= carry[3] << 21 1672 carry[4] = s4 >> 21 1673 s5 += carry[4] 1674 s4 -= carry[4] << 21 1675 carry[5] = s5 >> 21 1676 s6 += carry[5] 1677 s5 -= carry[5] << 21 1678 carry[6] = s6 >> 21 1679 s7 += carry[6] 1680 s6 -= carry[6] << 21 1681 carry[7] = s7 >> 21 1682 s8 += carry[7] 1683 s7 -= carry[7] << 21 1684 carry[8] = s8 >> 21 1685 s9 += carry[8] 1686 s8 -= carry[8] << 21 1687 carry[9] = s9 >> 21 1688 s10 += carry[9] 1689 s9 -= carry[9] << 21 1690 carry[10] = s10 >> 21 1691 s11 += carry[10] 1692 s10 -= carry[10] << 21 1693 carry[11] = s11 >> 21 1694 s12 += carry[11] 1695 s11 -= carry[11] << 21 1696 1697 s0 += s12 * 666643 1698 s1 += s12 * 470296 1699 s2 += s12 * 654183 1700 s3 -= s12 * 997805 1701 s4 += s12 * 136657 1702 s5 -= s12 * 683901 1703 s12 = 0 1704 1705 carry[0] = s0 >> 21 1706 s1 += carry[0] 1707 s0 -= carry[0] << 21 1708 carry[1] = s1 >> 21 1709 s2 += carry[1] 1710 s1 -= carry[1] << 21 1711 carry[2] = s2 >> 21 1712 s3 += carry[2] 1713 s2 -= carry[2] << 21 1714 carry[3] = s3 >> 21 1715 s4 += carry[3] 1716 s3 -= carry[3] << 21 1717 carry[4] = s4 >> 21 1718 s5 += carry[4] 1719 s4 -= carry[4] << 21 1720 carry[5] = s5 >> 21 1721 s6 += carry[5] 1722 s5 -= carry[5] << 21 1723 carry[6] = s6 >> 21 1724 s7 += carry[6] 1725 s6 -= carry[6] << 21 1726 carry[7] = s7 >> 21 1727 s8 += carry[7] 1728 s7 -= carry[7] << 21 1729 carry[8] = s8 >> 21 1730 s9 += carry[8] 1731 s8 -= carry[8] << 21 1732 carry[9] = s9 >> 21 1733 s10 += carry[9] 1734 s9 -= carry[9] << 21 1735 carry[10] = s10 >> 21 1736 s11 += carry[10] 1737 s10 -= carry[10] << 21 1738 1739 out[0] = byte(s0 >> 0) 1740 out[1] = byte(s0 >> 8) 1741 out[2] = byte((s0 >> 16) | (s1 << 5)) 1742 out[3] = byte(s1 >> 3) 1743 out[4] = byte(s1 >> 11) 1744 out[5] = byte((s1 >> 19) | (s2 << 2)) 1745 out[6] = byte(s2 >> 6) 1746 out[7] = byte((s2 >> 14) | (s3 << 7)) 1747 out[8] = byte(s3 >> 1) 1748 out[9] = byte(s3 >> 9) 1749 out[10] = byte((s3 >> 17) | (s4 << 4)) 1750 out[11] = byte(s4 >> 4) 1751 out[12] = byte(s4 >> 12) 1752 out[13] = byte((s4 >> 20) | (s5 << 1)) 1753 out[14] = byte(s5 >> 7) 1754 out[15] = byte((s5 >> 15) | (s6 << 6)) 1755 out[16] = byte(s6 >> 2) 1756 out[17] = byte(s6 >> 10) 1757 out[18] = byte((s6 >> 18) | (s7 << 3)) 1758 out[19] = byte(s7 >> 5) 1759 out[20] = byte(s7 >> 13) 1760 out[21] = byte(s8 >> 0) 1761 out[22] = byte(s8 >> 8) 1762 out[23] = byte((s8 >> 16) | (s9 << 5)) 1763 out[24] = byte(s9 >> 3) 1764 out[25] = byte(s9 >> 11) 1765 out[26] = byte((s9 >> 19) | (s10 << 2)) 1766 out[27] = byte(s10 >> 6) 1767 out[28] = byte((s10 >> 14) | (s11 << 7)) 1768 out[29] = byte(s11 >> 1) 1769 out[30] = byte(s11 >> 9) 1770 out[31] = byte(s11 >> 17) 1771 }