github.com/condensat/bank-core@v0.1.0/security/internal/edwards25519/edwards25519.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package edwards25519
     6  
     7  import "encoding/binary"
     8  
     9  // This code is a port of the public domain, “ref10” implementation of ed25519
    10  // from SUPERCOP.
    11  
    12  // FieldElement represents an element of the field GF(2^255 - 19).  An element
    13  // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
    14  // t[3]+2^102 t[4]+...+2^230 t[9].  Bounds on each t[i] vary depending on
    15  // context.
    16  type FieldElement [10]int32
    17  
    18  var zero FieldElement
    19  
    20  func FeZero(fe *FieldElement) {
    21  	copy(fe[:], zero[:])
    22  }
    23  
    24  func FeOne(fe *FieldElement) {
    25  	FeZero(fe)
    26  	fe[0] = 1
    27  }
    28  
    29  func FeAdd(dst, a, b *FieldElement) {
    30  	dst[0] = a[0] + b[0]
    31  	dst[1] = a[1] + b[1]
    32  	dst[2] = a[2] + b[2]
    33  	dst[3] = a[3] + b[3]
    34  	dst[4] = a[4] + b[4]
    35  	dst[5] = a[5] + b[5]
    36  	dst[6] = a[6] + b[6]
    37  	dst[7] = a[7] + b[7]
    38  	dst[8] = a[8] + b[8]
    39  	dst[9] = a[9] + b[9]
    40  }
    41  
    42  func FeSub(dst, a, b *FieldElement) {
    43  	dst[0] = a[0] - b[0]
    44  	dst[1] = a[1] - b[1]
    45  	dst[2] = a[2] - b[2]
    46  	dst[3] = a[3] - b[3]
    47  	dst[4] = a[4] - b[4]
    48  	dst[5] = a[5] - b[5]
    49  	dst[6] = a[6] - b[6]
    50  	dst[7] = a[7] - b[7]
    51  	dst[8] = a[8] - b[8]
    52  	dst[9] = a[9] - b[9]
    53  }
    54  
    55  func FeCopy(dst, src *FieldElement) {
    56  	copy(dst[:], src[:])
    57  }
    58  
    59  // Replace (f,g) with (g,g) if b == 1;
    60  // replace (f,g) with (f,g) if b == 0.
    61  //
    62  // Preconditions: b in {0,1}.
    63  func FeCMove(f, g *FieldElement, b int32) {
    64  	b = -b
    65  	f[0] ^= b & (f[0] ^ g[0])
    66  	f[1] ^= b & (f[1] ^ g[1])
    67  	f[2] ^= b & (f[2] ^ g[2])
    68  	f[3] ^= b & (f[3] ^ g[3])
    69  	f[4] ^= b & (f[4] ^ g[4])
    70  	f[5] ^= b & (f[5] ^ g[5])
    71  	f[6] ^= b & (f[6] ^ g[6])
    72  	f[7] ^= b & (f[7] ^ g[7])
    73  	f[8] ^= b & (f[8] ^ g[8])
    74  	f[9] ^= b & (f[9] ^ g[9])
    75  }
    76  
    77  func load3(in []byte) int64 {
    78  	var r int64
    79  	r = int64(in[0])
    80  	r |= int64(in[1]) << 8
    81  	r |= int64(in[2]) << 16
    82  	return r
    83  }
    84  
    85  func load4(in []byte) int64 {
    86  	var r int64
    87  	r = int64(in[0])
    88  	r |= int64(in[1]) << 8
    89  	r |= int64(in[2]) << 16
    90  	r |= int64(in[3]) << 24
    91  	return r
    92  }
    93  
    94  func FeFromBytes(dst *FieldElement, src *[32]byte) {
    95  	h0 := load4(src[:])
    96  	h1 := load3(src[4:]) << 6
    97  	h2 := load3(src[7:]) << 5
    98  	h3 := load3(src[10:]) << 3
    99  	h4 := load3(src[13:]) << 2
   100  	h5 := load4(src[16:])
   101  	h6 := load3(src[20:]) << 7
   102  	h7 := load3(src[23:]) << 5
   103  	h8 := load3(src[26:]) << 4
   104  	h9 := (load3(src[29:]) & 8388607) << 2
   105  
   106  	FeCombine(dst, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   107  }
   108  
   109  // FeToBytes marshals h to s.
   110  // Preconditions:
   111  //   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   112  //
   113  // Write p=2^255-19; q=floor(h/p).
   114  // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
   115  //
   116  // Proof:
   117  //   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
   118  //   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
   119  //
   120  //   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
   121  //   Then 0<y<1.
   122  //
   123  //   Write r=h-pq.
   124  //   Have 0<=r<=p-1=2^255-20.
   125  //   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
   126  //
   127  //   Write x=r+19(2^-255)r+y.
   128  //   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
   129  //
   130  //   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
   131  //   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
   132  func FeToBytes(s *[32]byte, h *FieldElement) {
   133  	var carry [10]int32
   134  
   135  	q := (19*h[9] + (1 << 24)) >> 25
   136  	q = (h[0] + q) >> 26
   137  	q = (h[1] + q) >> 25
   138  	q = (h[2] + q) >> 26
   139  	q = (h[3] + q) >> 25
   140  	q = (h[4] + q) >> 26
   141  	q = (h[5] + q) >> 25
   142  	q = (h[6] + q) >> 26
   143  	q = (h[7] + q) >> 25
   144  	q = (h[8] + q) >> 26
   145  	q = (h[9] + q) >> 25
   146  
   147  	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
   148  	h[0] += 19 * q
   149  	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
   150  
   151  	carry[0] = h[0] >> 26
   152  	h[1] += carry[0]
   153  	h[0] -= carry[0] << 26
   154  	carry[1] = h[1] >> 25
   155  	h[2] += carry[1]
   156  	h[1] -= carry[1] << 25
   157  	carry[2] = h[2] >> 26
   158  	h[3] += carry[2]
   159  	h[2] -= carry[2] << 26
   160  	carry[3] = h[3] >> 25
   161  	h[4] += carry[3]
   162  	h[3] -= carry[3] << 25
   163  	carry[4] = h[4] >> 26
   164  	h[5] += carry[4]
   165  	h[4] -= carry[4] << 26
   166  	carry[5] = h[5] >> 25
   167  	h[6] += carry[5]
   168  	h[5] -= carry[5] << 25
   169  	carry[6] = h[6] >> 26
   170  	h[7] += carry[6]
   171  	h[6] -= carry[6] << 26
   172  	carry[7] = h[7] >> 25
   173  	h[8] += carry[7]
   174  	h[7] -= carry[7] << 25
   175  	carry[8] = h[8] >> 26
   176  	h[9] += carry[8]
   177  	h[8] -= carry[8] << 26
   178  	carry[9] = h[9] >> 25
   179  	h[9] -= carry[9] << 25
   180  	// h10 = carry9
   181  
   182  	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
   183  	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
   184  	// evidently 2^255 h10-2^255 q = 0.
   185  	// Goal: Output h[0]+...+2^230 h[9].
   186  
   187  	s[0] = byte(h[0] >> 0)
   188  	s[1] = byte(h[0] >> 8)
   189  	s[2] = byte(h[0] >> 16)
   190  	s[3] = byte((h[0] >> 24) | (h[1] << 2))
   191  	s[4] = byte(h[1] >> 6)
   192  	s[5] = byte(h[1] >> 14)
   193  	s[6] = byte((h[1] >> 22) | (h[2] << 3))
   194  	s[7] = byte(h[2] >> 5)
   195  	s[8] = byte(h[2] >> 13)
   196  	s[9] = byte((h[2] >> 21) | (h[3] << 5))
   197  	s[10] = byte(h[3] >> 3)
   198  	s[11] = byte(h[3] >> 11)
   199  	s[12] = byte((h[3] >> 19) | (h[4] << 6))
   200  	s[13] = byte(h[4] >> 2)
   201  	s[14] = byte(h[4] >> 10)
   202  	s[15] = byte(h[4] >> 18)
   203  	s[16] = byte(h[5] >> 0)
   204  	s[17] = byte(h[5] >> 8)
   205  	s[18] = byte(h[5] >> 16)
   206  	s[19] = byte((h[5] >> 24) | (h[6] << 1))
   207  	s[20] = byte(h[6] >> 7)
   208  	s[21] = byte(h[6] >> 15)
   209  	s[22] = byte((h[6] >> 23) | (h[7] << 3))
   210  	s[23] = byte(h[7] >> 5)
   211  	s[24] = byte(h[7] >> 13)
   212  	s[25] = byte((h[7] >> 21) | (h[8] << 4))
   213  	s[26] = byte(h[8] >> 4)
   214  	s[27] = byte(h[8] >> 12)
   215  	s[28] = byte((h[8] >> 20) | (h[9] << 6))
   216  	s[29] = byte(h[9] >> 2)
   217  	s[30] = byte(h[9] >> 10)
   218  	s[31] = byte(h[9] >> 18)
   219  }
   220  
   221  func FeIsNegative(f *FieldElement) byte {
   222  	var s [32]byte
   223  	FeToBytes(&s, f)
   224  	return s[0] & 1
   225  }
   226  
   227  func FeIsNonZero(f *FieldElement) int32 {
   228  	var s [32]byte
   229  	FeToBytes(&s, f)
   230  	var x uint8
   231  	for _, b := range s {
   232  		x |= b
   233  	}
   234  	x |= x >> 4
   235  	x |= x >> 2
   236  	x |= x >> 1
   237  	return int32(x & 1)
   238  }
   239  
   240  // FeNeg sets h = -f
   241  //
   242  // Preconditions:
   243  //    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   244  //
   245  // Postconditions:
   246  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   247  func FeNeg(h, f *FieldElement) {
   248  	h[0] = -f[0]
   249  	h[1] = -f[1]
   250  	h[2] = -f[2]
   251  	h[3] = -f[3]
   252  	h[4] = -f[4]
   253  	h[5] = -f[5]
   254  	h[6] = -f[6]
   255  	h[7] = -f[7]
   256  	h[8] = -f[8]
   257  	h[9] = -f[9]
   258  }
   259  
   260  func FeCombine(h *FieldElement, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
   261  	var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64
   262  
   263  	/*
   264  	  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
   265  	    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
   266  	  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
   267  	    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
   268  	*/
   269  
   270  	c0 = (h0 + (1 << 25)) >> 26
   271  	h1 += c0
   272  	h0 -= c0 << 26
   273  	c4 = (h4 + (1 << 25)) >> 26
   274  	h5 += c4
   275  	h4 -= c4 << 26
   276  	/* |h0| <= 2^25 */
   277  	/* |h4| <= 2^25 */
   278  	/* |h1| <= 1.51*2^58 */
   279  	/* |h5| <= 1.51*2^58 */
   280  
   281  	c1 = (h1 + (1 << 24)) >> 25
   282  	h2 += c1
   283  	h1 -= c1 << 25
   284  	c5 = (h5 + (1 << 24)) >> 25
   285  	h6 += c5
   286  	h5 -= c5 << 25
   287  	/* |h1| <= 2^24; from now on fits into int32 */
   288  	/* |h5| <= 2^24; from now on fits into int32 */
   289  	/* |h2| <= 1.21*2^59 */
   290  	/* |h6| <= 1.21*2^59 */
   291  
   292  	c2 = (h2 + (1 << 25)) >> 26
   293  	h3 += c2
   294  	h2 -= c2 << 26
   295  	c6 = (h6 + (1 << 25)) >> 26
   296  	h7 += c6
   297  	h6 -= c6 << 26
   298  	/* |h2| <= 2^25; from now on fits into int32 unchanged */
   299  	/* |h6| <= 2^25; from now on fits into int32 unchanged */
   300  	/* |h3| <= 1.51*2^58 */
   301  	/* |h7| <= 1.51*2^58 */
   302  
   303  	c3 = (h3 + (1 << 24)) >> 25
   304  	h4 += c3
   305  	h3 -= c3 << 25
   306  	c7 = (h7 + (1 << 24)) >> 25
   307  	h8 += c7
   308  	h7 -= c7 << 25
   309  	/* |h3| <= 2^24; from now on fits into int32 unchanged */
   310  	/* |h7| <= 2^24; from now on fits into int32 unchanged */
   311  	/* |h4| <= 1.52*2^33 */
   312  	/* |h8| <= 1.52*2^33 */
   313  
   314  	c4 = (h4 + (1 << 25)) >> 26
   315  	h5 += c4
   316  	h4 -= c4 << 26
   317  	c8 = (h8 + (1 << 25)) >> 26
   318  	h9 += c8
   319  	h8 -= c8 << 26
   320  	/* |h4| <= 2^25; from now on fits into int32 unchanged */
   321  	/* |h8| <= 2^25; from now on fits into int32 unchanged */
   322  	/* |h5| <= 1.01*2^24 */
   323  	/* |h9| <= 1.51*2^58 */
   324  
   325  	c9 = (h9 + (1 << 24)) >> 25
   326  	h0 += c9 * 19
   327  	h9 -= c9 << 25
   328  	/* |h9| <= 2^24; from now on fits into int32 unchanged */
   329  	/* |h0| <= 1.8*2^37 */
   330  
   331  	c0 = (h0 + (1 << 25)) >> 26
   332  	h1 += c0
   333  	h0 -= c0 << 26
   334  	/* |h0| <= 2^25; from now on fits into int32 unchanged */
   335  	/* |h1| <= 1.01*2^24 */
   336  
   337  	h[0] = int32(h0)
   338  	h[1] = int32(h1)
   339  	h[2] = int32(h2)
   340  	h[3] = int32(h3)
   341  	h[4] = int32(h4)
   342  	h[5] = int32(h5)
   343  	h[6] = int32(h6)
   344  	h[7] = int32(h7)
   345  	h[8] = int32(h8)
   346  	h[9] = int32(h9)
   347  }
   348  
   349  // FeMul calculates h = f * g
   350  // Can overlap h with f or g.
   351  //
   352  // Preconditions:
   353  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   354  //    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   355  //
   356  // Postconditions:
   357  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   358  //
   359  // Notes on implementation strategy:
   360  //
   361  // Using schoolbook multiplication.
   362  // Karatsuba would save a little in some cost models.
   363  //
   364  // Most multiplications by 2 and 19 are 32-bit precomputations;
   365  // cheaper than 64-bit postcomputations.
   366  //
   367  // There is one remaining multiplication by 19 in the carry chain;
   368  // one *19 precomputation can be merged into this,
   369  // but the resulting data flow is considerably less clean.
   370  //
   371  // There are 12 carries below.
   372  // 10 of them are 2-way parallelizable and vectorizable.
   373  // Can get away with 11 carries, but then data flow is much deeper.
   374  //
   375  // With tighter constraints on inputs, can squeeze carries into int32.
   376  func FeMul(h, f, g *FieldElement) {
   377  	f0 := int64(f[0])
   378  	f1 := int64(f[1])
   379  	f2 := int64(f[2])
   380  	f3 := int64(f[3])
   381  	f4 := int64(f[4])
   382  	f5 := int64(f[5])
   383  	f6 := int64(f[6])
   384  	f7 := int64(f[7])
   385  	f8 := int64(f[8])
   386  	f9 := int64(f[9])
   387  
   388  	f1_2 := int64(2 * f[1])
   389  	f3_2 := int64(2 * f[3])
   390  	f5_2 := int64(2 * f[5])
   391  	f7_2 := int64(2 * f[7])
   392  	f9_2 := int64(2 * f[9])
   393  
   394  	g0 := int64(g[0])
   395  	g1 := int64(g[1])
   396  	g2 := int64(g[2])
   397  	g3 := int64(g[3])
   398  	g4 := int64(g[4])
   399  	g5 := int64(g[5])
   400  	g6 := int64(g[6])
   401  	g7 := int64(g[7])
   402  	g8 := int64(g[8])
   403  	g9 := int64(g[9])
   404  
   405  	g1_19 := int64(19 * g[1]) /* 1.4*2^29 */
   406  	g2_19 := int64(19 * g[2]) /* 1.4*2^30; still ok */
   407  	g3_19 := int64(19 * g[3])
   408  	g4_19 := int64(19 * g[4])
   409  	g5_19 := int64(19 * g[5])
   410  	g6_19 := int64(19 * g[6])
   411  	g7_19 := int64(19 * g[7])
   412  	g8_19 := int64(19 * g[8])
   413  	g9_19 := int64(19 * g[9])
   414  
   415  	h0 := f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19
   416  	h1 := f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19
   417  	h2 := f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19
   418  	h3 := f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19
   419  	h4 := f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19
   420  	h5 := f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19
   421  	h6 := f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19
   422  	h7 := f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19
   423  	h8 := f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19
   424  	h9 := f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0
   425  
   426  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   427  }
   428  
   429  func feSquare(f *FieldElement) (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
   430  	f0 := int64(f[0])
   431  	f1 := int64(f[1])
   432  	f2 := int64(f[2])
   433  	f3 := int64(f[3])
   434  	f4 := int64(f[4])
   435  	f5 := int64(f[5])
   436  	f6 := int64(f[6])
   437  	f7 := int64(f[7])
   438  	f8 := int64(f[8])
   439  	f9 := int64(f[9])
   440  	f0_2 := int64(2 * f[0])
   441  	f1_2 := int64(2 * f[1])
   442  	f2_2 := int64(2 * f[2])
   443  	f3_2 := int64(2 * f[3])
   444  	f4_2 := int64(2 * f[4])
   445  	f5_2 := int64(2 * f[5])
   446  	f6_2 := int64(2 * f[6])
   447  	f7_2 := int64(2 * f[7])
   448  	f5_38 := 38 * f5 // 1.31*2^30
   449  	f6_19 := 19 * f6 // 1.31*2^30
   450  	f7_38 := 38 * f7 // 1.31*2^30
   451  	f8_19 := 19 * f8 // 1.31*2^30
   452  	f9_38 := 38 * f9 // 1.31*2^30
   453  
   454  	h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38
   455  	h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19
   456  	h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19
   457  	h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38
   458  	h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38
   459  	h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19
   460  	h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19
   461  	h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38
   462  	h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38
   463  	h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5
   464  
   465  	return
   466  }
   467  
   468  // FeSquare calculates h = f*f. Can overlap h with f.
   469  //
   470  // Preconditions:
   471  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   472  //
   473  // Postconditions:
   474  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   475  func FeSquare(h, f *FieldElement) {
   476  	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
   477  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   478  }
   479  
   480  // FeSquare2 sets h = 2 * f * f
   481  //
   482  // Can overlap h with f.
   483  //
   484  // Preconditions:
   485  //    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
   486  //
   487  // Postconditions:
   488  //    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
   489  // See fe_mul.c for discussion of implementation strategy.
   490  func FeSquare2(h, f *FieldElement) {
   491  	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
   492  
   493  	h0 += h0
   494  	h1 += h1
   495  	h2 += h2
   496  	h3 += h3
   497  	h4 += h4
   498  	h5 += h5
   499  	h6 += h6
   500  	h7 += h7
   501  	h8 += h8
   502  	h9 += h9
   503  
   504  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   505  }
   506  
   507  func FeInvert(out, z *FieldElement) {
   508  	var t0, t1, t2, t3 FieldElement
   509  	var i int
   510  
   511  	FeSquare(&t0, z)        // 2^1
   512  	FeSquare(&t1, &t0)      // 2^2
   513  	for i = 1; i < 2; i++ { // 2^3
   514  		FeSquare(&t1, &t1)
   515  	}
   516  	FeMul(&t1, z, &t1)      // 2^3 + 2^0
   517  	FeMul(&t0, &t0, &t1)    // 2^3 + 2^1 + 2^0
   518  	FeSquare(&t2, &t0)      // 2^4 + 2^2 + 2^1
   519  	FeMul(&t1, &t1, &t2)    // 2^4 + 2^3 + 2^2 + 2^1 + 2^0
   520  	FeSquare(&t2, &t1)      // 5,4,3,2,1
   521  	for i = 1; i < 5; i++ { // 9,8,7,6,5
   522  		FeSquare(&t2, &t2)
   523  	}
   524  	FeMul(&t1, &t2, &t1)     // 9,8,7,6,5,4,3,2,1,0
   525  	FeSquare(&t2, &t1)       // 10..1
   526  	for i = 1; i < 10; i++ { // 19..10
   527  		FeSquare(&t2, &t2)
   528  	}
   529  	FeMul(&t2, &t2, &t1)     // 19..0
   530  	FeSquare(&t3, &t2)       // 20..1
   531  	for i = 1; i < 20; i++ { // 39..20
   532  		FeSquare(&t3, &t3)
   533  	}
   534  	FeMul(&t2, &t3, &t2)     // 39..0
   535  	FeSquare(&t2, &t2)       // 40..1
   536  	for i = 1; i < 10; i++ { // 49..10
   537  		FeSquare(&t2, &t2)
   538  	}
   539  	FeMul(&t1, &t2, &t1)     // 49..0
   540  	FeSquare(&t2, &t1)       // 50..1
   541  	for i = 1; i < 50; i++ { // 99..50
   542  		FeSquare(&t2, &t2)
   543  	}
   544  	FeMul(&t2, &t2, &t1)      // 99..0
   545  	FeSquare(&t3, &t2)        // 100..1
   546  	for i = 1; i < 100; i++ { // 199..100
   547  		FeSquare(&t3, &t3)
   548  	}
   549  	FeMul(&t2, &t3, &t2)     // 199..0
   550  	FeSquare(&t2, &t2)       // 200..1
   551  	for i = 1; i < 50; i++ { // 249..50
   552  		FeSquare(&t2, &t2)
   553  	}
   554  	FeMul(&t1, &t2, &t1)    // 249..0
   555  	FeSquare(&t1, &t1)      // 250..1
   556  	for i = 1; i < 5; i++ { // 254..5
   557  		FeSquare(&t1, &t1)
   558  	}
   559  	FeMul(out, &t1, &t0) // 254..5,3,1,0
   560  }
   561  
   562  func fePow22523(out, z *FieldElement) {
   563  	var t0, t1, t2 FieldElement
   564  	var i int
   565  
   566  	FeSquare(&t0, z)
   567  	for i = 1; i < 1; i++ {
   568  		FeSquare(&t0, &t0)
   569  	}
   570  	FeSquare(&t1, &t0)
   571  	for i = 1; i < 2; i++ {
   572  		FeSquare(&t1, &t1)
   573  	}
   574  	FeMul(&t1, z, &t1)
   575  	FeMul(&t0, &t0, &t1)
   576  	FeSquare(&t0, &t0)
   577  	for i = 1; i < 1; i++ {
   578  		FeSquare(&t0, &t0)
   579  	}
   580  	FeMul(&t0, &t1, &t0)
   581  	FeSquare(&t1, &t0)
   582  	for i = 1; i < 5; i++ {
   583  		FeSquare(&t1, &t1)
   584  	}
   585  	FeMul(&t0, &t1, &t0)
   586  	FeSquare(&t1, &t0)
   587  	for i = 1; i < 10; i++ {
   588  		FeSquare(&t1, &t1)
   589  	}
   590  	FeMul(&t1, &t1, &t0)
   591  	FeSquare(&t2, &t1)
   592  	for i = 1; i < 20; i++ {
   593  		FeSquare(&t2, &t2)
   594  	}
   595  	FeMul(&t1, &t2, &t1)
   596  	FeSquare(&t1, &t1)
   597  	for i = 1; i < 10; i++ {
   598  		FeSquare(&t1, &t1)
   599  	}
   600  	FeMul(&t0, &t1, &t0)
   601  	FeSquare(&t1, &t0)
   602  	for i = 1; i < 50; i++ {
   603  		FeSquare(&t1, &t1)
   604  	}
   605  	FeMul(&t1, &t1, &t0)
   606  	FeSquare(&t2, &t1)
   607  	for i = 1; i < 100; i++ {
   608  		FeSquare(&t2, &t2)
   609  	}
   610  	FeMul(&t1, &t2, &t1)
   611  	FeSquare(&t1, &t1)
   612  	for i = 1; i < 50; i++ {
   613  		FeSquare(&t1, &t1)
   614  	}
   615  	FeMul(&t0, &t1, &t0)
   616  	FeSquare(&t0, &t0)
   617  	for i = 1; i < 2; i++ {
   618  		FeSquare(&t0, &t0)
   619  	}
   620  	FeMul(out, &t0, z)
   621  }
   622  
   623  // Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
   624  // y^2 where d = -121665/121666.
   625  //
   626  // Several representations are used:
   627  //   ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
   628  //   ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
   629  //   CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
   630  //   PreComputedGroupElement: (y+x,y-x,2dxy)
   631  
   632  type ProjectiveGroupElement struct {
   633  	X, Y, Z FieldElement
   634  }
   635  
   636  type ExtendedGroupElement struct {
   637  	X, Y, Z, T FieldElement
   638  }
   639  
   640  type CompletedGroupElement struct {
   641  	X, Y, Z, T FieldElement
   642  }
   643  
   644  type PreComputedGroupElement struct {
   645  	yPlusX, yMinusX, xy2d FieldElement
   646  }
   647  
   648  type CachedGroupElement struct {
   649  	yPlusX, yMinusX, Z, T2d FieldElement
   650  }
   651  
   652  func (p *ProjectiveGroupElement) Zero() {
   653  	FeZero(&p.X)
   654  	FeOne(&p.Y)
   655  	FeOne(&p.Z)
   656  }
   657  
   658  func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) {
   659  	var t0 FieldElement
   660  
   661  	FeSquare(&r.X, &p.X)
   662  	FeSquare(&r.Z, &p.Y)
   663  	FeSquare2(&r.T, &p.Z)
   664  	FeAdd(&r.Y, &p.X, &p.Y)
   665  	FeSquare(&t0, &r.Y)
   666  	FeAdd(&r.Y, &r.Z, &r.X)
   667  	FeSub(&r.Z, &r.Z, &r.X)
   668  	FeSub(&r.X, &t0, &r.Y)
   669  	FeSub(&r.T, &r.T, &r.Z)
   670  }
   671  
   672  func (p *ProjectiveGroupElement) ToBytes(s *[32]byte) {
   673  	var recip, x, y FieldElement
   674  
   675  	FeInvert(&recip, &p.Z)
   676  	FeMul(&x, &p.X, &recip)
   677  	FeMul(&y, &p.Y, &recip)
   678  	FeToBytes(s, &y)
   679  	s[31] ^= FeIsNegative(&x) << 7
   680  }
   681  
   682  func (p *ExtendedGroupElement) Zero() {
   683  	FeZero(&p.X)
   684  	FeOne(&p.Y)
   685  	FeOne(&p.Z)
   686  	FeZero(&p.T)
   687  }
   688  
   689  func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) {
   690  	var q ProjectiveGroupElement
   691  	p.ToProjective(&q)
   692  	q.Double(r)
   693  }
   694  
   695  func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) {
   696  	FeAdd(&r.yPlusX, &p.Y, &p.X)
   697  	FeSub(&r.yMinusX, &p.Y, &p.X)
   698  	FeCopy(&r.Z, &p.Z)
   699  	FeMul(&r.T2d, &p.T, &d2)
   700  }
   701  
   702  func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) {
   703  	FeCopy(&r.X, &p.X)
   704  	FeCopy(&r.Y, &p.Y)
   705  	FeCopy(&r.Z, &p.Z)
   706  }
   707  
   708  func (p *ExtendedGroupElement) ToBytes(s *[32]byte) {
   709  	var recip, x, y FieldElement
   710  
   711  	FeInvert(&recip, &p.Z)
   712  	FeMul(&x, &p.X, &recip)
   713  	FeMul(&y, &p.Y, &recip)
   714  	FeToBytes(s, &y)
   715  	s[31] ^= FeIsNegative(&x) << 7
   716  }
   717  
   718  func (p *ExtendedGroupElement) FromBytes(s *[32]byte) bool {
   719  	var u, v, v3, vxx, check FieldElement
   720  
   721  	FeFromBytes(&p.Y, s)
   722  	FeOne(&p.Z)
   723  	FeSquare(&u, &p.Y)
   724  	FeMul(&v, &u, &d)
   725  	FeSub(&u, &u, &p.Z) // y = y^2-1
   726  	FeAdd(&v, &v, &p.Z) // v = dy^2+1
   727  
   728  	FeSquare(&v3, &v)
   729  	FeMul(&v3, &v3, &v) // v3 = v^3
   730  	FeSquare(&p.X, &v3)
   731  	FeMul(&p.X, &p.X, &v)
   732  	FeMul(&p.X, &p.X, &u) // x = uv^7
   733  
   734  	fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
   735  	FeMul(&p.X, &p.X, &v3)
   736  	FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)
   737  
   738  	var tmpX, tmp2 [32]byte
   739  
   740  	FeSquare(&vxx, &p.X)
   741  	FeMul(&vxx, &vxx, &v)
   742  	FeSub(&check, &vxx, &u) // vx^2-u
   743  	if FeIsNonZero(&check) == 1 {
   744  		FeAdd(&check, &vxx, &u) // vx^2+u
   745  		if FeIsNonZero(&check) == 1 {
   746  			return false
   747  		}
   748  		FeMul(&p.X, &p.X, &SqrtM1)
   749  
   750  		FeToBytes(&tmpX, &p.X)
   751  		for i, v := range tmpX {
   752  			tmp2[31-i] = v
   753  		}
   754  	}
   755  
   756  	if FeIsNegative(&p.X) != (s[31] >> 7) {
   757  		FeNeg(&p.X, &p.X)
   758  	}
   759  
   760  	FeMul(&p.T, &p.X, &p.Y)
   761  	return true
   762  }
   763  
   764  func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) {
   765  	FeMul(&r.X, &p.X, &p.T)
   766  	FeMul(&r.Y, &p.Y, &p.Z)
   767  	FeMul(&r.Z, &p.Z, &p.T)
   768  }
   769  
   770  func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) {
   771  	FeMul(&r.X, &p.X, &p.T)
   772  	FeMul(&r.Y, &p.Y, &p.Z)
   773  	FeMul(&r.Z, &p.Z, &p.T)
   774  	FeMul(&r.T, &p.X, &p.Y)
   775  }
   776  
   777  func (p *PreComputedGroupElement) Zero() {
   778  	FeOne(&p.yPlusX)
   779  	FeOne(&p.yMinusX)
   780  	FeZero(&p.xy2d)
   781  }
   782  
   783  func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
   784  	var t0 FieldElement
   785  
   786  	FeAdd(&r.X, &p.Y, &p.X)
   787  	FeSub(&r.Y, &p.Y, &p.X)
   788  	FeMul(&r.Z, &r.X, &q.yPlusX)
   789  	FeMul(&r.Y, &r.Y, &q.yMinusX)
   790  	FeMul(&r.T, &q.T2d, &p.T)
   791  	FeMul(&r.X, &p.Z, &q.Z)
   792  	FeAdd(&t0, &r.X, &r.X)
   793  	FeSub(&r.X, &r.Z, &r.Y)
   794  	FeAdd(&r.Y, &r.Z, &r.Y)
   795  	FeAdd(&r.Z, &t0, &r.T)
   796  	FeSub(&r.T, &t0, &r.T)
   797  }
   798  
   799  func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
   800  	var t0 FieldElement
   801  
   802  	FeAdd(&r.X, &p.Y, &p.X)
   803  	FeSub(&r.Y, &p.Y, &p.X)
   804  	FeMul(&r.Z, &r.X, &q.yMinusX)
   805  	FeMul(&r.Y, &r.Y, &q.yPlusX)
   806  	FeMul(&r.T, &q.T2d, &p.T)
   807  	FeMul(&r.X, &p.Z, &q.Z)
   808  	FeAdd(&t0, &r.X, &r.X)
   809  	FeSub(&r.X, &r.Z, &r.Y)
   810  	FeAdd(&r.Y, &r.Z, &r.Y)
   811  	FeSub(&r.Z, &t0, &r.T)
   812  	FeAdd(&r.T, &t0, &r.T)
   813  }
   814  
   815  func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
   816  	var t0 FieldElement
   817  
   818  	FeAdd(&r.X, &p.Y, &p.X)
   819  	FeSub(&r.Y, &p.Y, &p.X)
   820  	FeMul(&r.Z, &r.X, &q.yPlusX)
   821  	FeMul(&r.Y, &r.Y, &q.yMinusX)
   822  	FeMul(&r.T, &q.xy2d, &p.T)
   823  	FeAdd(&t0, &p.Z, &p.Z)
   824  	FeSub(&r.X, &r.Z, &r.Y)
   825  	FeAdd(&r.Y, &r.Z, &r.Y)
   826  	FeAdd(&r.Z, &t0, &r.T)
   827  	FeSub(&r.T, &t0, &r.T)
   828  }
   829  
   830  func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
   831  	var t0 FieldElement
   832  
   833  	FeAdd(&r.X, &p.Y, &p.X)
   834  	FeSub(&r.Y, &p.Y, &p.X)
   835  	FeMul(&r.Z, &r.X, &q.yMinusX)
   836  	FeMul(&r.Y, &r.Y, &q.yPlusX)
   837  	FeMul(&r.T, &q.xy2d, &p.T)
   838  	FeAdd(&t0, &p.Z, &p.Z)
   839  	FeSub(&r.X, &r.Z, &r.Y)
   840  	FeAdd(&r.Y, &r.Z, &r.Y)
   841  	FeSub(&r.Z, &t0, &r.T)
   842  	FeAdd(&r.T, &t0, &r.T)
   843  }
   844  
   845  func slide(r *[256]int8, a *[32]byte) {
   846  	for i := range r {
   847  		r[i] = int8(1 & (a[i>>3] >> uint(i&7)))
   848  	}
   849  
   850  	for i := range r {
   851  		if r[i] != 0 {
   852  			for b := 1; b <= 6 && i+b < 256; b++ {
   853  				if r[i+b] != 0 {
   854  					if r[i]+(r[i+b]<<uint(b)) <= 15 {
   855  						r[i] += r[i+b] << uint(b)
   856  						r[i+b] = 0
   857  					} else if r[i]-(r[i+b]<<uint(b)) >= -15 {
   858  						r[i] -= r[i+b] << uint(b)
   859  						for k := i + b; k < 256; k++ {
   860  							if r[k] == 0 {
   861  								r[k] = 1
   862  								break
   863  							}
   864  							r[k] = 0
   865  						}
   866  					} else {
   867  						break
   868  					}
   869  				}
   870  			}
   871  		}
   872  	}
   873  }
   874  
   875  // GeDoubleScalarMultVartime sets r = a*A + b*B
   876  // where a = a[0]+256*a[1]+...+256^31 a[31].
   877  // and b = b[0]+256*b[1]+...+256^31 b[31].
   878  // B is the Ed25519 base point (x,4/5) with x positive.
   879  func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *[32]byte, A *ExtendedGroupElement, b *[32]byte) {
   880  	var aSlide, bSlide [256]int8
   881  	var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
   882  	var t CompletedGroupElement
   883  	var u, A2 ExtendedGroupElement
   884  	var i int
   885  
   886  	slide(&aSlide, a)
   887  	slide(&bSlide, b)
   888  
   889  	A.ToCached(&Ai[0])
   890  	A.Double(&t)
   891  	t.ToExtended(&A2)
   892  
   893  	for i := 0; i < 7; i++ {
   894  		geAdd(&t, &A2, &Ai[i])
   895  		t.ToExtended(&u)
   896  		u.ToCached(&Ai[i+1])
   897  	}
   898  
   899  	r.Zero()
   900  
   901  	for i = 255; i >= 0; i-- {
   902  		if aSlide[i] != 0 || bSlide[i] != 0 {
   903  			break
   904  		}
   905  	}
   906  
   907  	for ; i >= 0; i-- {
   908  		r.Double(&t)
   909  
   910  		if aSlide[i] > 0 {
   911  			t.ToExtended(&u)
   912  			geAdd(&t, &u, &Ai[aSlide[i]/2])
   913  		} else if aSlide[i] < 0 {
   914  			t.ToExtended(&u)
   915  			geSub(&t, &u, &Ai[(-aSlide[i])/2])
   916  		}
   917  
   918  		if bSlide[i] > 0 {
   919  			t.ToExtended(&u)
   920  			geMixedAdd(&t, &u, &bi[bSlide[i]/2])
   921  		} else if bSlide[i] < 0 {
   922  			t.ToExtended(&u)
   923  			geMixedSub(&t, &u, &bi[(-bSlide[i])/2])
   924  		}
   925  
   926  		t.ToProjective(r)
   927  	}
   928  }
   929  
   930  // equal returns 1 if b == c and 0 otherwise, assuming that b and c are
   931  // non-negative.
   932  func equal(b, c int32) int32 {
   933  	x := uint32(b ^ c)
   934  	x--
   935  	return int32(x >> 31)
   936  }
   937  
   938  // negative returns 1 if b < 0 and 0 otherwise.
   939  func negative(b int32) int32 {
   940  	return (b >> 31) & 1
   941  }
   942  
   943  func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) {
   944  	FeCMove(&t.yPlusX, &u.yPlusX, b)
   945  	FeCMove(&t.yMinusX, &u.yMinusX, b)
   946  	FeCMove(&t.xy2d, &u.xy2d, b)
   947  }
   948  
   949  func selectPoint(t *PreComputedGroupElement, pos int32, b int32) {
   950  	var minusT PreComputedGroupElement
   951  	bNegative := negative(b)
   952  	bAbs := b - (((-bNegative) & b) << 1)
   953  
   954  	t.Zero()
   955  	for i := int32(0); i < 8; i++ {
   956  		PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1))
   957  	}
   958  	FeCopy(&minusT.yPlusX, &t.yMinusX)
   959  	FeCopy(&minusT.yMinusX, &t.yPlusX)
   960  	FeNeg(&minusT.xy2d, &t.xy2d)
   961  	PreComputedGroupElementCMove(t, &minusT, bNegative)
   962  }
   963  
   964  // GeScalarMultBase computes h = a*B, where
   965  //   a = a[0]+256*a[1]+...+256^31 a[31]
   966  //   B is the Ed25519 base point (x,4/5) with x positive.
   967  //
   968  // Preconditions:
   969  //   a[31] <= 127
   970  func GeScalarMultBase(h *ExtendedGroupElement, a *[32]byte) {
   971  	var e [64]int8
   972  
   973  	for i, v := range a {
   974  		e[2*i] = int8(v & 15)
   975  		e[2*i+1] = int8((v >> 4) & 15)
   976  	}
   977  
   978  	// each e[i] is between 0 and 15 and e[63] is between 0 and 7.
   979  
   980  	carry := int8(0)
   981  	for i := 0; i < 63; i++ {
   982  		e[i] += carry
   983  		carry = (e[i] + 8) >> 4
   984  		e[i] -= carry << 4
   985  	}
   986  	e[63] += carry
   987  	// each e[i] is between -8 and 8.
   988  
   989  	h.Zero()
   990  	var t PreComputedGroupElement
   991  	var r CompletedGroupElement
   992  	for i := int32(1); i < 64; i += 2 {
   993  		selectPoint(&t, i/2, int32(e[i]))
   994  		geMixedAdd(&r, h, &t)
   995  		r.ToExtended(h)
   996  	}
   997  
   998  	var s ProjectiveGroupElement
   999  
  1000  	h.Double(&r)
  1001  	r.ToProjective(&s)
  1002  	s.Double(&r)
  1003  	r.ToProjective(&s)
  1004  	s.Double(&r)
  1005  	r.ToProjective(&s)
  1006  	s.Double(&r)
  1007  	r.ToExtended(h)
  1008  
  1009  	for i := int32(0); i < 64; i += 2 {
  1010  		selectPoint(&t, i/2, int32(e[i]))
  1011  		geMixedAdd(&r, h, &t)
  1012  		r.ToExtended(h)
  1013  	}
  1014  }
  1015  
  1016  // The scalars are GF(2^252 + 27742317777372353535851937790883648493).
  1017  
  1018  // Input:
  1019  //   a[0]+256*a[1]+...+256^31*a[31] = a
  1020  //   b[0]+256*b[1]+...+256^31*b[31] = b
  1021  //   c[0]+256*c[1]+...+256^31*c[31] = c
  1022  //
  1023  // Output:
  1024  //   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
  1025  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1026  func ScMulAdd(s, a, b, c *[32]byte) {
  1027  	a0 := 2097151 & load3(a[:])
  1028  	a1 := 2097151 & (load4(a[2:]) >> 5)
  1029  	a2 := 2097151 & (load3(a[5:]) >> 2)
  1030  	a3 := 2097151 & (load4(a[7:]) >> 7)
  1031  	a4 := 2097151 & (load4(a[10:]) >> 4)
  1032  	a5 := 2097151 & (load3(a[13:]) >> 1)
  1033  	a6 := 2097151 & (load4(a[15:]) >> 6)
  1034  	a7 := 2097151 & (load3(a[18:]) >> 3)
  1035  	a8 := 2097151 & load3(a[21:])
  1036  	a9 := 2097151 & (load4(a[23:]) >> 5)
  1037  	a10 := 2097151 & (load3(a[26:]) >> 2)
  1038  	a11 := (load4(a[28:]) >> 7)
  1039  	b0 := 2097151 & load3(b[:])
  1040  	b1 := 2097151 & (load4(b[2:]) >> 5)
  1041  	b2 := 2097151 & (load3(b[5:]) >> 2)
  1042  	b3 := 2097151 & (load4(b[7:]) >> 7)
  1043  	b4 := 2097151 & (load4(b[10:]) >> 4)
  1044  	b5 := 2097151 & (load3(b[13:]) >> 1)
  1045  	b6 := 2097151 & (load4(b[15:]) >> 6)
  1046  	b7 := 2097151 & (load3(b[18:]) >> 3)
  1047  	b8 := 2097151 & load3(b[21:])
  1048  	b9 := 2097151 & (load4(b[23:]) >> 5)
  1049  	b10 := 2097151 & (load3(b[26:]) >> 2)
  1050  	b11 := (load4(b[28:]) >> 7)
  1051  	c0 := 2097151 & load3(c[:])
  1052  	c1 := 2097151 & (load4(c[2:]) >> 5)
  1053  	c2 := 2097151 & (load3(c[5:]) >> 2)
  1054  	c3 := 2097151 & (load4(c[7:]) >> 7)
  1055  	c4 := 2097151 & (load4(c[10:]) >> 4)
  1056  	c5 := 2097151 & (load3(c[13:]) >> 1)
  1057  	c6 := 2097151 & (load4(c[15:]) >> 6)
  1058  	c7 := 2097151 & (load3(c[18:]) >> 3)
  1059  	c8 := 2097151 & load3(c[21:])
  1060  	c9 := 2097151 & (load4(c[23:]) >> 5)
  1061  	c10 := 2097151 & (load3(c[26:]) >> 2)
  1062  	c11 := (load4(c[28:]) >> 7)
  1063  	var carry [23]int64
  1064  
  1065  	s0 := c0 + a0*b0
  1066  	s1 := c1 + a0*b1 + a1*b0
  1067  	s2 := c2 + a0*b2 + a1*b1 + a2*b0
  1068  	s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
  1069  	s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
  1070  	s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
  1071  	s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
  1072  	s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
  1073  	s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
  1074  	s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
  1075  	s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
  1076  	s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
  1077  	s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
  1078  	s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
  1079  	s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
  1080  	s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
  1081  	s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
  1082  	s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
  1083  	s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
  1084  	s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
  1085  	s20 := a9*b11 + a10*b10 + a11*b9
  1086  	s21 := a10*b11 + a11*b10
  1087  	s22 := a11 * b11
  1088  	s23 := int64(0)
  1089  
  1090  	carry[0] = (s0 + (1 << 20)) >> 21
  1091  	s1 += carry[0]
  1092  	s0 -= carry[0] << 21
  1093  	carry[2] = (s2 + (1 << 20)) >> 21
  1094  	s3 += carry[2]
  1095  	s2 -= carry[2] << 21
  1096  	carry[4] = (s4 + (1 << 20)) >> 21
  1097  	s5 += carry[4]
  1098  	s4 -= carry[4] << 21
  1099  	carry[6] = (s6 + (1 << 20)) >> 21
  1100  	s7 += carry[6]
  1101  	s6 -= carry[6] << 21
  1102  	carry[8] = (s8 + (1 << 20)) >> 21
  1103  	s9 += carry[8]
  1104  	s8 -= carry[8] << 21
  1105  	carry[10] = (s10 + (1 << 20)) >> 21
  1106  	s11 += carry[10]
  1107  	s10 -= carry[10] << 21
  1108  	carry[12] = (s12 + (1 << 20)) >> 21
  1109  	s13 += carry[12]
  1110  	s12 -= carry[12] << 21
  1111  	carry[14] = (s14 + (1 << 20)) >> 21
  1112  	s15 += carry[14]
  1113  	s14 -= carry[14] << 21
  1114  	carry[16] = (s16 + (1 << 20)) >> 21
  1115  	s17 += carry[16]
  1116  	s16 -= carry[16] << 21
  1117  	carry[18] = (s18 + (1 << 20)) >> 21
  1118  	s19 += carry[18]
  1119  	s18 -= carry[18] << 21
  1120  	carry[20] = (s20 + (1 << 20)) >> 21
  1121  	s21 += carry[20]
  1122  	s20 -= carry[20] << 21
  1123  	carry[22] = (s22 + (1 << 20)) >> 21
  1124  	s23 += carry[22]
  1125  	s22 -= carry[22] << 21
  1126  
  1127  	carry[1] = (s1 + (1 << 20)) >> 21
  1128  	s2 += carry[1]
  1129  	s1 -= carry[1] << 21
  1130  	carry[3] = (s3 + (1 << 20)) >> 21
  1131  	s4 += carry[3]
  1132  	s3 -= carry[3] << 21
  1133  	carry[5] = (s5 + (1 << 20)) >> 21
  1134  	s6 += carry[5]
  1135  	s5 -= carry[5] << 21
  1136  	carry[7] = (s7 + (1 << 20)) >> 21
  1137  	s8 += carry[7]
  1138  	s7 -= carry[7] << 21
  1139  	carry[9] = (s9 + (1 << 20)) >> 21
  1140  	s10 += carry[9]
  1141  	s9 -= carry[9] << 21
  1142  	carry[11] = (s11 + (1 << 20)) >> 21
  1143  	s12 += carry[11]
  1144  	s11 -= carry[11] << 21
  1145  	carry[13] = (s13 + (1 << 20)) >> 21
  1146  	s14 += carry[13]
  1147  	s13 -= carry[13] << 21
  1148  	carry[15] = (s15 + (1 << 20)) >> 21
  1149  	s16 += carry[15]
  1150  	s15 -= carry[15] << 21
  1151  	carry[17] = (s17 + (1 << 20)) >> 21
  1152  	s18 += carry[17]
  1153  	s17 -= carry[17] << 21
  1154  	carry[19] = (s19 + (1 << 20)) >> 21
  1155  	s20 += carry[19]
  1156  	s19 -= carry[19] << 21
  1157  	carry[21] = (s21 + (1 << 20)) >> 21
  1158  	s22 += carry[21]
  1159  	s21 -= carry[21] << 21
  1160  
  1161  	s11 += s23 * 666643
  1162  	s12 += s23 * 470296
  1163  	s13 += s23 * 654183
  1164  	s14 -= s23 * 997805
  1165  	s15 += s23 * 136657
  1166  	s16 -= s23 * 683901
  1167  	s23 = 0
  1168  
  1169  	s10 += s22 * 666643
  1170  	s11 += s22 * 470296
  1171  	s12 += s22 * 654183
  1172  	s13 -= s22 * 997805
  1173  	s14 += s22 * 136657
  1174  	s15 -= s22 * 683901
  1175  	s22 = 0
  1176  
  1177  	s9 += s21 * 666643
  1178  	s10 += s21 * 470296
  1179  	s11 += s21 * 654183
  1180  	s12 -= s21 * 997805
  1181  	s13 += s21 * 136657
  1182  	s14 -= s21 * 683901
  1183  	s21 = 0
  1184  
  1185  	s8 += s20 * 666643
  1186  	s9 += s20 * 470296
  1187  	s10 += s20 * 654183
  1188  	s11 -= s20 * 997805
  1189  	s12 += s20 * 136657
  1190  	s13 -= s20 * 683901
  1191  	s20 = 0
  1192  
  1193  	s7 += s19 * 666643
  1194  	s8 += s19 * 470296
  1195  	s9 += s19 * 654183
  1196  	s10 -= s19 * 997805
  1197  	s11 += s19 * 136657
  1198  	s12 -= s19 * 683901
  1199  	s19 = 0
  1200  
  1201  	s6 += s18 * 666643
  1202  	s7 += s18 * 470296
  1203  	s8 += s18 * 654183
  1204  	s9 -= s18 * 997805
  1205  	s10 += s18 * 136657
  1206  	s11 -= s18 * 683901
  1207  	s18 = 0
  1208  
  1209  	carry[6] = (s6 + (1 << 20)) >> 21
  1210  	s7 += carry[6]
  1211  	s6 -= carry[6] << 21
  1212  	carry[8] = (s8 + (1 << 20)) >> 21
  1213  	s9 += carry[8]
  1214  	s8 -= carry[8] << 21
  1215  	carry[10] = (s10 + (1 << 20)) >> 21
  1216  	s11 += carry[10]
  1217  	s10 -= carry[10] << 21
  1218  	carry[12] = (s12 + (1 << 20)) >> 21
  1219  	s13 += carry[12]
  1220  	s12 -= carry[12] << 21
  1221  	carry[14] = (s14 + (1 << 20)) >> 21
  1222  	s15 += carry[14]
  1223  	s14 -= carry[14] << 21
  1224  	carry[16] = (s16 + (1 << 20)) >> 21
  1225  	s17 += carry[16]
  1226  	s16 -= carry[16] << 21
  1227  
  1228  	carry[7] = (s7 + (1 << 20)) >> 21
  1229  	s8 += carry[7]
  1230  	s7 -= carry[7] << 21
  1231  	carry[9] = (s9 + (1 << 20)) >> 21
  1232  	s10 += carry[9]
  1233  	s9 -= carry[9] << 21
  1234  	carry[11] = (s11 + (1 << 20)) >> 21
  1235  	s12 += carry[11]
  1236  	s11 -= carry[11] << 21
  1237  	carry[13] = (s13 + (1 << 20)) >> 21
  1238  	s14 += carry[13]
  1239  	s13 -= carry[13] << 21
  1240  	carry[15] = (s15 + (1 << 20)) >> 21
  1241  	s16 += carry[15]
  1242  	s15 -= carry[15] << 21
  1243  
  1244  	s5 += s17 * 666643
  1245  	s6 += s17 * 470296
  1246  	s7 += s17 * 654183
  1247  	s8 -= s17 * 997805
  1248  	s9 += s17 * 136657
  1249  	s10 -= s17 * 683901
  1250  	s17 = 0
  1251  
  1252  	s4 += s16 * 666643
  1253  	s5 += s16 * 470296
  1254  	s6 += s16 * 654183
  1255  	s7 -= s16 * 997805
  1256  	s8 += s16 * 136657
  1257  	s9 -= s16 * 683901
  1258  	s16 = 0
  1259  
  1260  	s3 += s15 * 666643
  1261  	s4 += s15 * 470296
  1262  	s5 += s15 * 654183
  1263  	s6 -= s15 * 997805
  1264  	s7 += s15 * 136657
  1265  	s8 -= s15 * 683901
  1266  	s15 = 0
  1267  
  1268  	s2 += s14 * 666643
  1269  	s3 += s14 * 470296
  1270  	s4 += s14 * 654183
  1271  	s5 -= s14 * 997805
  1272  	s6 += s14 * 136657
  1273  	s7 -= s14 * 683901
  1274  	s14 = 0
  1275  
  1276  	s1 += s13 * 666643
  1277  	s2 += s13 * 470296
  1278  	s3 += s13 * 654183
  1279  	s4 -= s13 * 997805
  1280  	s5 += s13 * 136657
  1281  	s6 -= s13 * 683901
  1282  	s13 = 0
  1283  
  1284  	s0 += s12 * 666643
  1285  	s1 += s12 * 470296
  1286  	s2 += s12 * 654183
  1287  	s3 -= s12 * 997805
  1288  	s4 += s12 * 136657
  1289  	s5 -= s12 * 683901
  1290  	s12 = 0
  1291  
  1292  	carry[0] = (s0 + (1 << 20)) >> 21
  1293  	s1 += carry[0]
  1294  	s0 -= carry[0] << 21
  1295  	carry[2] = (s2 + (1 << 20)) >> 21
  1296  	s3 += carry[2]
  1297  	s2 -= carry[2] << 21
  1298  	carry[4] = (s4 + (1 << 20)) >> 21
  1299  	s5 += carry[4]
  1300  	s4 -= carry[4] << 21
  1301  	carry[6] = (s6 + (1 << 20)) >> 21
  1302  	s7 += carry[6]
  1303  	s6 -= carry[6] << 21
  1304  	carry[8] = (s8 + (1 << 20)) >> 21
  1305  	s9 += carry[8]
  1306  	s8 -= carry[8] << 21
  1307  	carry[10] = (s10 + (1 << 20)) >> 21
  1308  	s11 += carry[10]
  1309  	s10 -= carry[10] << 21
  1310  
  1311  	carry[1] = (s1 + (1 << 20)) >> 21
  1312  	s2 += carry[1]
  1313  	s1 -= carry[1] << 21
  1314  	carry[3] = (s3 + (1 << 20)) >> 21
  1315  	s4 += carry[3]
  1316  	s3 -= carry[3] << 21
  1317  	carry[5] = (s5 + (1 << 20)) >> 21
  1318  	s6 += carry[5]
  1319  	s5 -= carry[5] << 21
  1320  	carry[7] = (s7 + (1 << 20)) >> 21
  1321  	s8 += carry[7]
  1322  	s7 -= carry[7] << 21
  1323  	carry[9] = (s9 + (1 << 20)) >> 21
  1324  	s10 += carry[9]
  1325  	s9 -= carry[9] << 21
  1326  	carry[11] = (s11 + (1 << 20)) >> 21
  1327  	s12 += carry[11]
  1328  	s11 -= carry[11] << 21
  1329  
  1330  	s0 += s12 * 666643
  1331  	s1 += s12 * 470296
  1332  	s2 += s12 * 654183
  1333  	s3 -= s12 * 997805
  1334  	s4 += s12 * 136657
  1335  	s5 -= s12 * 683901
  1336  	s12 = 0
  1337  
  1338  	carry[0] = s0 >> 21
  1339  	s1 += carry[0]
  1340  	s0 -= carry[0] << 21
  1341  	carry[1] = s1 >> 21
  1342  	s2 += carry[1]
  1343  	s1 -= carry[1] << 21
  1344  	carry[2] = s2 >> 21
  1345  	s3 += carry[2]
  1346  	s2 -= carry[2] << 21
  1347  	carry[3] = s3 >> 21
  1348  	s4 += carry[3]
  1349  	s3 -= carry[3] << 21
  1350  	carry[4] = s4 >> 21
  1351  	s5 += carry[4]
  1352  	s4 -= carry[4] << 21
  1353  	carry[5] = s5 >> 21
  1354  	s6 += carry[5]
  1355  	s5 -= carry[5] << 21
  1356  	carry[6] = s6 >> 21
  1357  	s7 += carry[6]
  1358  	s6 -= carry[6] << 21
  1359  	carry[7] = s7 >> 21
  1360  	s8 += carry[7]
  1361  	s7 -= carry[7] << 21
  1362  	carry[8] = s8 >> 21
  1363  	s9 += carry[8]
  1364  	s8 -= carry[8] << 21
  1365  	carry[9] = s9 >> 21
  1366  	s10 += carry[9]
  1367  	s9 -= carry[9] << 21
  1368  	carry[10] = s10 >> 21
  1369  	s11 += carry[10]
  1370  	s10 -= carry[10] << 21
  1371  	carry[11] = s11 >> 21
  1372  	s12 += carry[11]
  1373  	s11 -= carry[11] << 21
  1374  
  1375  	s0 += s12 * 666643
  1376  	s1 += s12 * 470296
  1377  	s2 += s12 * 654183
  1378  	s3 -= s12 * 997805
  1379  	s4 += s12 * 136657
  1380  	s5 -= s12 * 683901
  1381  	s12 = 0
  1382  
  1383  	carry[0] = s0 >> 21
  1384  	s1 += carry[0]
  1385  	s0 -= carry[0] << 21
  1386  	carry[1] = s1 >> 21
  1387  	s2 += carry[1]
  1388  	s1 -= carry[1] << 21
  1389  	carry[2] = s2 >> 21
  1390  	s3 += carry[2]
  1391  	s2 -= carry[2] << 21
  1392  	carry[3] = s3 >> 21
  1393  	s4 += carry[3]
  1394  	s3 -= carry[3] << 21
  1395  	carry[4] = s4 >> 21
  1396  	s5 += carry[4]
  1397  	s4 -= carry[4] << 21
  1398  	carry[5] = s5 >> 21
  1399  	s6 += carry[5]
  1400  	s5 -= carry[5] << 21
  1401  	carry[6] = s6 >> 21
  1402  	s7 += carry[6]
  1403  	s6 -= carry[6] << 21
  1404  	carry[7] = s7 >> 21
  1405  	s8 += carry[7]
  1406  	s7 -= carry[7] << 21
  1407  	carry[8] = s8 >> 21
  1408  	s9 += carry[8]
  1409  	s8 -= carry[8] << 21
  1410  	carry[9] = s9 >> 21
  1411  	s10 += carry[9]
  1412  	s9 -= carry[9] << 21
  1413  	carry[10] = s10 >> 21
  1414  	s11 += carry[10]
  1415  	s10 -= carry[10] << 21
  1416  
  1417  	s[0] = byte(s0 >> 0)
  1418  	s[1] = byte(s0 >> 8)
  1419  	s[2] = byte((s0 >> 16) | (s1 << 5))
  1420  	s[3] = byte(s1 >> 3)
  1421  	s[4] = byte(s1 >> 11)
  1422  	s[5] = byte((s1 >> 19) | (s2 << 2))
  1423  	s[6] = byte(s2 >> 6)
  1424  	s[7] = byte((s2 >> 14) | (s3 << 7))
  1425  	s[8] = byte(s3 >> 1)
  1426  	s[9] = byte(s3 >> 9)
  1427  	s[10] = byte((s3 >> 17) | (s4 << 4))
  1428  	s[11] = byte(s4 >> 4)
  1429  	s[12] = byte(s4 >> 12)
  1430  	s[13] = byte((s4 >> 20) | (s5 << 1))
  1431  	s[14] = byte(s5 >> 7)
  1432  	s[15] = byte((s5 >> 15) | (s6 << 6))
  1433  	s[16] = byte(s6 >> 2)
  1434  	s[17] = byte(s6 >> 10)
  1435  	s[18] = byte((s6 >> 18) | (s7 << 3))
  1436  	s[19] = byte(s7 >> 5)
  1437  	s[20] = byte(s7 >> 13)
  1438  	s[21] = byte(s8 >> 0)
  1439  	s[22] = byte(s8 >> 8)
  1440  	s[23] = byte((s8 >> 16) | (s9 << 5))
  1441  	s[24] = byte(s9 >> 3)
  1442  	s[25] = byte(s9 >> 11)
  1443  	s[26] = byte((s9 >> 19) | (s10 << 2))
  1444  	s[27] = byte(s10 >> 6)
  1445  	s[28] = byte((s10 >> 14) | (s11 << 7))
  1446  	s[29] = byte(s11 >> 1)
  1447  	s[30] = byte(s11 >> 9)
  1448  	s[31] = byte(s11 >> 17)
  1449  }
  1450  
  1451  // Input:
  1452  //   s[0]+256*s[1]+...+256^63*s[63] = s
  1453  //
  1454  // Output:
  1455  //   s[0]+256*s[1]+...+256^31*s[31] = s mod l
  1456  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1457  func ScReduce(out *[32]byte, s *[64]byte) {
  1458  	s0 := 2097151 & load3(s[:])
  1459  	s1 := 2097151 & (load4(s[2:]) >> 5)
  1460  	s2 := 2097151 & (load3(s[5:]) >> 2)
  1461  	s3 := 2097151 & (load4(s[7:]) >> 7)
  1462  	s4 := 2097151 & (load4(s[10:]) >> 4)
  1463  	s5 := 2097151 & (load3(s[13:]) >> 1)
  1464  	s6 := 2097151 & (load4(s[15:]) >> 6)
  1465  	s7 := 2097151 & (load3(s[18:]) >> 3)
  1466  	s8 := 2097151 & load3(s[21:])
  1467  	s9 := 2097151 & (load4(s[23:]) >> 5)
  1468  	s10 := 2097151 & (load3(s[26:]) >> 2)
  1469  	s11 := 2097151 & (load4(s[28:]) >> 7)
  1470  	s12 := 2097151 & (load4(s[31:]) >> 4)
  1471  	s13 := 2097151 & (load3(s[34:]) >> 1)
  1472  	s14 := 2097151 & (load4(s[36:]) >> 6)
  1473  	s15 := 2097151 & (load3(s[39:]) >> 3)
  1474  	s16 := 2097151 & load3(s[42:])
  1475  	s17 := 2097151 & (load4(s[44:]) >> 5)
  1476  	s18 := 2097151 & (load3(s[47:]) >> 2)
  1477  	s19 := 2097151 & (load4(s[49:]) >> 7)
  1478  	s20 := 2097151 & (load4(s[52:]) >> 4)
  1479  	s21 := 2097151 & (load3(s[55:]) >> 1)
  1480  	s22 := 2097151 & (load4(s[57:]) >> 6)
  1481  	s23 := (load4(s[60:]) >> 3)
  1482  
  1483  	s11 += s23 * 666643
  1484  	s12 += s23 * 470296
  1485  	s13 += s23 * 654183
  1486  	s14 -= s23 * 997805
  1487  	s15 += s23 * 136657
  1488  	s16 -= s23 * 683901
  1489  	s23 = 0
  1490  
  1491  	s10 += s22 * 666643
  1492  	s11 += s22 * 470296
  1493  	s12 += s22 * 654183
  1494  	s13 -= s22 * 997805
  1495  	s14 += s22 * 136657
  1496  	s15 -= s22 * 683901
  1497  	s22 = 0
  1498  
  1499  	s9 += s21 * 666643
  1500  	s10 += s21 * 470296
  1501  	s11 += s21 * 654183
  1502  	s12 -= s21 * 997805
  1503  	s13 += s21 * 136657
  1504  	s14 -= s21 * 683901
  1505  	s21 = 0
  1506  
  1507  	s8 += s20 * 666643
  1508  	s9 += s20 * 470296
  1509  	s10 += s20 * 654183
  1510  	s11 -= s20 * 997805
  1511  	s12 += s20 * 136657
  1512  	s13 -= s20 * 683901
  1513  	s20 = 0
  1514  
  1515  	s7 += s19 * 666643
  1516  	s8 += s19 * 470296
  1517  	s9 += s19 * 654183
  1518  	s10 -= s19 * 997805
  1519  	s11 += s19 * 136657
  1520  	s12 -= s19 * 683901
  1521  	s19 = 0
  1522  
  1523  	s6 += s18 * 666643
  1524  	s7 += s18 * 470296
  1525  	s8 += s18 * 654183
  1526  	s9 -= s18 * 997805
  1527  	s10 += s18 * 136657
  1528  	s11 -= s18 * 683901
  1529  	s18 = 0
  1530  
  1531  	var carry [17]int64
  1532  
  1533  	carry[6] = (s6 + (1 << 20)) >> 21
  1534  	s7 += carry[6]
  1535  	s6 -= carry[6] << 21
  1536  	carry[8] = (s8 + (1 << 20)) >> 21
  1537  	s9 += carry[8]
  1538  	s8 -= carry[8] << 21
  1539  	carry[10] = (s10 + (1 << 20)) >> 21
  1540  	s11 += carry[10]
  1541  	s10 -= carry[10] << 21
  1542  	carry[12] = (s12 + (1 << 20)) >> 21
  1543  	s13 += carry[12]
  1544  	s12 -= carry[12] << 21
  1545  	carry[14] = (s14 + (1 << 20)) >> 21
  1546  	s15 += carry[14]
  1547  	s14 -= carry[14] << 21
  1548  	carry[16] = (s16 + (1 << 20)) >> 21
  1549  	s17 += carry[16]
  1550  	s16 -= carry[16] << 21
  1551  
  1552  	carry[7] = (s7 + (1 << 20)) >> 21
  1553  	s8 += carry[7]
  1554  	s7 -= carry[7] << 21
  1555  	carry[9] = (s9 + (1 << 20)) >> 21
  1556  	s10 += carry[9]
  1557  	s9 -= carry[9] << 21
  1558  	carry[11] = (s11 + (1 << 20)) >> 21
  1559  	s12 += carry[11]
  1560  	s11 -= carry[11] << 21
  1561  	carry[13] = (s13 + (1 << 20)) >> 21
  1562  	s14 += carry[13]
  1563  	s13 -= carry[13] << 21
  1564  	carry[15] = (s15 + (1 << 20)) >> 21
  1565  	s16 += carry[15]
  1566  	s15 -= carry[15] << 21
  1567  
  1568  	s5 += s17 * 666643
  1569  	s6 += s17 * 470296
  1570  	s7 += s17 * 654183
  1571  	s8 -= s17 * 997805
  1572  	s9 += s17 * 136657
  1573  	s10 -= s17 * 683901
  1574  	s17 = 0
  1575  
  1576  	s4 += s16 * 666643
  1577  	s5 += s16 * 470296
  1578  	s6 += s16 * 654183
  1579  	s7 -= s16 * 997805
  1580  	s8 += s16 * 136657
  1581  	s9 -= s16 * 683901
  1582  	s16 = 0
  1583  
  1584  	s3 += s15 * 666643
  1585  	s4 += s15 * 470296
  1586  	s5 += s15 * 654183
  1587  	s6 -= s15 * 997805
  1588  	s7 += s15 * 136657
  1589  	s8 -= s15 * 683901
  1590  	s15 = 0
  1591  
  1592  	s2 += s14 * 666643
  1593  	s3 += s14 * 470296
  1594  	s4 += s14 * 654183
  1595  	s5 -= s14 * 997805
  1596  	s6 += s14 * 136657
  1597  	s7 -= s14 * 683901
  1598  	s14 = 0
  1599  
  1600  	s1 += s13 * 666643
  1601  	s2 += s13 * 470296
  1602  	s3 += s13 * 654183
  1603  	s4 -= s13 * 997805
  1604  	s5 += s13 * 136657
  1605  	s6 -= s13 * 683901
  1606  	s13 = 0
  1607  
  1608  	s0 += s12 * 666643
  1609  	s1 += s12 * 470296
  1610  	s2 += s12 * 654183
  1611  	s3 -= s12 * 997805
  1612  	s4 += s12 * 136657
  1613  	s5 -= s12 * 683901
  1614  	s12 = 0
  1615  
  1616  	carry[0] = (s0 + (1 << 20)) >> 21
  1617  	s1 += carry[0]
  1618  	s0 -= carry[0] << 21
  1619  	carry[2] = (s2 + (1 << 20)) >> 21
  1620  	s3 += carry[2]
  1621  	s2 -= carry[2] << 21
  1622  	carry[4] = (s4 + (1 << 20)) >> 21
  1623  	s5 += carry[4]
  1624  	s4 -= carry[4] << 21
  1625  	carry[6] = (s6 + (1 << 20)) >> 21
  1626  	s7 += carry[6]
  1627  	s6 -= carry[6] << 21
  1628  	carry[8] = (s8 + (1 << 20)) >> 21
  1629  	s9 += carry[8]
  1630  	s8 -= carry[8] << 21
  1631  	carry[10] = (s10 + (1 << 20)) >> 21
  1632  	s11 += carry[10]
  1633  	s10 -= carry[10] << 21
  1634  
  1635  	carry[1] = (s1 + (1 << 20)) >> 21
  1636  	s2 += carry[1]
  1637  	s1 -= carry[1] << 21
  1638  	carry[3] = (s3 + (1 << 20)) >> 21
  1639  	s4 += carry[3]
  1640  	s3 -= carry[3] << 21
  1641  	carry[5] = (s5 + (1 << 20)) >> 21
  1642  	s6 += carry[5]
  1643  	s5 -= carry[5] << 21
  1644  	carry[7] = (s7 + (1 << 20)) >> 21
  1645  	s8 += carry[7]
  1646  	s7 -= carry[7] << 21
  1647  	carry[9] = (s9 + (1 << 20)) >> 21
  1648  	s10 += carry[9]
  1649  	s9 -= carry[9] << 21
  1650  	carry[11] = (s11 + (1 << 20)) >> 21
  1651  	s12 += carry[11]
  1652  	s11 -= carry[11] << 21
  1653  
  1654  	s0 += s12 * 666643
  1655  	s1 += s12 * 470296
  1656  	s2 += s12 * 654183
  1657  	s3 -= s12 * 997805
  1658  	s4 += s12 * 136657
  1659  	s5 -= s12 * 683901
  1660  	s12 = 0
  1661  
  1662  	carry[0] = s0 >> 21
  1663  	s1 += carry[0]
  1664  	s0 -= carry[0] << 21
  1665  	carry[1] = s1 >> 21
  1666  	s2 += carry[1]
  1667  	s1 -= carry[1] << 21
  1668  	carry[2] = s2 >> 21
  1669  	s3 += carry[2]
  1670  	s2 -= carry[2] << 21
  1671  	carry[3] = s3 >> 21
  1672  	s4 += carry[3]
  1673  	s3 -= carry[3] << 21
  1674  	carry[4] = s4 >> 21
  1675  	s5 += carry[4]
  1676  	s4 -= carry[4] << 21
  1677  	carry[5] = s5 >> 21
  1678  	s6 += carry[5]
  1679  	s5 -= carry[5] << 21
  1680  	carry[6] = s6 >> 21
  1681  	s7 += carry[6]
  1682  	s6 -= carry[6] << 21
  1683  	carry[7] = s7 >> 21
  1684  	s8 += carry[7]
  1685  	s7 -= carry[7] << 21
  1686  	carry[8] = s8 >> 21
  1687  	s9 += carry[8]
  1688  	s8 -= carry[8] << 21
  1689  	carry[9] = s9 >> 21
  1690  	s10 += carry[9]
  1691  	s9 -= carry[9] << 21
  1692  	carry[10] = s10 >> 21
  1693  	s11 += carry[10]
  1694  	s10 -= carry[10] << 21
  1695  	carry[11] = s11 >> 21
  1696  	s12 += carry[11]
  1697  	s11 -= carry[11] << 21
  1698  
  1699  	s0 += s12 * 666643
  1700  	s1 += s12 * 470296
  1701  	s2 += s12 * 654183
  1702  	s3 -= s12 * 997805
  1703  	s4 += s12 * 136657
  1704  	s5 -= s12 * 683901
  1705  	s12 = 0
  1706  
  1707  	carry[0] = s0 >> 21
  1708  	s1 += carry[0]
  1709  	s0 -= carry[0] << 21
  1710  	carry[1] = s1 >> 21
  1711  	s2 += carry[1]
  1712  	s1 -= carry[1] << 21
  1713  	carry[2] = s2 >> 21
  1714  	s3 += carry[2]
  1715  	s2 -= carry[2] << 21
  1716  	carry[3] = s3 >> 21
  1717  	s4 += carry[3]
  1718  	s3 -= carry[3] << 21
  1719  	carry[4] = s4 >> 21
  1720  	s5 += carry[4]
  1721  	s4 -= carry[4] << 21
  1722  	carry[5] = s5 >> 21
  1723  	s6 += carry[5]
  1724  	s5 -= carry[5] << 21
  1725  	carry[6] = s6 >> 21
  1726  	s7 += carry[6]
  1727  	s6 -= carry[6] << 21
  1728  	carry[7] = s7 >> 21
  1729  	s8 += carry[7]
  1730  	s7 -= carry[7] << 21
  1731  	carry[8] = s8 >> 21
  1732  	s9 += carry[8]
  1733  	s8 -= carry[8] << 21
  1734  	carry[9] = s9 >> 21
  1735  	s10 += carry[9]
  1736  	s9 -= carry[9] << 21
  1737  	carry[10] = s10 >> 21
  1738  	s11 += carry[10]
  1739  	s10 -= carry[10] << 21
  1740  
  1741  	out[0] = byte(s0 >> 0)
  1742  	out[1] = byte(s0 >> 8)
  1743  	out[2] = byte((s0 >> 16) | (s1 << 5))
  1744  	out[3] = byte(s1 >> 3)
  1745  	out[4] = byte(s1 >> 11)
  1746  	out[5] = byte((s1 >> 19) | (s2 << 2))
  1747  	out[6] = byte(s2 >> 6)
  1748  	out[7] = byte((s2 >> 14) | (s3 << 7))
  1749  	out[8] = byte(s3 >> 1)
  1750  	out[9] = byte(s3 >> 9)
  1751  	out[10] = byte((s3 >> 17) | (s4 << 4))
  1752  	out[11] = byte(s4 >> 4)
  1753  	out[12] = byte(s4 >> 12)
  1754  	out[13] = byte((s4 >> 20) | (s5 << 1))
  1755  	out[14] = byte(s5 >> 7)
  1756  	out[15] = byte((s5 >> 15) | (s6 << 6))
  1757  	out[16] = byte(s6 >> 2)
  1758  	out[17] = byte(s6 >> 10)
  1759  	out[18] = byte((s6 >> 18) | (s7 << 3))
  1760  	out[19] = byte(s7 >> 5)
  1761  	out[20] = byte(s7 >> 13)
  1762  	out[21] = byte(s8 >> 0)
  1763  	out[22] = byte(s8 >> 8)
  1764  	out[23] = byte((s8 >> 16) | (s9 << 5))
  1765  	out[24] = byte(s9 >> 3)
  1766  	out[25] = byte(s9 >> 11)
  1767  	out[26] = byte((s9 >> 19) | (s10 << 2))
  1768  	out[27] = byte(s10 >> 6)
  1769  	out[28] = byte((s10 >> 14) | (s11 << 7))
  1770  	out[29] = byte(s11 >> 1)
  1771  	out[30] = byte(s11 >> 9)
  1772  	out[31] = byte(s11 >> 17)
  1773  }
  1774  
  1775  // order is the order of Curve25519 in little-endian form.
  1776  var order = [4]uint64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0, 0x1000000000000000}
  1777  
  1778  // ScMinimal returns true if the given scalar is less than the order of the
  1779  // curve.
  1780  func ScMinimal(scalar *[32]byte) bool {
  1781  	for i := 3; ; i-- {
  1782  		v := binary.LittleEndian.Uint64(scalar[i*8:])
  1783  		if v > order[i] {
  1784  			return false
  1785  		} else if v < order[i] {
  1786  			break
  1787  		} else if i == 0 {
  1788  			return false
  1789  		}
  1790  	}
  1791  
  1792  	return true
  1793  }