github.com/annchain/OG@v0.0.9/deprecated/ogcrypto/ed25519/ed25519.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package edwards25519 implements operations in GF(2**255-19) and on an
     6  // Edwards curve that is isomorphic to curve25519. See
     7  // http://ed25519.cr.yp.to/.
     8  package edwards25519
     9  
    10  import "math"
    11  
    12  // This code is a port of the public domain, "ref10" implementation of ed25519
    13  // from SUPERCOP.
    14  
    15  // FieldElement represents an element of the field GF(2^255 - 19).  An element
    16  // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
    17  // t[3]+2^102 t[4]+...+2^230 t[9].  Bounds on each t[i] vary depending on
    18  // context.
    19  type FieldElement [10]int32
    20  
    21  func FeZero(fe *FieldElement) {
    22  	for i := range fe {
    23  		fe[i] = 0
    24  	}
    25  }
    26  
    27  func FeOne(fe *FieldElement) {
    28  	FeZero(fe)
    29  	fe[0] = 1
    30  }
    31  
    32  func FeAdd(dst, a, b *FieldElement) {
    33  	for i := range dst {
    34  		dst[i] = a[i] + b[i]
    35  	}
    36  }
    37  
    38  func FeSub(dst, a, b *FieldElement) {
    39  	for i := range dst {
    40  		dst[i] = a[i] - b[i]
    41  	}
    42  }
    43  
    44  func FeCopy(dst, src *FieldElement) {
    45  	for i := range dst {
    46  		dst[i] = src[i]
    47  	}
    48  }
    49  
    50  // Replace (f,g) with (g,g) if b == 1;
    51  // replace (f,g) with (f,g) if b == 0.
    52  //
    53  // Preconditions: b in {0,1}.
    54  func FeCMove(f, g *FieldElement, b int32) {
    55  	var x FieldElement
    56  	b = -b
    57  	for i := range x {
    58  		x[i] = b & (f[i] ^ g[i])
    59  	}
    60  
    61  	for i := range f {
    62  		f[i] ^= x[i]
    63  	}
    64  }
    65  
    66  func load3(in []byte) int64 {
    67  	var r int64
    68  	r = int64(in[0])
    69  	r |= int64(in[1]) << 8
    70  	r |= int64(in[2]) << 16
    71  	return r
    72  }
    73  
    74  func load4(in []byte) int64 {
    75  	var r int64
    76  	r = int64(in[0])
    77  	r |= int64(in[1]) << 8
    78  	r |= int64(in[2]) << 16
    79  	r |= int64(in[3]) << 24
    80  	return r
    81  }
    82  
    83  func FeFromBytes(dst *FieldElement, src *[32]byte) {
    84  	h0 := load4(src[:])
    85  	h1 := load3(src[4:]) << 6
    86  	h2 := load3(src[7:]) << 5
    87  	h3 := load3(src[10:]) << 3
    88  	h4 := load3(src[13:]) << 2
    89  	h5 := load4(src[16:])
    90  	h6 := load3(src[20:]) << 7
    91  	h7 := load3(src[23:]) << 5
    92  	h8 := load3(src[26:]) << 4
    93  	h9 := (load3(src[29:]) & 8388607) << 2
    94  
    95  	var carry [10]int64
    96  	carry[9] = (h9 + 1<<24) >> 25
    97  	h0 += carry[9] * 19
    98  	h9 -= carry[9] << 25
    99  	carry[1] = (h1 + 1<<24) >> 25
   100  	h2 += carry[1]
   101  	h1 -= carry[1] << 25
   102  	carry[3] = (h3 + 1<<24) >> 25
   103  	h4 += carry[3]
   104  	h3 -= carry[3] << 25
   105  	carry[5] = (h5 + 1<<24) >> 25
   106  	h6 += carry[5]
   107  	h5 -= carry[5] << 25
   108  	carry[7] = (h7 + 1<<24) >> 25
   109  	h8 += carry[7]
   110  	h7 -= carry[7] << 25
   111  
   112  	carry[0] = (h0 + 1<<25) >> 26
   113  	h1 += carry[0]
   114  	h0 -= carry[0] << 26
   115  	carry[2] = (h2 + 1<<25) >> 26
   116  	h3 += carry[2]
   117  	h2 -= carry[2] << 26
   118  	carry[4] = (h4 + 1<<25) >> 26
   119  	h5 += carry[4]
   120  	h4 -= carry[4] << 26
   121  	carry[6] = (h6 + 1<<25) >> 26
   122  	h7 += carry[6]
   123  	h6 -= carry[6] << 26
   124  	carry[8] = (h8 + 1<<25) >> 26
   125  	h9 += carry[8]
   126  	h8 -= carry[8] << 26
   127  
   128  	dst[0] = int32(h0)
   129  	dst[1] = int32(h1)
   130  	dst[2] = int32(h2)
   131  	dst[3] = int32(h3)
   132  	dst[4] = int32(h4)
   133  	dst[5] = int32(h5)
   134  	dst[6] = int32(h6)
   135  	dst[7] = int32(h7)
   136  	dst[8] = int32(h8)
   137  	dst[9] = int32(h9)
   138  }
   139  
   140  // FeToBytes marshals h to s.
   141  // Preconditions:
   142  //   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   143  //
   144  // Write p=2^255-19; q=floor(h/p).
   145  // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
   146  //
   147  // Proof:
   148  //   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
   149  //   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
   150  //
   151  //   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
   152  //   Then 0<y<1.
   153  //
   154  //   Write r=h-pq.
   155  //   Have 0<=r<=p-1=2^255-20.
   156  //   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
   157  //
   158  //   Write x=r+19(2^-255)r+y.
   159  //   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
   160  //
   161  //   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
   162  //   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
   163  func FeToBytes(s *[32]byte, h *FieldElement) {
   164  	var carry [10]int32
   165  
   166  	q := (19*h[9] + (1 << 24)) >> 25
   167  	q = (h[0] + q) >> 26
   168  	q = (h[1] + q) >> 25
   169  	q = (h[2] + q) >> 26
   170  	q = (h[3] + q) >> 25
   171  	q = (h[4] + q) >> 26
   172  	q = (h[5] + q) >> 25
   173  	q = (h[6] + q) >> 26
   174  	q = (h[7] + q) >> 25
   175  	q = (h[8] + q) >> 26
   176  	q = (h[9] + q) >> 25
   177  
   178  	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
   179  	h[0] += 19 * q
   180  	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
   181  
   182  	carry[0] = h[0] >> 26
   183  	h[1] += carry[0]
   184  	h[0] -= carry[0] << 26
   185  	carry[1] = h[1] >> 25
   186  	h[2] += carry[1]
   187  	h[1] -= carry[1] << 25
   188  	carry[2] = h[2] >> 26
   189  	h[3] += carry[2]
   190  	h[2] -= carry[2] << 26
   191  	carry[3] = h[3] >> 25
   192  	h[4] += carry[3]
   193  	h[3] -= carry[3] << 25
   194  	carry[4] = h[4] >> 26
   195  	h[5] += carry[4]
   196  	h[4] -= carry[4] << 26
   197  	carry[5] = h[5] >> 25
   198  	h[6] += carry[5]
   199  	h[5] -= carry[5] << 25
   200  	carry[6] = h[6] >> 26
   201  	h[7] += carry[6]
   202  	h[6] -= carry[6] << 26
   203  	carry[7] = h[7] >> 25
   204  	h[8] += carry[7]
   205  	h[7] -= carry[7] << 25
   206  	carry[8] = h[8] >> 26
   207  	h[9] += carry[8]
   208  	h[8] -= carry[8] << 26
   209  	carry[9] = h[9] >> 25
   210  	h[9] -= carry[9] << 25
   211  	// h10 = carry9
   212  
   213  	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
   214  	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
   215  	// evidently 2^255 h10-2^255 q = 0.
   216  	// Goal: Output h[0]+...+2^230 h[9].
   217  
   218  	s[0] = byte(h[0] >> 0)
   219  	s[1] = byte(h[0] >> 8)
   220  	s[2] = byte(h[0] >> 16)
   221  	s[3] = byte((h[0] >> 24) | (h[1] << 2))
   222  	s[4] = byte(h[1] >> 6)
   223  	s[5] = byte(h[1] >> 14)
   224  	s[6] = byte((h[1] >> 22) | (h[2] << 3))
   225  	s[7] = byte(h[2] >> 5)
   226  	s[8] = byte(h[2] >> 13)
   227  	s[9] = byte((h[2] >> 21) | (h[3] << 5))
   228  	s[10] = byte(h[3] >> 3)
   229  	s[11] = byte(h[3] >> 11)
   230  	s[12] = byte((h[3] >> 19) | (h[4] << 6))
   231  	s[13] = byte(h[4] >> 2)
   232  	s[14] = byte(h[4] >> 10)
   233  	s[15] = byte(h[4] >> 18)
   234  	s[16] = byte(h[5] >> 0)
   235  	s[17] = byte(h[5] >> 8)
   236  	s[18] = byte(h[5] >> 16)
   237  	s[19] = byte((h[5] >> 24) | (h[6] << 1))
   238  	s[20] = byte(h[6] >> 7)
   239  	s[21] = byte(h[6] >> 15)
   240  	s[22] = byte((h[6] >> 23) | (h[7] << 3))
   241  	s[23] = byte(h[7] >> 5)
   242  	s[24] = byte(h[7] >> 13)
   243  	s[25] = byte((h[7] >> 21) | (h[8] << 4))
   244  	s[26] = byte(h[8] >> 4)
   245  	s[27] = byte(h[8] >> 12)
   246  	s[28] = byte((h[8] >> 20) | (h[9] << 6))
   247  	s[29] = byte(h[9] >> 2)
   248  	s[30] = byte(h[9] >> 10)
   249  	s[31] = byte(h[9] >> 18)
   250  }
   251  
   252  func FeIsNegative(f *FieldElement) byte {
   253  	var s [32]byte
   254  	FeToBytes(&s, f)
   255  	return s[0] & 1
   256  }
   257  
   258  func FeIsNonZero(f *FieldElement) int32 {
   259  	var s [32]byte
   260  	FeToBytes(&s, f)
   261  	var x uint8
   262  	for _, b := range s {
   263  		x |= b
   264  	}
   265  	x |= x >> 4
   266  	x |= x >> 2
   267  	x |= x >> 1
   268  	return int32(x & 1)
   269  }
   270  
   271  // FeNeg sets h = -f
   272  //
   273  // Preconditions:
   274  //    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   275  //
   276  // Postconditions:
   277  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   278  func FeNeg(h, f *FieldElement) {
   279  	for i := range h {
   280  		h[i] = -f[i]
   281  	}
   282  }
   283  
   284  // FeMul calculates h = f * g
   285  // Can overlap h with f or g.
   286  //
   287  // Preconditions:
   288  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   289  //    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   290  //
   291  // Postconditions:
   292  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   293  //
   294  // Notes on implementation strategy:
   295  //
   296  // Using schoolbook multiplication.
   297  // Karatsuba would save a little in some cost models.
   298  //
   299  // Most multiplications by 2 and 19 are 32-bit precomputations;
   300  // cheaper than 64-bit postcomputations.
   301  //
   302  // There is one remaining multiplication by 19 in the carry chain;
   303  // one *19 precomputation can be merged into this,
   304  // but the resulting data flow is considerably less clean.
   305  //
   306  // There are 12 carries below.
   307  // 10 of them are 2-way parallelizable and vectorizable.
   308  // Can get away with 11 carries, but then data flow is much deeper.
   309  //
   310  // With tighter constraints on inputs can squeeze carries into int32.
   311  func FeMul(h, f, g *FieldElement) {
   312  	f0 := f[0]
   313  	f1 := f[1]
   314  	f2 := f[2]
   315  	f3 := f[3]
   316  	f4 := f[4]
   317  	f5 := f[5]
   318  	f6 := f[6]
   319  	f7 := f[7]
   320  	f8 := f[8]
   321  	f9 := f[9]
   322  	g0 := g[0]
   323  	g1 := g[1]
   324  	g2 := g[2]
   325  	g3 := g[3]
   326  	g4 := g[4]
   327  	g5 := g[5]
   328  	g6 := g[6]
   329  	g7 := g[7]
   330  	g8 := g[8]
   331  	g9 := g[9]
   332  	g1_19 := 19 * g1 /* 1.4*2^29 */
   333  	g2_19 := 19 * g2 /* 1.4*2^30; still ok */
   334  	g3_19 := 19 * g3
   335  	g4_19 := 19 * g4
   336  	g5_19 := 19 * g5
   337  	g6_19 := 19 * g6
   338  	g7_19 := 19 * g7
   339  	g8_19 := 19 * g8
   340  	g9_19 := 19 * g9
   341  	f1_2 := 2 * f1
   342  	f3_2 := 2 * f3
   343  	f5_2 := 2 * f5
   344  	f7_2 := 2 * f7
   345  	f9_2 := 2 * f9
   346  	f0g0 := int64(f0) * int64(g0)
   347  	f0g1 := int64(f0) * int64(g1)
   348  	f0g2 := int64(f0) * int64(g2)
   349  	f0g3 := int64(f0) * int64(g3)
   350  	f0g4 := int64(f0) * int64(g4)
   351  	f0g5 := int64(f0) * int64(g5)
   352  	f0g6 := int64(f0) * int64(g6)
   353  	f0g7 := int64(f0) * int64(g7)
   354  	f0g8 := int64(f0) * int64(g8)
   355  	f0g9 := int64(f0) * int64(g9)
   356  	f1g0 := int64(f1) * int64(g0)
   357  	f1g1_2 := int64(f1_2) * int64(g1)
   358  	f1g2 := int64(f1) * int64(g2)
   359  	f1g3_2 := int64(f1_2) * int64(g3)
   360  	f1g4 := int64(f1) * int64(g4)
   361  	f1g5_2 := int64(f1_2) * int64(g5)
   362  	f1g6 := int64(f1) * int64(g6)
   363  	f1g7_2 := int64(f1_2) * int64(g7)
   364  	f1g8 := int64(f1) * int64(g8)
   365  	f1g9_38 := int64(f1_2) * int64(g9_19)
   366  	f2g0 := int64(f2) * int64(g0)
   367  	f2g1 := int64(f2) * int64(g1)
   368  	f2g2 := int64(f2) * int64(g2)
   369  	f2g3 := int64(f2) * int64(g3)
   370  	f2g4 := int64(f2) * int64(g4)
   371  	f2g5 := int64(f2) * int64(g5)
   372  	f2g6 := int64(f2) * int64(g6)
   373  	f2g7 := int64(f2) * int64(g7)
   374  	f2g8_19 := int64(f2) * int64(g8_19)
   375  	f2g9_19 := int64(f2) * int64(g9_19)
   376  	f3g0 := int64(f3) * int64(g0)
   377  	f3g1_2 := int64(f3_2) * int64(g1)
   378  	f3g2 := int64(f3) * int64(g2)
   379  	f3g3_2 := int64(f3_2) * int64(g3)
   380  	f3g4 := int64(f3) * int64(g4)
   381  	f3g5_2 := int64(f3_2) * int64(g5)
   382  	f3g6 := int64(f3) * int64(g6)
   383  	f3g7_38 := int64(f3_2) * int64(g7_19)
   384  	f3g8_19 := int64(f3) * int64(g8_19)
   385  	f3g9_38 := int64(f3_2) * int64(g9_19)
   386  	f4g0 := int64(f4) * int64(g0)
   387  	f4g1 := int64(f4) * int64(g1)
   388  	f4g2 := int64(f4) * int64(g2)
   389  	f4g3 := int64(f4) * int64(g3)
   390  	f4g4 := int64(f4) * int64(g4)
   391  	f4g5 := int64(f4) * int64(g5)
   392  	f4g6_19 := int64(f4) * int64(g6_19)
   393  	f4g7_19 := int64(f4) * int64(g7_19)
   394  	f4g8_19 := int64(f4) * int64(g8_19)
   395  	f4g9_19 := int64(f4) * int64(g9_19)
   396  	f5g0 := int64(f5) * int64(g0)
   397  	f5g1_2 := int64(f5_2) * int64(g1)
   398  	f5g2 := int64(f5) * int64(g2)
   399  	f5g3_2 := int64(f5_2) * int64(g3)
   400  	f5g4 := int64(f5) * int64(g4)
   401  	f5g5_38 := int64(f5_2) * int64(g5_19)
   402  	f5g6_19 := int64(f5) * int64(g6_19)
   403  	f5g7_38 := int64(f5_2) * int64(g7_19)
   404  	f5g8_19 := int64(f5) * int64(g8_19)
   405  	f5g9_38 := int64(f5_2) * int64(g9_19)
   406  	f6g0 := int64(f6) * int64(g0)
   407  	f6g1 := int64(f6) * int64(g1)
   408  	f6g2 := int64(f6) * int64(g2)
   409  	f6g3 := int64(f6) * int64(g3)
   410  	f6g4_19 := int64(f6) * int64(g4_19)
   411  	f6g5_19 := int64(f6) * int64(g5_19)
   412  	f6g6_19 := int64(f6) * int64(g6_19)
   413  	f6g7_19 := int64(f6) * int64(g7_19)
   414  	f6g8_19 := int64(f6) * int64(g8_19)
   415  	f6g9_19 := int64(f6) * int64(g9_19)
   416  	f7g0 := int64(f7) * int64(g0)
   417  	f7g1_2 := int64(f7_2) * int64(g1)
   418  	f7g2 := int64(f7) * int64(g2)
   419  	f7g3_38 := int64(f7_2) * int64(g3_19)
   420  	f7g4_19 := int64(f7) * int64(g4_19)
   421  	f7g5_38 := int64(f7_2) * int64(g5_19)
   422  	f7g6_19 := int64(f7) * int64(g6_19)
   423  	f7g7_38 := int64(f7_2) * int64(g7_19)
   424  	f7g8_19 := int64(f7) * int64(g8_19)
   425  	f7g9_38 := int64(f7_2) * int64(g9_19)
   426  	f8g0 := int64(f8) * int64(g0)
   427  	f8g1 := int64(f8) * int64(g1)
   428  	f8g2_19 := int64(f8) * int64(g2_19)
   429  	f8g3_19 := int64(f8) * int64(g3_19)
   430  	f8g4_19 := int64(f8) * int64(g4_19)
   431  	f8g5_19 := int64(f8) * int64(g5_19)
   432  	f8g6_19 := int64(f8) * int64(g6_19)
   433  	f8g7_19 := int64(f8) * int64(g7_19)
   434  	f8g8_19 := int64(f8) * int64(g8_19)
   435  	f8g9_19 := int64(f8) * int64(g9_19)
   436  	f9g0 := int64(f9) * int64(g0)
   437  	f9g1_38 := int64(f9_2) * int64(g1_19)
   438  	f9g2_19 := int64(f9) * int64(g2_19)
   439  	f9g3_38 := int64(f9_2) * int64(g3_19)
   440  	f9g4_19 := int64(f9) * int64(g4_19)
   441  	f9g5_38 := int64(f9_2) * int64(g5_19)
   442  	f9g6_19 := int64(f9) * int64(g6_19)
   443  	f9g7_38 := int64(f9_2) * int64(g7_19)
   444  	f9g8_19 := int64(f9) * int64(g8_19)
   445  	f9g9_38 := int64(f9_2) * int64(g9_19)
   446  	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
   447  	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
   448  	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
   449  	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
   450  	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
   451  	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
   452  	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
   453  	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
   454  	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
   455  	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
   456  	var carry [10]int64
   457  
   458  	/*
   459  	  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
   460  	    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
   461  	  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
   462  	    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
   463  	*/
   464  
   465  	carry[0] = (h0 + (1 << 25)) >> 26
   466  	h1 += carry[0]
   467  	h0 -= carry[0] << 26
   468  	carry[4] = (h4 + (1 << 25)) >> 26
   469  	h5 += carry[4]
   470  	h4 -= carry[4] << 26
   471  	/* |h0| <= 2^25 */
   472  	/* |h4| <= 2^25 */
   473  	/* |h1| <= 1.51*2^58 */
   474  	/* |h5| <= 1.51*2^58 */
   475  
   476  	carry[1] = (h1 + (1 << 24)) >> 25
   477  	h2 += carry[1]
   478  	h1 -= carry[1] << 25
   479  	carry[5] = (h5 + (1 << 24)) >> 25
   480  	h6 += carry[5]
   481  	h5 -= carry[5] << 25
   482  	/* |h1| <= 2^24; from now on fits into int32 */
   483  	/* |h5| <= 2^24; from now on fits into int32 */
   484  	/* |h2| <= 1.21*2^59 */
   485  	/* |h6| <= 1.21*2^59 */
   486  
   487  	carry[2] = (h2 + (1 << 25)) >> 26
   488  	h3 += carry[2]
   489  	h2 -= carry[2] << 26
   490  	carry[6] = (h6 + (1 << 25)) >> 26
   491  	h7 += carry[6]
   492  	h6 -= carry[6] << 26
   493  	/* |h2| <= 2^25; from now on fits into int32 unchanged */
   494  	/* |h6| <= 2^25; from now on fits into int32 unchanged */
   495  	/* |h3| <= 1.51*2^58 */
   496  	/* |h7| <= 1.51*2^58 */
   497  
   498  	carry[3] = (h3 + (1 << 24)) >> 25
   499  	h4 += carry[3]
   500  	h3 -= carry[3] << 25
   501  	carry[7] = (h7 + (1 << 24)) >> 25
   502  	h8 += carry[7]
   503  	h7 -= carry[7] << 25
   504  	/* |h3| <= 2^24; from now on fits into int32 unchanged */
   505  	/* |h7| <= 2^24; from now on fits into int32 unchanged */
   506  	/* |h4| <= 1.52*2^33 */
   507  	/* |h8| <= 1.52*2^33 */
   508  
   509  	carry[4] = (h4 + (1 << 25)) >> 26
   510  	h5 += carry[4]
   511  	h4 -= carry[4] << 26
   512  	carry[8] = (h8 + (1 << 25)) >> 26
   513  	h9 += carry[8]
   514  	h8 -= carry[8] << 26
   515  	/* |h4| <= 2^25; from now on fits into int32 unchanged */
   516  	/* |h8| <= 2^25; from now on fits into int32 unchanged */
   517  	/* |h5| <= 1.01*2^24 */
   518  	/* |h9| <= 1.51*2^58 */
   519  
   520  	carry[9] = (h9 + (1 << 24)) >> 25
   521  	h0 += carry[9] * 19
   522  	h9 -= carry[9] << 25
   523  	/* |h9| <= 2^24; from now on fits into int32 unchanged */
   524  	/* |h0| <= 1.8*2^37 */
   525  
   526  	carry[0] = (h0 + (1 << 25)) >> 26
   527  	h1 += carry[0]
   528  	h0 -= carry[0] << 26
   529  	/* |h0| <= 2^25; from now on fits into int32 unchanged */
   530  	/* |h1| <= 1.01*2^24 */
   531  
   532  	h[0] = int32(h0)
   533  	h[1] = int32(h1)
   534  	h[2] = int32(h2)
   535  	h[3] = int32(h3)
   536  	h[4] = int32(h4)
   537  	h[5] = int32(h5)
   538  	h[6] = int32(h6)
   539  	h[7] = int32(h7)
   540  	h[8] = int32(h8)
   541  	h[9] = int32(h9)
   542  }
   543  
   544  // FeSquare calculates h = f*f. Can overlap h with f.
   545  //
   546  // Preconditions:
   547  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   548  //
   549  // Postconditions:
   550  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   551  func FeSquare(h, f *FieldElement) {
   552  	f0 := f[0]
   553  	f1 := f[1]
   554  	f2 := f[2]
   555  	f3 := f[3]
   556  	f4 := f[4]
   557  	f5 := f[5]
   558  	f6 := f[6]
   559  	f7 := f[7]
   560  	f8 := f[8]
   561  	f9 := f[9]
   562  	f0_2 := 2 * f0
   563  	f1_2 := 2 * f1
   564  	f2_2 := 2 * f2
   565  	f3_2 := 2 * f3
   566  	f4_2 := 2 * f4
   567  	f5_2 := 2 * f5
   568  	f6_2 := 2 * f6
   569  	f7_2 := 2 * f7
   570  	f5_38 := 38 * f5 // 1.31*2^30
   571  	f6_19 := 19 * f6 // 1.31*2^30
   572  	f7_38 := 38 * f7 // 1.31*2^30
   573  	f8_19 := 19 * f8 // 1.31*2^30
   574  	f9_38 := 38 * f9 // 1.31*2^30
   575  	f0f0 := int64(f0) * int64(f0)
   576  	f0f1_2 := int64(f0_2) * int64(f1)
   577  	f0f2_2 := int64(f0_2) * int64(f2)
   578  	f0f3_2 := int64(f0_2) * int64(f3)
   579  	f0f4_2 := int64(f0_2) * int64(f4)
   580  	f0f5_2 := int64(f0_2) * int64(f5)
   581  	f0f6_2 := int64(f0_2) * int64(f6)
   582  	f0f7_2 := int64(f0_2) * int64(f7)
   583  	f0f8_2 := int64(f0_2) * int64(f8)
   584  	f0f9_2 := int64(f0_2) * int64(f9)
   585  	f1f1_2 := int64(f1_2) * int64(f1)
   586  	f1f2_2 := int64(f1_2) * int64(f2)
   587  	f1f3_4 := int64(f1_2) * int64(f3_2)
   588  	f1f4_2 := int64(f1_2) * int64(f4)
   589  	f1f5_4 := int64(f1_2) * int64(f5_2)
   590  	f1f6_2 := int64(f1_2) * int64(f6)
   591  	f1f7_4 := int64(f1_2) * int64(f7_2)
   592  	f1f8_2 := int64(f1_2) * int64(f8)
   593  	f1f9_76 := int64(f1_2) * int64(f9_38)
   594  	f2f2 := int64(f2) * int64(f2)
   595  	f2f3_2 := int64(f2_2) * int64(f3)
   596  	f2f4_2 := int64(f2_2) * int64(f4)
   597  	f2f5_2 := int64(f2_2) * int64(f5)
   598  	f2f6_2 := int64(f2_2) * int64(f6)
   599  	f2f7_2 := int64(f2_2) * int64(f7)
   600  	f2f8_38 := int64(f2_2) * int64(f8_19)
   601  	f2f9_38 := int64(f2) * int64(f9_38)
   602  	f3f3_2 := int64(f3_2) * int64(f3)
   603  	f3f4_2 := int64(f3_2) * int64(f4)
   604  	f3f5_4 := int64(f3_2) * int64(f5_2)
   605  	f3f6_2 := int64(f3_2) * int64(f6)
   606  	f3f7_76 := int64(f3_2) * int64(f7_38)
   607  	f3f8_38 := int64(f3_2) * int64(f8_19)
   608  	f3f9_76 := int64(f3_2) * int64(f9_38)
   609  	f4f4 := int64(f4) * int64(f4)
   610  	f4f5_2 := int64(f4_2) * int64(f5)
   611  	f4f6_38 := int64(f4_2) * int64(f6_19)
   612  	f4f7_38 := int64(f4) * int64(f7_38)
   613  	f4f8_38 := int64(f4_2) * int64(f8_19)
   614  	f4f9_38 := int64(f4) * int64(f9_38)
   615  	f5f5_38 := int64(f5) * int64(f5_38)
   616  	f5f6_38 := int64(f5_2) * int64(f6_19)
   617  	f5f7_76 := int64(f5_2) * int64(f7_38)
   618  	f5f8_38 := int64(f5_2) * int64(f8_19)
   619  	f5f9_76 := int64(f5_2) * int64(f9_38)
   620  	f6f6_19 := int64(f6) * int64(f6_19)
   621  	f6f7_38 := int64(f6) * int64(f7_38)
   622  	f6f8_38 := int64(f6_2) * int64(f8_19)
   623  	f6f9_38 := int64(f6) * int64(f9_38)
   624  	f7f7_38 := int64(f7) * int64(f7_38)
   625  	f7f8_38 := int64(f7_2) * int64(f8_19)
   626  	f7f9_76 := int64(f7_2) * int64(f9_38)
   627  	f8f8_19 := int64(f8) * int64(f8_19)
   628  	f8f9_38 := int64(f8) * int64(f9_38)
   629  	f9f9_38 := int64(f9) * int64(f9_38)
   630  	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
   631  	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
   632  	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
   633  	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
   634  	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
   635  	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
   636  	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
   637  	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
   638  	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
   639  	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
   640  	var carry [10]int64
   641  
   642  	carry[0] = (h0 + (1 << 25)) >> 26
   643  	h1 += carry[0]
   644  	h0 -= carry[0] << 26
   645  	carry[4] = (h4 + (1 << 25)) >> 26
   646  	h5 += carry[4]
   647  	h4 -= carry[4] << 26
   648  
   649  	carry[1] = (h1 + (1 << 24)) >> 25
   650  	h2 += carry[1]
   651  	h1 -= carry[1] << 25
   652  	carry[5] = (h5 + (1 << 24)) >> 25
   653  	h6 += carry[5]
   654  	h5 -= carry[5] << 25
   655  
   656  	carry[2] = (h2 + (1 << 25)) >> 26
   657  	h3 += carry[2]
   658  	h2 -= carry[2] << 26
   659  	carry[6] = (h6 + (1 << 25)) >> 26
   660  	h7 += carry[6]
   661  	h6 -= carry[6] << 26
   662  
   663  	carry[3] = (h3 + (1 << 24)) >> 25
   664  	h4 += carry[3]
   665  	h3 -= carry[3] << 25
   666  	carry[7] = (h7 + (1 << 24)) >> 25
   667  	h8 += carry[7]
   668  	h7 -= carry[7] << 25
   669  
   670  	carry[4] = (h4 + (1 << 25)) >> 26
   671  	h5 += carry[4]
   672  	h4 -= carry[4] << 26
   673  	carry[8] = (h8 + (1 << 25)) >> 26
   674  	h9 += carry[8]
   675  	h8 -= carry[8] << 26
   676  
   677  	carry[9] = (h9 + (1 << 24)) >> 25
   678  	h0 += carry[9] * 19
   679  	h9 -= carry[9] << 25
   680  
   681  	carry[0] = (h0 + (1 << 25)) >> 26
   682  	h1 += carry[0]
   683  	h0 -= carry[0] << 26
   684  
   685  	h[0] = int32(h0)
   686  	h[1] = int32(h1)
   687  	h[2] = int32(h2)
   688  	h[3] = int32(h3)
   689  	h[4] = int32(h4)
   690  	h[5] = int32(h5)
   691  	h[6] = int32(h6)
   692  	h[7] = int32(h7)
   693  	h[8] = int32(h8)
   694  	h[9] = int32(h9)
   695  }
   696  
   697  // FeSquare2 sets h = 2 * f * f
   698  //
   699  // Can overlap h with f.
   700  //
   701  // Preconditions:
   702  //    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
   703  //
   704  // Postconditions:
   705  //    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
   706  // See fe_mul.c for discussion of implementation strategy.
   707  func FeSquare2(h, f *FieldElement) {
   708  	f0 := f[0]
   709  	f1 := f[1]
   710  	f2 := f[2]
   711  	f3 := f[3]
   712  	f4 := f[4]
   713  	f5 := f[5]
   714  	f6 := f[6]
   715  	f7 := f[7]
   716  	f8 := f[8]
   717  	f9 := f[9]
   718  	f0_2 := 2 * f0
   719  	f1_2 := 2 * f1
   720  	f2_2 := 2 * f2
   721  	f3_2 := 2 * f3
   722  	f4_2 := 2 * f4
   723  	f5_2 := 2 * f5
   724  	f6_2 := 2 * f6
   725  	f7_2 := 2 * f7
   726  	f5_38 := 38 * f5 // 1.959375*2^30
   727  	f6_19 := 19 * f6 // 1.959375*2^30
   728  	f7_38 := 38 * f7 // 1.959375*2^30
   729  	f8_19 := 19 * f8 // 1.959375*2^30
   730  	f9_38 := 38 * f9 // 1.959375*2^30
   731  	f0f0 := int64(f0) * int64(f0)
   732  	f0f1_2 := int64(f0_2) * int64(f1)
   733  	f0f2_2 := int64(f0_2) * int64(f2)
   734  	f0f3_2 := int64(f0_2) * int64(f3)
   735  	f0f4_2 := int64(f0_2) * int64(f4)
   736  	f0f5_2 := int64(f0_2) * int64(f5)
   737  	f0f6_2 := int64(f0_2) * int64(f6)
   738  	f0f7_2 := int64(f0_2) * int64(f7)
   739  	f0f8_2 := int64(f0_2) * int64(f8)
   740  	f0f9_2 := int64(f0_2) * int64(f9)
   741  	f1f1_2 := int64(f1_2) * int64(f1)
   742  	f1f2_2 := int64(f1_2) * int64(f2)
   743  	f1f3_4 := int64(f1_2) * int64(f3_2)
   744  	f1f4_2 := int64(f1_2) * int64(f4)
   745  	f1f5_4 := int64(f1_2) * int64(f5_2)
   746  	f1f6_2 := int64(f1_2) * int64(f6)
   747  	f1f7_4 := int64(f1_2) * int64(f7_2)
   748  	f1f8_2 := int64(f1_2) * int64(f8)
   749  	f1f9_76 := int64(f1_2) * int64(f9_38)
   750  	f2f2 := int64(f2) * int64(f2)
   751  	f2f3_2 := int64(f2_2) * int64(f3)
   752  	f2f4_2 := int64(f2_2) * int64(f4)
   753  	f2f5_2 := int64(f2_2) * int64(f5)
   754  	f2f6_2 := int64(f2_2) * int64(f6)
   755  	f2f7_2 := int64(f2_2) * int64(f7)
   756  	f2f8_38 := int64(f2_2) * int64(f8_19)
   757  	f2f9_38 := int64(f2) * int64(f9_38)
   758  	f3f3_2 := int64(f3_2) * int64(f3)
   759  	f3f4_2 := int64(f3_2) * int64(f4)
   760  	f3f5_4 := int64(f3_2) * int64(f5_2)
   761  	f3f6_2 := int64(f3_2) * int64(f6)
   762  	f3f7_76 := int64(f3_2) * int64(f7_38)
   763  	f3f8_38 := int64(f3_2) * int64(f8_19)
   764  	f3f9_76 := int64(f3_2) * int64(f9_38)
   765  	f4f4 := int64(f4) * int64(f4)
   766  	f4f5_2 := int64(f4_2) * int64(f5)
   767  	f4f6_38 := int64(f4_2) * int64(f6_19)
   768  	f4f7_38 := int64(f4) * int64(f7_38)
   769  	f4f8_38 := int64(f4_2) * int64(f8_19)
   770  	f4f9_38 := int64(f4) * int64(f9_38)
   771  	f5f5_38 := int64(f5) * int64(f5_38)
   772  	f5f6_38 := int64(f5_2) * int64(f6_19)
   773  	f5f7_76 := int64(f5_2) * int64(f7_38)
   774  	f5f8_38 := int64(f5_2) * int64(f8_19)
   775  	f5f9_76 := int64(f5_2) * int64(f9_38)
   776  	f6f6_19 := int64(f6) * int64(f6_19)
   777  	f6f7_38 := int64(f6) * int64(f7_38)
   778  	f6f8_38 := int64(f6_2) * int64(f8_19)
   779  	f6f9_38 := int64(f6) * int64(f9_38)
   780  	f7f7_38 := int64(f7) * int64(f7_38)
   781  	f7f8_38 := int64(f7_2) * int64(f8_19)
   782  	f7f9_76 := int64(f7_2) * int64(f9_38)
   783  	f8f8_19 := int64(f8) * int64(f8_19)
   784  	f8f9_38 := int64(f8) * int64(f9_38)
   785  	f9f9_38 := int64(f9) * int64(f9_38)
   786  	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
   787  	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
   788  	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
   789  	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
   790  	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
   791  	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
   792  	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
   793  	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
   794  	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
   795  	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
   796  	var carry [10]int64
   797  
   798  	h0 += h0
   799  	h1 += h1
   800  	h2 += h2
   801  	h3 += h3
   802  	h4 += h4
   803  	h5 += h5
   804  	h6 += h6
   805  	h7 += h7
   806  	h8 += h8
   807  	h9 += h9
   808  
   809  	carry[0] = (h0 + (1 << 25)) >> 26
   810  	h1 += carry[0]
   811  	h0 -= carry[0] << 26
   812  	carry[4] = (h4 + (1 << 25)) >> 26
   813  	h5 += carry[4]
   814  	h4 -= carry[4] << 26
   815  
   816  	carry[1] = (h1 + (1 << 24)) >> 25
   817  	h2 += carry[1]
   818  	h1 -= carry[1] << 25
   819  	carry[5] = (h5 + (1 << 24)) >> 25
   820  	h6 += carry[5]
   821  	h5 -= carry[5] << 25
   822  
   823  	carry[2] = (h2 + (1 << 25)) >> 26
   824  	h3 += carry[2]
   825  	h2 -= carry[2] << 26
   826  	carry[6] = (h6 + (1 << 25)) >> 26
   827  	h7 += carry[6]
   828  	h6 -= carry[6] << 26
   829  
   830  	carry[3] = (h3 + (1 << 24)) >> 25
   831  	h4 += carry[3]
   832  	h3 -= carry[3] << 25
   833  	carry[7] = (h7 + (1 << 24)) >> 25
   834  	h8 += carry[7]
   835  	h7 -= carry[7] << 25
   836  
   837  	carry[4] = (h4 + (1 << 25)) >> 26
   838  	h5 += carry[4]
   839  	h4 -= carry[4] << 26
   840  	carry[8] = (h8 + (1 << 25)) >> 26
   841  	h9 += carry[8]
   842  	h8 -= carry[8] << 26
   843  
   844  	carry[9] = (h9 + (1 << 24)) >> 25
   845  	h0 += carry[9] * 19
   846  	h9 -= carry[9] << 25
   847  
   848  	carry[0] = (h0 + (1 << 25)) >> 26
   849  	h1 += carry[0]
   850  	h0 -= carry[0] << 26
   851  
   852  	h[0] = int32(h0)
   853  	h[1] = int32(h1)
   854  	h[2] = int32(h2)
   855  	h[3] = int32(h3)
   856  	h[4] = int32(h4)
   857  	h[5] = int32(h5)
   858  	h[6] = int32(h6)
   859  	h[7] = int32(h7)
   860  	h[8] = int32(h8)
   861  	h[9] = int32(h9)
   862  }
   863  
   864  func FeInvert(out, z *FieldElement) {
   865  	var t0, t1, t2, t3 FieldElement
   866  	var i int
   867  
   868  	FeSquare(&t0, z)        // 2^1
   869  	FeSquare(&t1, &t0)      // 2^2
   870  	for i = 1; i < 2; i++ { // 2^3
   871  		FeSquare(&t1, &t1)
   872  	}
   873  	FeMul(&t1, z, &t1)      // 2^3 + 2^0
   874  	FeMul(&t0, &t0, &t1)    // 2^3 + 2^1 + 2^0
   875  	FeSquare(&t2, &t0)      // 2^4 + 2^2 + 2^1
   876  	FeMul(&t1, &t1, &t2)    // 2^4 + 2^3 + 2^2 + 2^1 + 2^0
   877  	FeSquare(&t2, &t1)      // 5,4,3,2,1
   878  	for i = 1; i < 5; i++ { // 9,8,7,6,5
   879  		FeSquare(&t2, &t2)
   880  	}
   881  	FeMul(&t1, &t2, &t1)     // 9,8,7,6,5,4,3,2,1,0
   882  	FeSquare(&t2, &t1)       // 10..1
   883  	for i = 1; i < 10; i++ { // 19..10
   884  		FeSquare(&t2, &t2)
   885  	}
   886  	FeMul(&t2, &t2, &t1)     // 19..0
   887  	FeSquare(&t3, &t2)       // 20..1
   888  	for i = 1; i < 20; i++ { // 39..20
   889  		FeSquare(&t3, &t3)
   890  	}
   891  	FeMul(&t2, &t3, &t2)     // 39..0
   892  	FeSquare(&t2, &t2)       // 40..1
   893  	for i = 1; i < 10; i++ { // 49..10
   894  		FeSquare(&t2, &t2)
   895  	}
   896  	FeMul(&t1, &t2, &t1)     // 49..0
   897  	FeSquare(&t2, &t1)       // 50..1
   898  	for i = 1; i < 50; i++ { // 99..50
   899  		FeSquare(&t2, &t2)
   900  	}
   901  	FeMul(&t2, &t2, &t1)      // 99..0
   902  	FeSquare(&t3, &t2)        // 100..1
   903  	for i = 1; i < 100; i++ { // 199..100
   904  		FeSquare(&t3, &t3)
   905  	}
   906  	FeMul(&t2, &t3, &t2)     // 199..0
   907  	FeSquare(&t2, &t2)       // 200..1
   908  	for i = 1; i < 50; i++ { // 249..50
   909  		FeSquare(&t2, &t2)
   910  	}
   911  	FeMul(&t1, &t2, &t1)    // 249..0
   912  	FeSquare(&t1, &t1)      // 250..1
   913  	for i = 1; i < 5; i++ { // 254..5
   914  		FeSquare(&t1, &t1)
   915  	}
   916  	FeMul(out, &t1, &t0) // 254..5,3,1,0
   917  }
   918  
   919  func fePow22523(out, z *FieldElement) {
   920  	var t0, t1, t2 FieldElement
   921  	var i int
   922  
   923  	FeSquare(&t0, z)
   924  	for i = 1; i < 1; i++ {
   925  		FeSquare(&t0, &t0)
   926  	}
   927  	FeSquare(&t1, &t0)
   928  	for i = 1; i < 2; i++ {
   929  		FeSquare(&t1, &t1)
   930  	}
   931  	FeMul(&t1, z, &t1)
   932  	FeMul(&t0, &t0, &t1)
   933  	FeSquare(&t0, &t0)
   934  	for i = 1; i < 1; i++ {
   935  		FeSquare(&t0, &t0)
   936  	}
   937  	FeMul(&t0, &t1, &t0)
   938  	FeSquare(&t1, &t0)
   939  	for i = 1; i < 5; i++ {
   940  		FeSquare(&t1, &t1)
   941  	}
   942  	FeMul(&t0, &t1, &t0)
   943  	FeSquare(&t1, &t0)
   944  	for i = 1; i < 10; i++ {
   945  		FeSquare(&t1, &t1)
   946  	}
   947  	FeMul(&t1, &t1, &t0)
   948  	FeSquare(&t2, &t1)
   949  	for i = 1; i < 20; i++ {
   950  		FeSquare(&t2, &t2)
   951  	}
   952  	FeMul(&t1, &t2, &t1)
   953  	FeSquare(&t1, &t1)
   954  	for i = 1; i < 10; i++ {
   955  		FeSquare(&t1, &t1)
   956  	}
   957  	FeMul(&t0, &t1, &t0)
   958  	FeSquare(&t1, &t0)
   959  	for i = 1; i < 50; i++ {
   960  		FeSquare(&t1, &t1)
   961  	}
   962  	FeMul(&t1, &t1, &t0)
   963  	FeSquare(&t2, &t1)
   964  	for i = 1; i < 100; i++ {
   965  		FeSquare(&t2, &t2)
   966  	}
   967  	FeMul(&t1, &t2, &t1)
   968  	FeSquare(&t1, &t1)
   969  	for i = 1; i < 50; i++ {
   970  		FeSquare(&t1, &t1)
   971  	}
   972  	FeMul(&t0, &t1, &t0)
   973  	FeSquare(&t0, &t0)
   974  	for i = 1; i < 2; i++ {
   975  		FeSquare(&t0, &t0)
   976  	}
   977  	FeMul(out, &t0, z)
   978  }
   979  
   980  // Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
   981  // y^2 where d = -121665/121666.
   982  //
   983  // Several representations are used:
   984  //   ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
   985  //   ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
   986  //   CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
   987  //   PreComputedGroupElement: (y+x,y-x,2dxy)
   988  
   989  type ProjectiveGroupElement struct {
   990  	X, Y, Z FieldElement
   991  }
   992  
   993  type ExtendedGroupElement struct {
   994  	X, Y, Z, T FieldElement
   995  }
   996  
   997  type CompletedGroupElement struct {
   998  	X, Y, Z, T FieldElement
   999  }
  1000  
  1001  type PreComputedGroupElement struct {
  1002  	yPlusX, yMinusX, xy2d FieldElement
  1003  }
  1004  
  1005  type CachedGroupElement struct {
  1006  	yPlusX, yMinusX, Z, T2d FieldElement
  1007  }
  1008  
  1009  func (p *ProjectiveGroupElement) Zero() {
  1010  	FeZero(&p.X)
  1011  	FeOne(&p.Y)
  1012  	FeOne(&p.Z)
  1013  }
  1014  
  1015  func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) {
  1016  	var t0 FieldElement
  1017  
  1018  	FeSquare(&r.X, &p.X)
  1019  	FeSquare(&r.Z, &p.Y)
  1020  	FeSquare2(&r.T, &p.Z)
  1021  	FeAdd(&r.Y, &p.X, &p.Y)
  1022  	FeSquare(&t0, &r.Y)
  1023  	FeAdd(&r.Y, &r.Z, &r.X)
  1024  	FeSub(&r.Z, &r.Z, &r.X)
  1025  	FeSub(&r.X, &t0, &r.Y)
  1026  	FeSub(&r.T, &r.T, &r.Z)
  1027  }
  1028  
  1029  func (p *ProjectiveGroupElement) ToExtended(s *ExtendedGroupElement) {
  1030  	var b [32]byte
  1031  	p.ToBytes(&b)
  1032  	s.FromBytes(&b)
  1033  }
  1034  
  1035  func (p *ProjectiveGroupElement) ToBytes(s *[32]byte) {
  1036  	var recip, x, y FieldElement
  1037  
  1038  	FeInvert(&recip, &p.Z)
  1039  	FeMul(&x, &p.X, &recip)
  1040  	FeMul(&y, &p.Y, &recip)
  1041  	FeToBytes(s, &y)
  1042  	s[31] ^= FeIsNegative(&x) << 7
  1043  }
  1044  
  1045  func (p *ExtendedGroupElement) Zero() {
  1046  	FeZero(&p.X)
  1047  	FeOne(&p.Y)
  1048  	FeOne(&p.Z)
  1049  	FeZero(&p.T)
  1050  }
  1051  
  1052  func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) {
  1053  	var q ProjectiveGroupElement
  1054  	p.ToProjective(&q)
  1055  	q.Double(r)
  1056  }
  1057  
  1058  func GeDouble(r, p *ExtendedGroupElement) {
  1059  	var q ProjectiveGroupElement
  1060  	p.ToProjective(&q)
  1061  	var rco CompletedGroupElement
  1062  	q.Double(&rco)
  1063  	rco.ToExtended(r)
  1064  }
  1065  
  1066  func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) {
  1067  	FeAdd(&r.yPlusX, &p.Y, &p.X)
  1068  	FeSub(&r.yMinusX, &p.Y, &p.X)
  1069  	FeCopy(&r.Z, &p.Z)
  1070  	FeMul(&r.T2d, &p.T, &d2)
  1071  }
  1072  
  1073  func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) {
  1074  	FeCopy(&r.X, &p.X)
  1075  	FeCopy(&r.Y, &p.Y)
  1076  	FeCopy(&r.Z, &p.Z)
  1077  }
  1078  
  1079  func (p *ExtendedGroupElement) ToBytes(s *[32]byte) {
  1080  	var recip, x, y FieldElement
  1081  
  1082  	FeInvert(&recip, &p.Z)
  1083  	FeMul(&x, &p.X, &recip)
  1084  	FeMul(&y, &p.Y, &recip)
  1085  	FeToBytes(s, &y)
  1086  	s[31] ^= FeIsNegative(&x) << 7
  1087  }
  1088  
  1089  // FromBytesBaseGroup unmarshals an elliptic curve point returns true iff the
  1090  // point point is in the order l subgroup generated by the base point. This
  1091  // implementation is based on
  1092  // https://www.iacr.org/archive/pkc2003/25670211/25670211.pdf Definition 1.
  1093  // Validation of an elliptic curve public key P ensures that P is a point of
  1094  // order BasePointOrder in E.
  1095  func (p *ExtendedGroupElement) FromBytesBaseGroup(s *[32]byte) bool {
  1096  
  1097  	// condition 1: P != infinity
  1098  	if *s == [32]byte{1} {
  1099  		return false
  1100  	}
  1101  
  1102  	// condition 3 is implied by successful point decompression
  1103  	if !p.FromBytes(s) {
  1104  		return false
  1105  	}
  1106  	// condition 2: ToBytes produces canonical encodings, check against that
  1107  	var sCheck [32]byte
  1108  	p.ToBytes(&sCheck)
  1109  	if sCheck != *s {
  1110  		return false
  1111  	}
  1112  
  1113  	// condition 4: order * P == infinity
  1114  	var out ExtendedGroupElement
  1115  	GeScalarMult(&out, &BasePointOrder, p)
  1116  	out.ToBytes(&sCheck)
  1117  	if sCheck != [32]byte{1} {
  1118  		return false
  1119  	}
  1120  
  1121  	// an array of all zeros would not result from any randomly generated point
  1122  	// and might easily be caused by bugs in ToBytes or FromBytes
  1123  	if *s == [32]byte{} {
  1124  		return false
  1125  	}
  1126  	return true
  1127  }
  1128  
  1129  func (p *ExtendedGroupElement) FromBytes(s *[32]byte) bool {
  1130  	FeFromBytes(&p.Y, s)
  1131  	return p.FromParityAndY(s[31]>>7, &p.Y)
  1132  }
  1133  
  1134  func (p *ExtendedGroupElement) FromParityAndY(bit byte, y *FieldElement) bool {
  1135  	var u, v, v3, vxx, check FieldElement
  1136  
  1137  	FeCopy(&p.Y, y)
  1138  	FeOne(&p.Z)
  1139  	FeSquare(&u, &p.Y)
  1140  	FeMul(&v, &u, &d)
  1141  	FeSub(&u, &u, &p.Z) // y = y^2-1
  1142  	FeAdd(&v, &v, &p.Z) // v = dy^2+1
  1143  
  1144  	FeSquare(&v3, &v)
  1145  	FeMul(&v3, &v3, &v) // v3 = v^3
  1146  	FeSquare(&p.X, &v3)
  1147  	FeMul(&p.X, &p.X, &v)
  1148  	FeMul(&p.X, &p.X, &u) // x = uv^7
  1149  
  1150  	fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
  1151  	FeMul(&p.X, &p.X, &v3)
  1152  	FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)
  1153  
  1154  	var tmpX, tmp2 [32]byte
  1155  
  1156  	FeSquare(&vxx, &p.X)
  1157  	FeMul(&vxx, &vxx, &v)
  1158  	FeSub(&check, &vxx, &u) // vx^2-u
  1159  	if FeIsNonZero(&check) == 1 {
  1160  		FeAdd(&check, &vxx, &u) // vx^2+u
  1161  		if FeIsNonZero(&check) == 1 {
  1162  			return false
  1163  		}
  1164  		FeMul(&p.X, &p.X, &SqrtM1)
  1165  
  1166  		FeToBytes(&tmpX, &p.X)
  1167  		for i, v := range tmpX {
  1168  			tmp2[31-i] = v
  1169  		}
  1170  	}
  1171  
  1172  	if FeIsNegative(&p.X) != bit {
  1173  		FeNeg(&p.X, &p.X)
  1174  	}
  1175  
  1176  	FeMul(&p.T, &p.X, &p.Y)
  1177  	return true
  1178  }
  1179  
  1180  func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) {
  1181  	FeMul(&r.X, &p.X, &p.T)
  1182  	FeMul(&r.Y, &p.Y, &p.Z)
  1183  	FeMul(&r.Z, &p.Z, &p.T)
  1184  }
  1185  
  1186  func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) {
  1187  	FeMul(&r.X, &p.X, &p.T)
  1188  	FeMul(&r.Y, &p.Y, &p.Z)
  1189  	FeMul(&r.Z, &p.Z, &p.T)
  1190  	FeMul(&r.T, &p.X, &p.Y)
  1191  }
  1192  
  1193  func (p *PreComputedGroupElement) Zero() {
  1194  	FeOne(&p.yPlusX)
  1195  	FeOne(&p.yMinusX)
  1196  	FeZero(&p.xy2d)
  1197  }
  1198  
  1199  func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
  1200  	var t0 FieldElement
  1201  
  1202  	FeAdd(&r.X, &p.Y, &p.X)
  1203  	FeSub(&r.Y, &p.Y, &p.X)
  1204  	FeMul(&r.Z, &r.X, &q.yPlusX)
  1205  	FeMul(&r.Y, &r.Y, &q.yMinusX)
  1206  	FeMul(&r.T, &q.T2d, &p.T)
  1207  	FeMul(&r.X, &p.Z, &q.Z)
  1208  	FeAdd(&t0, &r.X, &r.X)
  1209  	FeSub(&r.X, &r.Z, &r.Y)
  1210  	FeAdd(&r.Y, &r.Z, &r.Y)
  1211  	FeAdd(&r.Z, &t0, &r.T)
  1212  	FeSub(&r.T, &t0, &r.T)
  1213  }
  1214  
  1215  func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
  1216  	var t0 FieldElement
  1217  
  1218  	FeAdd(&r.X, &p.Y, &p.X)
  1219  	FeSub(&r.Y, &p.Y, &p.X)
  1220  	FeMul(&r.Z, &r.X, &q.yMinusX)
  1221  	FeMul(&r.Y, &r.Y, &q.yPlusX)
  1222  	FeMul(&r.T, &q.T2d, &p.T)
  1223  	FeMul(&r.X, &p.Z, &q.Z)
  1224  	FeAdd(&t0, &r.X, &r.X)
  1225  	FeSub(&r.X, &r.Z, &r.Y)
  1226  	FeAdd(&r.Y, &r.Z, &r.Y)
  1227  	FeSub(&r.Z, &t0, &r.T)
  1228  	FeAdd(&r.T, &t0, &r.T)
  1229  }
  1230  
  1231  func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
  1232  	var t0 FieldElement
  1233  
  1234  	FeAdd(&r.X, &p.Y, &p.X)
  1235  	FeSub(&r.Y, &p.Y, &p.X)
  1236  	FeMul(&r.Z, &r.X, &q.yPlusX)
  1237  	FeMul(&r.Y, &r.Y, &q.yMinusX)
  1238  	FeMul(&r.T, &q.xy2d, &p.T)
  1239  	FeAdd(&t0, &p.Z, &p.Z)
  1240  	FeSub(&r.X, &r.Z, &r.Y)
  1241  	FeAdd(&r.Y, &r.Z, &r.Y)
  1242  	FeAdd(&r.Z, &t0, &r.T)
  1243  	FeSub(&r.T, &t0, &r.T)
  1244  }
  1245  
  1246  func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
  1247  	var t0 FieldElement
  1248  
  1249  	FeAdd(&r.X, &p.Y, &p.X)
  1250  	FeSub(&r.Y, &p.Y, &p.X)
  1251  	FeMul(&r.Z, &r.X, &q.yMinusX)
  1252  	FeMul(&r.Y, &r.Y, &q.yPlusX)
  1253  	FeMul(&r.T, &q.xy2d, &p.T)
  1254  	FeAdd(&t0, &p.Z, &p.Z)
  1255  	FeSub(&r.X, &r.Z, &r.Y)
  1256  	FeAdd(&r.Y, &r.Z, &r.Y)
  1257  	FeSub(&r.Z, &t0, &r.T)
  1258  	FeAdd(&r.T, &t0, &r.T)
  1259  }
  1260  
  1261  func slide(r *[256]int8, a *[32]byte) {
  1262  	for i := range r {
  1263  		r[i] = int8(1 & (a[i>>3] >> uint(i&7)))
  1264  	}
  1265  
  1266  	for i := range r {
  1267  		if r[i] != 0 {
  1268  			for b := 1; b <= 6 && i+b < 256; b++ {
  1269  				if r[i+b] != 0 {
  1270  					if r[i]+(r[i+b]<<uint(b)) <= 15 {
  1271  						r[i] += r[i+b] << uint(b)
  1272  						r[i+b] = 0
  1273  					} else if r[i]-(r[i+b]<<uint(b)) >= -15 {
  1274  						r[i] -= r[i+b] << uint(b)
  1275  						for k := i + b; k < 256; k++ {
  1276  							if r[k] == 0 {
  1277  								r[k] = 1
  1278  								break
  1279  							}
  1280  							r[k] = 0
  1281  						}
  1282  					} else {
  1283  						break
  1284  					}
  1285  				}
  1286  			}
  1287  		}
  1288  	}
  1289  }
  1290  
  1291  // GeAdd sets r = a+b. r may overlaop with a and b.
  1292  func GeAdd(r, a, b *ExtendedGroupElement) {
  1293  	var bca CachedGroupElement
  1294  	b.ToCached(&bca)
  1295  	var rc CompletedGroupElement
  1296  	geAdd(&rc, a, &bca)
  1297  	rc.ToExtended(r)
  1298  }
  1299  
  1300  func ExtendedGroupElementCopy(t, u *ExtendedGroupElement) {
  1301  	FeCopy(&t.X, &u.X)
  1302  	FeCopy(&t.Y, &u.Y)
  1303  	FeCopy(&t.Z, &u.Z)
  1304  	FeCopy(&t.T, &u.T)
  1305  }
  1306  
  1307  func ExtendedGroupElementCMove(t, u *ExtendedGroupElement, b int32) {
  1308  	FeCMove(&t.X, &u.X, b)
  1309  	FeCMove(&t.Y, &u.Y, b)
  1310  	FeCMove(&t.Z, &u.Z, b)
  1311  	FeCMove(&t.T, &u.T, b)
  1312  }
  1313  
  1314  // GeScalarMult sets r = a*A
  1315  // where a = a[0]+256*a[1]+...+256^31 a[31].
  1316  func GeScalarMult(r *ExtendedGroupElement, a *[32]byte, A *ExtendedGroupElement) {
  1317  	var p, q ExtendedGroupElement
  1318  	q.Zero()
  1319  	ExtendedGroupElementCopy(&p, A)
  1320  	for i := uint(0); i < 256; i++ {
  1321  		bit := int32(a[i>>3]>>(i&7)) & 1
  1322  		var t ExtendedGroupElement
  1323  		GeAdd(&t, &q, &p)
  1324  		ExtendedGroupElementCMove(&q, &t, bit)
  1325  		GeDouble(&p, &p)
  1326  	}
  1327  	ExtendedGroupElementCopy(r, &q)
  1328  }
  1329  
  1330  // GeDoubleScalarMultVartime sets r = a*A + b*B
  1331  // where a = a[0]+256*a[1]+...+256^31 a[31].
  1332  // and b = b[0]+256*b[1]+...+256^31 b[31].
  1333  // B is the Ed25519 base point (x,4/5) with x positive.
  1334  func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *[32]byte, A *ExtendedGroupElement, b *[32]byte) {
  1335  	var aSlide, bSlide [256]int8
  1336  	var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
  1337  	var t CompletedGroupElement
  1338  	var u, A2 ExtendedGroupElement
  1339  	var i int
  1340  
  1341  	slide(&aSlide, a)
  1342  	slide(&bSlide, b)
  1343  
  1344  	A.ToCached(&Ai[0])
  1345  	A.Double(&t)
  1346  	t.ToExtended(&A2)
  1347  
  1348  	for i := 0; i < 7; i++ {
  1349  		geAdd(&t, &A2, &Ai[i])
  1350  		t.ToExtended(&u)
  1351  		u.ToCached(&Ai[i+1])
  1352  	}
  1353  
  1354  	r.Zero()
  1355  
  1356  	for i = 255; i >= 0; i-- {
  1357  		if aSlide[i] != 0 || bSlide[i] != 0 {
  1358  			break
  1359  		}
  1360  	}
  1361  
  1362  	for ; i >= 0; i-- {
  1363  		r.Double(&t)
  1364  
  1365  		if aSlide[i] > 0 {
  1366  			t.ToExtended(&u)
  1367  			geAdd(&t, &u, &Ai[aSlide[i]/2])
  1368  		} else if aSlide[i] < 0 {
  1369  			t.ToExtended(&u)
  1370  			geSub(&t, &u, &Ai[(-aSlide[i])/2])
  1371  		}
  1372  
  1373  		if bSlide[i] > 0 {
  1374  			t.ToExtended(&u)
  1375  			geMixedAdd(&t, &u, &bi[bSlide[i]/2])
  1376  		} else if bSlide[i] < 0 {
  1377  			t.ToExtended(&u)
  1378  			geMixedSub(&t, &u, &bi[(-bSlide[i])/2])
  1379  		}
  1380  
  1381  		t.ToProjective(r)
  1382  	}
  1383  }
  1384  
  1385  // equal returns 1 if b == c and 0 otherwise.
  1386  func equal(b, c int32) int32 {
  1387  	x := uint32(b ^ c)
  1388  	x--
  1389  	return int32(x >> 31)
  1390  }
  1391  
  1392  // negative returns 1 if b < 0 and 0 otherwise.
  1393  func negative(b int32) int32 {
  1394  	return (b >> 31) & 1
  1395  }
  1396  
  1397  func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) {
  1398  	FeCMove(&t.yPlusX, &u.yPlusX, b)
  1399  	FeCMove(&t.yMinusX, &u.yMinusX, b)
  1400  	FeCMove(&t.xy2d, &u.xy2d, b)
  1401  }
  1402  
  1403  func selectPoint(t *PreComputedGroupElement, pos int32, b int32) {
  1404  	var minusT PreComputedGroupElement
  1405  	bNegative := negative(b)
  1406  	bAbs := b - (((-bNegative) & b) << 1)
  1407  
  1408  	t.Zero()
  1409  	for i := int32(0); i < 8; i++ {
  1410  		PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1))
  1411  	}
  1412  	FeCopy(&minusT.yPlusX, &t.yMinusX)
  1413  	FeCopy(&minusT.yMinusX, &t.yPlusX)
  1414  	FeNeg(&minusT.xy2d, &t.xy2d)
  1415  	PreComputedGroupElementCMove(t, &minusT, bNegative)
  1416  }
  1417  
  1418  // GeScalarMultBase computes h = a*B, where
  1419  //   a = a[0]+256*a[1]+...+256^31 a[31]
  1420  //   B is the Ed25519 base point (x,4/5) with x positive.
  1421  //
  1422  // Preconditions:
  1423  //   a[31] <= 127
  1424  func GeScalarMultBase(h *ExtendedGroupElement, a *[32]byte) {
  1425  	var e [64]int8
  1426  
  1427  	for i, v := range a {
  1428  		e[2*i] = int8(v & 15)
  1429  		e[2*i+1] = int8((v >> 4) & 15)
  1430  	}
  1431  
  1432  	// each e[i] is between 0 and 15 and e[63] is between 0 and 7.
  1433  
  1434  	carry := int8(0)
  1435  	for i := 0; i < 63; i++ {
  1436  		e[i] += carry
  1437  		carry = (e[i] + 8) >> 4
  1438  		e[i] -= carry << 4
  1439  	}
  1440  	e[63] += carry
  1441  	// each e[i] is between -8 and 8.
  1442  
  1443  	h.Zero()
  1444  	var t PreComputedGroupElement
  1445  	var r CompletedGroupElement
  1446  	for i := int32(1); i < 64; i += 2 {
  1447  		selectPoint(&t, i/2, int32(e[i]))
  1448  		geMixedAdd(&r, h, &t)
  1449  		r.ToExtended(h)
  1450  	}
  1451  
  1452  	var s ProjectiveGroupElement
  1453  
  1454  	h.Double(&r)
  1455  	r.ToProjective(&s)
  1456  	s.Double(&r)
  1457  	r.ToProjective(&s)
  1458  	s.Double(&r)
  1459  	r.ToProjective(&s)
  1460  	s.Double(&r)
  1461  	r.ToExtended(h)
  1462  
  1463  	for i := int32(0); i < 64; i += 2 {
  1464  		selectPoint(&t, i/2, int32(e[i]))
  1465  		geMixedAdd(&r, h, &t)
  1466  		r.ToExtended(h)
  1467  	}
  1468  }
  1469  
  1470  // The scalars are GF(2^252 + 27742317777372353535851937790883648493).
  1471  
  1472  // Input:
  1473  //   a[0]+256*a[1]+...+256^31*a[31] = a
  1474  //   b[0]+256*b[1]+...+256^31*b[31] = b
  1475  //   c[0]+256*c[1]+...+256^31*c[31] = c
  1476  //
  1477  // Output:
  1478  //   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
  1479  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1480  func ScMulAdd(s, a, b, c *[32]byte) {
  1481  	a0 := 2097151 & load3(a[:])
  1482  	a1 := 2097151 & (load4(a[2:]) >> 5)
  1483  	a2 := 2097151 & (load3(a[5:]) >> 2)
  1484  	a3 := 2097151 & (load4(a[7:]) >> 7)
  1485  	a4 := 2097151 & (load4(a[10:]) >> 4)
  1486  	a5 := 2097151 & (load3(a[13:]) >> 1)
  1487  	a6 := 2097151 & (load4(a[15:]) >> 6)
  1488  	a7 := 2097151 & (load3(a[18:]) >> 3)
  1489  	a8 := 2097151 & load3(a[21:])
  1490  	a9 := 2097151 & (load4(a[23:]) >> 5)
  1491  	a10 := 2097151 & (load3(a[26:]) >> 2)
  1492  	a11 := (load4(a[28:]) >> 7)
  1493  	b0 := 2097151 & load3(b[:])
  1494  	b1 := 2097151 & (load4(b[2:]) >> 5)
  1495  	b2 := 2097151 & (load3(b[5:]) >> 2)
  1496  	b3 := 2097151 & (load4(b[7:]) >> 7)
  1497  	b4 := 2097151 & (load4(b[10:]) >> 4)
  1498  	b5 := 2097151 & (load3(b[13:]) >> 1)
  1499  	b6 := 2097151 & (load4(b[15:]) >> 6)
  1500  	b7 := 2097151 & (load3(b[18:]) >> 3)
  1501  	b8 := 2097151 & load3(b[21:])
  1502  	b9 := 2097151 & (load4(b[23:]) >> 5)
  1503  	b10 := 2097151 & (load3(b[26:]) >> 2)
  1504  	b11 := (load4(b[28:]) >> 7)
  1505  	c0 := 2097151 & load3(c[:])
  1506  	c1 := 2097151 & (load4(c[2:]) >> 5)
  1507  	c2 := 2097151 & (load3(c[5:]) >> 2)
  1508  	c3 := 2097151 & (load4(c[7:]) >> 7)
  1509  	c4 := 2097151 & (load4(c[10:]) >> 4)
  1510  	c5 := 2097151 & (load3(c[13:]) >> 1)
  1511  	c6 := 2097151 & (load4(c[15:]) >> 6)
  1512  	c7 := 2097151 & (load3(c[18:]) >> 3)
  1513  	c8 := 2097151 & load3(c[21:])
  1514  	c9 := 2097151 & (load4(c[23:]) >> 5)
  1515  	c10 := 2097151 & (load3(c[26:]) >> 2)
  1516  	c11 := (load4(c[28:]) >> 7)
  1517  	var carry [23]int64
  1518  
  1519  	s0 := c0 + a0*b0
  1520  	s1 := c1 + a0*b1 + a1*b0
  1521  	s2 := c2 + a0*b2 + a1*b1 + a2*b0
  1522  	s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
  1523  	s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
  1524  	s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
  1525  	s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
  1526  	s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
  1527  	s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
  1528  	s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
  1529  	s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
  1530  	s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
  1531  	s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
  1532  	s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
  1533  	s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
  1534  	s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
  1535  	s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
  1536  	s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
  1537  	s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
  1538  	s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
  1539  	s20 := a9*b11 + a10*b10 + a11*b9
  1540  	s21 := a10*b11 + a11*b10
  1541  	s22 := a11 * b11
  1542  	s23 := int64(0)
  1543  
  1544  	carry[0] = (s0 + (1 << 20)) >> 21
  1545  	s1 += carry[0]
  1546  	s0 -= carry[0] << 21
  1547  	carry[2] = (s2 + (1 << 20)) >> 21
  1548  	s3 += carry[2]
  1549  	s2 -= carry[2] << 21
  1550  	carry[4] = (s4 + (1 << 20)) >> 21
  1551  	s5 += carry[4]
  1552  	s4 -= carry[4] << 21
  1553  	carry[6] = (s6 + (1 << 20)) >> 21
  1554  	s7 += carry[6]
  1555  	s6 -= carry[6] << 21
  1556  	carry[8] = (s8 + (1 << 20)) >> 21
  1557  	s9 += carry[8]
  1558  	s8 -= carry[8] << 21
  1559  	carry[10] = (s10 + (1 << 20)) >> 21
  1560  	s11 += carry[10]
  1561  	s10 -= carry[10] << 21
  1562  	carry[12] = (s12 + (1 << 20)) >> 21
  1563  	s13 += carry[12]
  1564  	s12 -= carry[12] << 21
  1565  	carry[14] = (s14 + (1 << 20)) >> 21
  1566  	s15 += carry[14]
  1567  	s14 -= carry[14] << 21
  1568  	carry[16] = (s16 + (1 << 20)) >> 21
  1569  	s17 += carry[16]
  1570  	s16 -= carry[16] << 21
  1571  	carry[18] = (s18 + (1 << 20)) >> 21
  1572  	s19 += carry[18]
  1573  	s18 -= carry[18] << 21
  1574  	carry[20] = (s20 + (1 << 20)) >> 21
  1575  	s21 += carry[20]
  1576  	s20 -= carry[20] << 21
  1577  	carry[22] = (s22 + (1 << 20)) >> 21
  1578  	s23 += carry[22]
  1579  	s22 -= carry[22] << 21
  1580  
  1581  	carry[1] = (s1 + (1 << 20)) >> 21
  1582  	s2 += carry[1]
  1583  	s1 -= carry[1] << 21
  1584  	carry[3] = (s3 + (1 << 20)) >> 21
  1585  	s4 += carry[3]
  1586  	s3 -= carry[3] << 21
  1587  	carry[5] = (s5 + (1 << 20)) >> 21
  1588  	s6 += carry[5]
  1589  	s5 -= carry[5] << 21
  1590  	carry[7] = (s7 + (1 << 20)) >> 21
  1591  	s8 += carry[7]
  1592  	s7 -= carry[7] << 21
  1593  	carry[9] = (s9 + (1 << 20)) >> 21
  1594  	s10 += carry[9]
  1595  	s9 -= carry[9] << 21
  1596  	carry[11] = (s11 + (1 << 20)) >> 21
  1597  	s12 += carry[11]
  1598  	s11 -= carry[11] << 21
  1599  	carry[13] = (s13 + (1 << 20)) >> 21
  1600  	s14 += carry[13]
  1601  	s13 -= carry[13] << 21
  1602  	carry[15] = (s15 + (1 << 20)) >> 21
  1603  	s16 += carry[15]
  1604  	s15 -= carry[15] << 21
  1605  	carry[17] = (s17 + (1 << 20)) >> 21
  1606  	s18 += carry[17]
  1607  	s17 -= carry[17] << 21
  1608  	carry[19] = (s19 + (1 << 20)) >> 21
  1609  	s20 += carry[19]
  1610  	s19 -= carry[19] << 21
  1611  	carry[21] = (s21 + (1 << 20)) >> 21
  1612  	s22 += carry[21]
  1613  	s21 -= carry[21] << 21
  1614  
  1615  	s11 += s23 * 666643
  1616  	s12 += s23 * 470296
  1617  	s13 += s23 * 654183
  1618  	s14 -= s23 * 997805
  1619  	s15 += s23 * 136657
  1620  	s16 -= s23 * 683901
  1621  	s23 = 0
  1622  
  1623  	s10 += s22 * 666643
  1624  	s11 += s22 * 470296
  1625  	s12 += s22 * 654183
  1626  	s13 -= s22 * 997805
  1627  	s14 += s22 * 136657
  1628  	s15 -= s22 * 683901
  1629  	s22 = 0
  1630  
  1631  	s9 += s21 * 666643
  1632  	s10 += s21 * 470296
  1633  	s11 += s21 * 654183
  1634  	s12 -= s21 * 997805
  1635  	s13 += s21 * 136657
  1636  	s14 -= s21 * 683901
  1637  	s21 = 0
  1638  
  1639  	s8 += s20 * 666643
  1640  	s9 += s20 * 470296
  1641  	s10 += s20 * 654183
  1642  	s11 -= s20 * 997805
  1643  	s12 += s20 * 136657
  1644  	s13 -= s20 * 683901
  1645  	s20 = 0
  1646  
  1647  	s7 += s19 * 666643
  1648  	s8 += s19 * 470296
  1649  	s9 += s19 * 654183
  1650  	s10 -= s19 * 997805
  1651  	s11 += s19 * 136657
  1652  	s12 -= s19 * 683901
  1653  	s19 = 0
  1654  
  1655  	s6 += s18 * 666643
  1656  	s7 += s18 * 470296
  1657  	s8 += s18 * 654183
  1658  	s9 -= s18 * 997805
  1659  	s10 += s18 * 136657
  1660  	s11 -= s18 * 683901
  1661  	s18 = 0
  1662  
  1663  	carry[6] = (s6 + (1 << 20)) >> 21
  1664  	s7 += carry[6]
  1665  	s6 -= carry[6] << 21
  1666  	carry[8] = (s8 + (1 << 20)) >> 21
  1667  	s9 += carry[8]
  1668  	s8 -= carry[8] << 21
  1669  	carry[10] = (s10 + (1 << 20)) >> 21
  1670  	s11 += carry[10]
  1671  	s10 -= carry[10] << 21
  1672  	carry[12] = (s12 + (1 << 20)) >> 21
  1673  	s13 += carry[12]
  1674  	s12 -= carry[12] << 21
  1675  	carry[14] = (s14 + (1 << 20)) >> 21
  1676  	s15 += carry[14]
  1677  	s14 -= carry[14] << 21
  1678  	carry[16] = (s16 + (1 << 20)) >> 21
  1679  	s17 += carry[16]
  1680  	s16 -= carry[16] << 21
  1681  
  1682  	carry[7] = (s7 + (1 << 20)) >> 21
  1683  	s8 += carry[7]
  1684  	s7 -= carry[7] << 21
  1685  	carry[9] = (s9 + (1 << 20)) >> 21
  1686  	s10 += carry[9]
  1687  	s9 -= carry[9] << 21
  1688  	carry[11] = (s11 + (1 << 20)) >> 21
  1689  	s12 += carry[11]
  1690  	s11 -= carry[11] << 21
  1691  	carry[13] = (s13 + (1 << 20)) >> 21
  1692  	s14 += carry[13]
  1693  	s13 -= carry[13] << 21
  1694  	carry[15] = (s15 + (1 << 20)) >> 21
  1695  	s16 += carry[15]
  1696  	s15 -= carry[15] << 21
  1697  
  1698  	s5 += s17 * 666643
  1699  	s6 += s17 * 470296
  1700  	s7 += s17 * 654183
  1701  	s8 -= s17 * 997805
  1702  	s9 += s17 * 136657
  1703  	s10 -= s17 * 683901
  1704  	s17 = 0
  1705  
  1706  	s4 += s16 * 666643
  1707  	s5 += s16 * 470296
  1708  	s6 += s16 * 654183
  1709  	s7 -= s16 * 997805
  1710  	s8 += s16 * 136657
  1711  	s9 -= s16 * 683901
  1712  	s16 = 0
  1713  
  1714  	s3 += s15 * 666643
  1715  	s4 += s15 * 470296
  1716  	s5 += s15 * 654183
  1717  	s6 -= s15 * 997805
  1718  	s7 += s15 * 136657
  1719  	s8 -= s15 * 683901
  1720  	s15 = 0
  1721  
  1722  	s2 += s14 * 666643
  1723  	s3 += s14 * 470296
  1724  	s4 += s14 * 654183
  1725  	s5 -= s14 * 997805
  1726  	s6 += s14 * 136657
  1727  	s7 -= s14 * 683901
  1728  	s14 = 0
  1729  
  1730  	s1 += s13 * 666643
  1731  	s2 += s13 * 470296
  1732  	s3 += s13 * 654183
  1733  	s4 -= s13 * 997805
  1734  	s5 += s13 * 136657
  1735  	s6 -= s13 * 683901
  1736  	s13 = 0
  1737  
  1738  	s0 += s12 * 666643
  1739  	s1 += s12 * 470296
  1740  	s2 += s12 * 654183
  1741  	s3 -= s12 * 997805
  1742  	s4 += s12 * 136657
  1743  	s5 -= s12 * 683901
  1744  	s12 = 0
  1745  
  1746  	carry[0] = (s0 + (1 << 20)) >> 21
  1747  	s1 += carry[0]
  1748  	s0 -= carry[0] << 21
  1749  	carry[2] = (s2 + (1 << 20)) >> 21
  1750  	s3 += carry[2]
  1751  	s2 -= carry[2] << 21
  1752  	carry[4] = (s4 + (1 << 20)) >> 21
  1753  	s5 += carry[4]
  1754  	s4 -= carry[4] << 21
  1755  	carry[6] = (s6 + (1 << 20)) >> 21
  1756  	s7 += carry[6]
  1757  	s6 -= carry[6] << 21
  1758  	carry[8] = (s8 + (1 << 20)) >> 21
  1759  	s9 += carry[8]
  1760  	s8 -= carry[8] << 21
  1761  	carry[10] = (s10 + (1 << 20)) >> 21
  1762  	s11 += carry[10]
  1763  	s10 -= carry[10] << 21
  1764  
  1765  	carry[1] = (s1 + (1 << 20)) >> 21
  1766  	s2 += carry[1]
  1767  	s1 -= carry[1] << 21
  1768  	carry[3] = (s3 + (1 << 20)) >> 21
  1769  	s4 += carry[3]
  1770  	s3 -= carry[3] << 21
  1771  	carry[5] = (s5 + (1 << 20)) >> 21
  1772  	s6 += carry[5]
  1773  	s5 -= carry[5] << 21
  1774  	carry[7] = (s7 + (1 << 20)) >> 21
  1775  	s8 += carry[7]
  1776  	s7 -= carry[7] << 21
  1777  	carry[9] = (s9 + (1 << 20)) >> 21
  1778  	s10 += carry[9]
  1779  	s9 -= carry[9] << 21
  1780  	carry[11] = (s11 + (1 << 20)) >> 21
  1781  	s12 += carry[11]
  1782  	s11 -= carry[11] << 21
  1783  
  1784  	s0 += s12 * 666643
  1785  	s1 += s12 * 470296
  1786  	s2 += s12 * 654183
  1787  	s3 -= s12 * 997805
  1788  	s4 += s12 * 136657
  1789  	s5 -= s12 * 683901
  1790  	s12 = 0
  1791  
  1792  	carry[0] = s0 >> 21
  1793  	s1 += carry[0]
  1794  	s0 -= carry[0] << 21
  1795  	carry[1] = s1 >> 21
  1796  	s2 += carry[1]
  1797  	s1 -= carry[1] << 21
  1798  	carry[2] = s2 >> 21
  1799  	s3 += carry[2]
  1800  	s2 -= carry[2] << 21
  1801  	carry[3] = s3 >> 21
  1802  	s4 += carry[3]
  1803  	s3 -= carry[3] << 21
  1804  	carry[4] = s4 >> 21
  1805  	s5 += carry[4]
  1806  	s4 -= carry[4] << 21
  1807  	carry[5] = s5 >> 21
  1808  	s6 += carry[5]
  1809  	s5 -= carry[5] << 21
  1810  	carry[6] = s6 >> 21
  1811  	s7 += carry[6]
  1812  	s6 -= carry[6] << 21
  1813  	carry[7] = s7 >> 21
  1814  	s8 += carry[7]
  1815  	s7 -= carry[7] << 21
  1816  	carry[8] = s8 >> 21
  1817  	s9 += carry[8]
  1818  	s8 -= carry[8] << 21
  1819  	carry[9] = s9 >> 21
  1820  	s10 += carry[9]
  1821  	s9 -= carry[9] << 21
  1822  	carry[10] = s10 >> 21
  1823  	s11 += carry[10]
  1824  	s10 -= carry[10] << 21
  1825  	carry[11] = s11 >> 21
  1826  	s12 += carry[11]
  1827  	s11 -= carry[11] << 21
  1828  
  1829  	s0 += s12 * 666643
  1830  	s1 += s12 * 470296
  1831  	s2 += s12 * 654183
  1832  	s3 -= s12 * 997805
  1833  	s4 += s12 * 136657
  1834  	s5 -= s12 * 683901
  1835  	s12 = 0
  1836  
  1837  	carry[0] = s0 >> 21
  1838  	s1 += carry[0]
  1839  	s0 -= carry[0] << 21
  1840  	carry[1] = s1 >> 21
  1841  	s2 += carry[1]
  1842  	s1 -= carry[1] << 21
  1843  	carry[2] = s2 >> 21
  1844  	s3 += carry[2]
  1845  	s2 -= carry[2] << 21
  1846  	carry[3] = s3 >> 21
  1847  	s4 += carry[3]
  1848  	s3 -= carry[3] << 21
  1849  	carry[4] = s4 >> 21
  1850  	s5 += carry[4]
  1851  	s4 -= carry[4] << 21
  1852  	carry[5] = s5 >> 21
  1853  	s6 += carry[5]
  1854  	s5 -= carry[5] << 21
  1855  	carry[6] = s6 >> 21
  1856  	s7 += carry[6]
  1857  	s6 -= carry[6] << 21
  1858  	carry[7] = s7 >> 21
  1859  	s8 += carry[7]
  1860  	s7 -= carry[7] << 21
  1861  	carry[8] = s8 >> 21
  1862  	s9 += carry[8]
  1863  	s8 -= carry[8] << 21
  1864  	carry[9] = s9 >> 21
  1865  	s10 += carry[9]
  1866  	s9 -= carry[9] << 21
  1867  	carry[10] = s10 >> 21
  1868  	s11 += carry[10]
  1869  	s10 -= carry[10] << 21
  1870  
  1871  	s[0] = byte(s0 >> 0)
  1872  	s[1] = byte(s0 >> 8)
  1873  	s[2] = byte((s0 >> 16) | (s1 << 5))
  1874  	s[3] = byte(s1 >> 3)
  1875  	s[4] = byte(s1 >> 11)
  1876  	s[5] = byte((s1 >> 19) | (s2 << 2))
  1877  	s[6] = byte(s2 >> 6)
  1878  	s[7] = byte((s2 >> 14) | (s3 << 7))
  1879  	s[8] = byte(s3 >> 1)
  1880  	s[9] = byte(s3 >> 9)
  1881  	s[10] = byte((s3 >> 17) | (s4 << 4))
  1882  	s[11] = byte(s4 >> 4)
  1883  	s[12] = byte(s4 >> 12)
  1884  	s[13] = byte((s4 >> 20) | (s5 << 1))
  1885  	s[14] = byte(s5 >> 7)
  1886  	s[15] = byte((s5 >> 15) | (s6 << 6))
  1887  	s[16] = byte(s6 >> 2)
  1888  	s[17] = byte(s6 >> 10)
  1889  	s[18] = byte((s6 >> 18) | (s7 << 3))
  1890  	s[19] = byte(s7 >> 5)
  1891  	s[20] = byte(s7 >> 13)
  1892  	s[21] = byte(s8 >> 0)
  1893  	s[22] = byte(s8 >> 8)
  1894  	s[23] = byte((s8 >> 16) | (s9 << 5))
  1895  	s[24] = byte(s9 >> 3)
  1896  	s[25] = byte(s9 >> 11)
  1897  	s[26] = byte((s9 >> 19) | (s10 << 2))
  1898  	s[27] = byte(s10 >> 6)
  1899  	s[28] = byte((s10 >> 14) | (s11 << 7))
  1900  	s[29] = byte(s11 >> 1)
  1901  	s[30] = byte(s11 >> 9)
  1902  	s[31] = byte(s11 >> 17)
  1903  }
  1904  
  1905  // Input:
  1906  //   s[0]+256*s[1]+...+256^63*s[63] = s
  1907  //   s <= l
  1908  //
  1909  // Output:
  1910  //   s[0]+256*s[1]+...+256^31*s[31] = l - s
  1911  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1912  func ScNeg(r, s *[32]byte) {
  1913  	l := [32]byte{237, 211, 245, 92, 26, 99, 18, 88, 214, 156, 247, 162, 222, 249, 222, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16}
  1914  	var carry int32
  1915  	for i := 0; i < 32; i++ {
  1916  		carry = carry + int32(l[i]) - int32(s[i])
  1917  		negative := carry & math.MinInt32 // extract the sign bit (min=0b100...)
  1918  		negative |= negative >> 16
  1919  		negative |= negative >> 8
  1920  		negative |= negative >> 4
  1921  		negative |= negative >> 2
  1922  		negative |= negative >> 1
  1923  		carry += negative & 256 // +=256 if negative, unmodified otherwise
  1924  		r[i] = byte(carry)
  1925  		// carry for next iteration
  1926  		carry = negative & (-1) // -1 if negative, 0 otherwise
  1927  	}
  1928  }
  1929  
  1930  // Input:
  1931  //   s[0]+256*s[1]+...+256^63*s[63] = s
  1932  //
  1933  // Output:
  1934  //   s[0]+256*s[1]+...+256^31*s[31] = s mod l
  1935  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1936  func ScReduce(out *[32]byte, s *[64]byte) {
  1937  	s0 := 2097151 & load3(s[:])
  1938  	s1 := 2097151 & (load4(s[2:]) >> 5)
  1939  	s2 := 2097151 & (load3(s[5:]) >> 2)
  1940  	s3 := 2097151 & (load4(s[7:]) >> 7)
  1941  	s4 := 2097151 & (load4(s[10:]) >> 4)
  1942  	s5 := 2097151 & (load3(s[13:]) >> 1)
  1943  	s6 := 2097151 & (load4(s[15:]) >> 6)
  1944  	s7 := 2097151 & (load3(s[18:]) >> 3)
  1945  	s8 := 2097151 & load3(s[21:])
  1946  	s9 := 2097151 & (load4(s[23:]) >> 5)
  1947  	s10 := 2097151 & (load3(s[26:]) >> 2)
  1948  	s11 := 2097151 & (load4(s[28:]) >> 7)
  1949  	s12 := 2097151 & (load4(s[31:]) >> 4)
  1950  	s13 := 2097151 & (load3(s[34:]) >> 1)
  1951  	s14 := 2097151 & (load4(s[36:]) >> 6)
  1952  	s15 := 2097151 & (load3(s[39:]) >> 3)
  1953  	s16 := 2097151 & load3(s[42:])
  1954  	s17 := 2097151 & (load4(s[44:]) >> 5)
  1955  	s18 := 2097151 & (load3(s[47:]) >> 2)
  1956  	s19 := 2097151 & (load4(s[49:]) >> 7)
  1957  	s20 := 2097151 & (load4(s[52:]) >> 4)
  1958  	s21 := 2097151 & (load3(s[55:]) >> 1)
  1959  	s22 := 2097151 & (load4(s[57:]) >> 6)
  1960  	s23 := (load4(s[60:]) >> 3)
  1961  
  1962  	s11 += s23 * 666643
  1963  	s12 += s23 * 470296
  1964  	s13 += s23 * 654183
  1965  	s14 -= s23 * 997805
  1966  	s15 += s23 * 136657
  1967  	s16 -= s23 * 683901
  1968  	s23 = 0
  1969  
  1970  	s10 += s22 * 666643
  1971  	s11 += s22 * 470296
  1972  	s12 += s22 * 654183
  1973  	s13 -= s22 * 997805
  1974  	s14 += s22 * 136657
  1975  	s15 -= s22 * 683901
  1976  	s22 = 0
  1977  
  1978  	s9 += s21 * 666643
  1979  	s10 += s21 * 470296
  1980  	s11 += s21 * 654183
  1981  	s12 -= s21 * 997805
  1982  	s13 += s21 * 136657
  1983  	s14 -= s21 * 683901
  1984  	s21 = 0
  1985  
  1986  	s8 += s20 * 666643
  1987  	s9 += s20 * 470296
  1988  	s10 += s20 * 654183
  1989  	s11 -= s20 * 997805
  1990  	s12 += s20 * 136657
  1991  	s13 -= s20 * 683901
  1992  	s20 = 0
  1993  
  1994  	s7 += s19 * 666643
  1995  	s8 += s19 * 470296
  1996  	s9 += s19 * 654183
  1997  	s10 -= s19 * 997805
  1998  	s11 += s19 * 136657
  1999  	s12 -= s19 * 683901
  2000  	s19 = 0
  2001  
  2002  	s6 += s18 * 666643
  2003  	s7 += s18 * 470296
  2004  	s8 += s18 * 654183
  2005  	s9 -= s18 * 997805
  2006  	s10 += s18 * 136657
  2007  	s11 -= s18 * 683901
  2008  	s18 = 0
  2009  
  2010  	var carry [17]int64
  2011  
  2012  	carry[6] = (s6 + (1 << 20)) >> 21
  2013  	s7 += carry[6]
  2014  	s6 -= carry[6] << 21
  2015  	carry[8] = (s8 + (1 << 20)) >> 21
  2016  	s9 += carry[8]
  2017  	s8 -= carry[8] << 21
  2018  	carry[10] = (s10 + (1 << 20)) >> 21
  2019  	s11 += carry[10]
  2020  	s10 -= carry[10] << 21
  2021  	carry[12] = (s12 + (1 << 20)) >> 21
  2022  	s13 += carry[12]
  2023  	s12 -= carry[12] << 21
  2024  	carry[14] = (s14 + (1 << 20)) >> 21
  2025  	s15 += carry[14]
  2026  	s14 -= carry[14] << 21
  2027  	carry[16] = (s16 + (1 << 20)) >> 21
  2028  	s17 += carry[16]
  2029  	s16 -= carry[16] << 21
  2030  
  2031  	carry[7] = (s7 + (1 << 20)) >> 21
  2032  	s8 += carry[7]
  2033  	s7 -= carry[7] << 21
  2034  	carry[9] = (s9 + (1 << 20)) >> 21
  2035  	s10 += carry[9]
  2036  	s9 -= carry[9] << 21
  2037  	carry[11] = (s11 + (1 << 20)) >> 21
  2038  	s12 += carry[11]
  2039  	s11 -= carry[11] << 21
  2040  	carry[13] = (s13 + (1 << 20)) >> 21
  2041  	s14 += carry[13]
  2042  	s13 -= carry[13] << 21
  2043  	carry[15] = (s15 + (1 << 20)) >> 21
  2044  	s16 += carry[15]
  2045  	s15 -= carry[15] << 21
  2046  
  2047  	s5 += s17 * 666643
  2048  	s6 += s17 * 470296
  2049  	s7 += s17 * 654183
  2050  	s8 -= s17 * 997805
  2051  	s9 += s17 * 136657
  2052  	s10 -= s17 * 683901
  2053  	s17 = 0
  2054  
  2055  	s4 += s16 * 666643
  2056  	s5 += s16 * 470296
  2057  	s6 += s16 * 654183
  2058  	s7 -= s16 * 997805
  2059  	s8 += s16 * 136657
  2060  	s9 -= s16 * 683901
  2061  	s16 = 0
  2062  
  2063  	s3 += s15 * 666643
  2064  	s4 += s15 * 470296
  2065  	s5 += s15 * 654183
  2066  	s6 -= s15 * 997805
  2067  	s7 += s15 * 136657
  2068  	s8 -= s15 * 683901
  2069  	s15 = 0
  2070  
  2071  	s2 += s14 * 666643
  2072  	s3 += s14 * 470296
  2073  	s4 += s14 * 654183
  2074  	s5 -= s14 * 997805
  2075  	s6 += s14 * 136657
  2076  	s7 -= s14 * 683901
  2077  	s14 = 0
  2078  
  2079  	s1 += s13 * 666643
  2080  	s2 += s13 * 470296
  2081  	s3 += s13 * 654183
  2082  	s4 -= s13 * 997805
  2083  	s5 += s13 * 136657
  2084  	s6 -= s13 * 683901
  2085  	s13 = 0
  2086  
  2087  	s0 += s12 * 666643
  2088  	s1 += s12 * 470296
  2089  	s2 += s12 * 654183
  2090  	s3 -= s12 * 997805
  2091  	s4 += s12 * 136657
  2092  	s5 -= s12 * 683901
  2093  	s12 = 0
  2094  
  2095  	carry[0] = (s0 + (1 << 20)) >> 21
  2096  	s1 += carry[0]
  2097  	s0 -= carry[0] << 21
  2098  	carry[2] = (s2 + (1 << 20)) >> 21
  2099  	s3 += carry[2]
  2100  	s2 -= carry[2] << 21
  2101  	carry[4] = (s4 + (1 << 20)) >> 21
  2102  	s5 += carry[4]
  2103  	s4 -= carry[4] << 21
  2104  	carry[6] = (s6 + (1 << 20)) >> 21
  2105  	s7 += carry[6]
  2106  	s6 -= carry[6] << 21
  2107  	carry[8] = (s8 + (1 << 20)) >> 21
  2108  	s9 += carry[8]
  2109  	s8 -= carry[8] << 21
  2110  	carry[10] = (s10 + (1 << 20)) >> 21
  2111  	s11 += carry[10]
  2112  	s10 -= carry[10] << 21
  2113  
  2114  	carry[1] = (s1 + (1 << 20)) >> 21
  2115  	s2 += carry[1]
  2116  	s1 -= carry[1] << 21
  2117  	carry[3] = (s3 + (1 << 20)) >> 21
  2118  	s4 += carry[3]
  2119  	s3 -= carry[3] << 21
  2120  	carry[5] = (s5 + (1 << 20)) >> 21
  2121  	s6 += carry[5]
  2122  	s5 -= carry[5] << 21
  2123  	carry[7] = (s7 + (1 << 20)) >> 21
  2124  	s8 += carry[7]
  2125  	s7 -= carry[7] << 21
  2126  	carry[9] = (s9 + (1 << 20)) >> 21
  2127  	s10 += carry[9]
  2128  	s9 -= carry[9] << 21
  2129  	carry[11] = (s11 + (1 << 20)) >> 21
  2130  	s12 += carry[11]
  2131  	s11 -= carry[11] << 21
  2132  
  2133  	s0 += s12 * 666643
  2134  	s1 += s12 * 470296
  2135  	s2 += s12 * 654183
  2136  	s3 -= s12 * 997805
  2137  	s4 += s12 * 136657
  2138  	s5 -= s12 * 683901
  2139  	s12 = 0
  2140  
  2141  	carry[0] = s0 >> 21
  2142  	s1 += carry[0]
  2143  	s0 -= carry[0] << 21
  2144  	carry[1] = s1 >> 21
  2145  	s2 += carry[1]
  2146  	s1 -= carry[1] << 21
  2147  	carry[2] = s2 >> 21
  2148  	s3 += carry[2]
  2149  	s2 -= carry[2] << 21
  2150  	carry[3] = s3 >> 21
  2151  	s4 += carry[3]
  2152  	s3 -= carry[3] << 21
  2153  	carry[4] = s4 >> 21
  2154  	s5 += carry[4]
  2155  	s4 -= carry[4] << 21
  2156  	carry[5] = s5 >> 21
  2157  	s6 += carry[5]
  2158  	s5 -= carry[5] << 21
  2159  	carry[6] = s6 >> 21
  2160  	s7 += carry[6]
  2161  	s6 -= carry[6] << 21
  2162  	carry[7] = s7 >> 21
  2163  	s8 += carry[7]
  2164  	s7 -= carry[7] << 21
  2165  	carry[8] = s8 >> 21
  2166  	s9 += carry[8]
  2167  	s8 -= carry[8] << 21
  2168  	carry[9] = s9 >> 21
  2169  	s10 += carry[9]
  2170  	s9 -= carry[9] << 21
  2171  	carry[10] = s10 >> 21
  2172  	s11 += carry[10]
  2173  	s10 -= carry[10] << 21
  2174  	carry[11] = s11 >> 21
  2175  	s12 += carry[11]
  2176  	s11 -= carry[11] << 21
  2177  
  2178  	s0 += s12 * 666643
  2179  	s1 += s12 * 470296
  2180  	s2 += s12 * 654183
  2181  	s3 -= s12 * 997805
  2182  	s4 += s12 * 136657
  2183  	s5 -= s12 * 683901
  2184  	s12 = 0
  2185  
  2186  	carry[0] = s0 >> 21
  2187  	s1 += carry[0]
  2188  	s0 -= carry[0] << 21
  2189  	carry[1] = s1 >> 21
  2190  	s2 += carry[1]
  2191  	s1 -= carry[1] << 21
  2192  	carry[2] = s2 >> 21
  2193  	s3 += carry[2]
  2194  	s2 -= carry[2] << 21
  2195  	carry[3] = s3 >> 21
  2196  	s4 += carry[3]
  2197  	s3 -= carry[3] << 21
  2198  	carry[4] = s4 >> 21
  2199  	s5 += carry[4]
  2200  	s4 -= carry[4] << 21
  2201  	carry[5] = s5 >> 21
  2202  	s6 += carry[5]
  2203  	s5 -= carry[5] << 21
  2204  	carry[6] = s6 >> 21
  2205  	s7 += carry[6]
  2206  	s6 -= carry[6] << 21
  2207  	carry[7] = s7 >> 21
  2208  	s8 += carry[7]
  2209  	s7 -= carry[7] << 21
  2210  	carry[8] = s8 >> 21
  2211  	s9 += carry[8]
  2212  	s8 -= carry[8] << 21
  2213  	carry[9] = s9 >> 21
  2214  	s10 += carry[9]
  2215  	s9 -= carry[9] << 21
  2216  	carry[10] = s10 >> 21
  2217  	s11 += carry[10]
  2218  	s10 -= carry[10] << 21
  2219  
  2220  	out[0] = byte(s0 >> 0)
  2221  	out[1] = byte(s0 >> 8)
  2222  	out[2] = byte((s0 >> 16) | (s1 << 5))
  2223  	out[3] = byte(s1 >> 3)
  2224  	out[4] = byte(s1 >> 11)
  2225  	out[5] = byte((s1 >> 19) | (s2 << 2))
  2226  	out[6] = byte(s2 >> 6)
  2227  	out[7] = byte((s2 >> 14) | (s3 << 7))
  2228  	out[8] = byte(s3 >> 1)
  2229  	out[9] = byte(s3 >> 9)
  2230  	out[10] = byte((s3 >> 17) | (s4 << 4))
  2231  	out[11] = byte(s4 >> 4)
  2232  	out[12] = byte(s4 >> 12)
  2233  	out[13] = byte((s4 >> 20) | (s5 << 1))
  2234  	out[14] = byte(s5 >> 7)
  2235  	out[15] = byte((s5 >> 15) | (s6 << 6))
  2236  	out[16] = byte(s6 >> 2)
  2237  	out[17] = byte(s6 >> 10)
  2238  	out[18] = byte((s6 >> 18) | (s7 << 3))
  2239  	out[19] = byte(s7 >> 5)
  2240  	out[20] = byte(s7 >> 13)
  2241  	out[21] = byte(s8 >> 0)
  2242  	out[22] = byte(s8 >> 8)
  2243  	out[23] = byte((s8 >> 16) | (s9 << 5))
  2244  	out[24] = byte(s9 >> 3)
  2245  	out[25] = byte(s9 >> 11)
  2246  	out[26] = byte((s9 >> 19) | (s10 << 2))
  2247  	out[27] = byte(s10 >> 6)
  2248  	out[28] = byte((s10 >> 14) | (s11 << 7))
  2249  	out[29] = byte(s11 >> 1)
  2250  	out[30] = byte(s11 >> 9)
  2251  	out[31] = byte(s11 >> 17)
  2252  }