github.com/deroproject/derosuite@v2.1.6-1.0.20200307070847-0f2e589c7a2b+incompatible/crypto/edwards25519.go (about)

     1  // Copyright 2017-2018 DERO Project. All rights reserved.
     2  // Use of this source code in any form is governed by RESEARCH license.
     3  // license can be found in the LICENSE file.
     4  // GPG: 0F39 E425 8C65 3947 702A  8234 08B2 0360 A03A 9DE8
     5  //
     6  //
     7  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
     8  // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     9  // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
    10  // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    11  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    12  // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    13  // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    14  // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
    15  // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    16  
    17  // Copyright 2013 The Go Authors. All rights reserved.
    18  // Use of this source code is governed by a BSD-style
    19  // license that can be found in the LICENSE-BSD file.
    20  
    21  // Most of this is from the golang x/crypto package
    22  
    23  // +build ignore
    24  
    25  // This file (derived from package edwards25519) implements operations in
    26  // GF(2**255-19) and on an Edwards curve that is isomorphic to curve25519.
    27  // See http://ed25519.cr.yp.to/.
    28  
    29  // move this file out of this package and use x/crypto
    30  package crypto
    31  
    32  // This code is a port of the public domain, "ref10" implementation of ed25519
    33  // from SUPERCOP.
    34  
    35  // FieldElement represents an element of the field GF(2^255 - 19).  An element
    36  // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
    37  // t[3]+2^102 t[4]+...+2^230 t[9].  Bounds on each t[i] vary depending on
    38  // context.
    39  type FieldElement [10]int32
    40  
    41  var FeMa = FieldElement{-486662, 0, 0, 0, 0, 0, 0, 0, 0, 0}                                                                        /* -A */
    42  var FeMa2 = FieldElement{-12721188, -3529, 0, 0, 0, 0, 0, 0, 0, 0}                                                                 /* -A^2 */
    43  var FeFffb1 = FieldElement{-31702527, -2466483, -26106795, -12203692, -12169197, -321052, 14850977, -10296299, -16929438, -407568} /* sqrt(-2 * A * (A + 2)) */
    44  var FeFffb2 = FieldElement{8166131, -6741800, -17040804, 3154616, 21461005, 1466302, -30876704, -6368709, 10503587, -13363080}     /* sqrt(2 * A * (A + 2)) */
    45  var FeFffb3 = FieldElement{-13620103, 14639558, 4532995, 7679154, 16815101, -15883539, -22863840, -14813421, 13716513, -6477756}   /* sqrt(-sqrt(-1) * A * (A + 2)) */
    46  var FeFffb4 = FieldElement{-21786234, -12173074, 21573800, 4524538, -4645904, 16204591, 8012863, -8444712, 3212926, 6885324}       /* sqrt(sqrt(-1) * A * (A + 2)) */
    47  var FeSqrtM1 = FieldElement{-32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482}       /* sqrt(-1) */
    48  
    49  var zero FieldElement
    50  var one FieldElement
    51  
    52  func init() {
    53  	one[0] = 1
    54  }
    55  
    56  func (f *FieldElement) Zero() {
    57  	copy(f[:], zero[:])
    58  }
    59  
    60  func (f *FieldElement) One() {
    61  	copy(f[:], one[:])
    62  }
    63  
    64  func FeAdd(dst, a, b *FieldElement) {
    65  	dst[0] = a[0] + b[0]
    66  	dst[1] = a[1] + b[1]
    67  	dst[2] = a[2] + b[2]
    68  	dst[3] = a[3] + b[3]
    69  	dst[4] = a[4] + b[4]
    70  	dst[5] = a[5] + b[5]
    71  	dst[6] = a[6] + b[6]
    72  	dst[7] = a[7] + b[7]
    73  	dst[8] = a[8] + b[8]
    74  	dst[9] = a[9] + b[9]
    75  }
    76  
    77  func FeSub(dst, a, b *FieldElement) {
    78  	dst[0] = a[0] - b[0]
    79  	dst[1] = a[1] - b[1]
    80  	dst[2] = a[2] - b[2]
    81  	dst[3] = a[3] - b[3]
    82  	dst[4] = a[4] - b[4]
    83  	dst[5] = a[5] - b[5]
    84  	dst[6] = a[6] - b[6]
    85  	dst[7] = a[7] - b[7]
    86  	dst[8] = a[8] - b[8]
    87  	dst[9] = a[9] - b[9]
    88  }
    89  
    90  func FeCopy(dst, src *FieldElement) {
    91  	copy(dst[:], src[:])
    92  }
    93  
    94  // Replace (f,g) with (g,g) if b == 1;
    95  // replace (f,g) with (f,g) if b == 0.
    96  //
    97  // Preconditions: b in {0,1}.
    98  func FeCMove(f, g *FieldElement, b int32) {
    99  	b = -b
   100  	f[0] ^= b & (f[0] ^ g[0])
   101  	f[1] ^= b & (f[1] ^ g[1])
   102  	f[2] ^= b & (f[2] ^ g[2])
   103  	f[3] ^= b & (f[3] ^ g[3])
   104  	f[4] ^= b & (f[4] ^ g[4])
   105  	f[5] ^= b & (f[5] ^ g[5])
   106  	f[6] ^= b & (f[6] ^ g[6])
   107  	f[7] ^= b & (f[7] ^ g[7])
   108  	f[8] ^= b & (f[8] ^ g[8])
   109  	f[9] ^= b & (f[9] ^ g[9])
   110  }
   111  
   112  func load3(in []byte) (result int64) {
   113  	result = int64(in[0]) | (int64(in[1]) << 8) | (int64(in[2]) << 16)
   114  	return
   115  }
   116  
   117  func load4(in []byte) (result int64) {
   118  	result = int64(in[0]) | (int64(in[1]) << 8) | (int64(in[2]) << 16) | (int64(in[3]) << 24)
   119  	return
   120  }
   121  
   122  func FeFromBytes(dst *FieldElement, src *Key) {
   123  	h0 := load4(src[:])
   124  	h1 := load3(src[4:]) << 6
   125  	h2 := load3(src[7:]) << 5
   126  	h3 := load3(src[10:]) << 3
   127  	h4 := load3(src[13:]) << 2
   128  	h5 := load4(src[16:])
   129  	h6 := load3(src[20:]) << 7
   130  	h7 := load3(src[23:]) << 5
   131  	h8 := load3(src[26:]) << 4
   132  	h9 := (load3(src[29:]) & 8388607) << 2
   133  
   134  	FeCombine(dst, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   135  }
   136  
   137  // FeToBytes marshals h to s.
   138  // Preconditions:
   139  //   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   140  //
   141  // Write p=2^255-19; q=floor(h/p).
   142  // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
   143  //
   144  // Proof:
   145  //   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
   146  //   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
   147  //
   148  //   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
   149  //   Then 0<y<1.
   150  //
   151  //   Write r=h-pq.
   152  //   Have 0<=r<=p-1=2^255-20.
   153  //   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
   154  //
   155  //   Write x=r+19(2^-255)r+y.
   156  //   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
   157  //
   158  //   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
   159  //   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
   160  func FeToBytes(s *Key, h *FieldElement) {
   161  	var carry [10]int32
   162  
   163  	q := (19*h[9] + (1 << 24)) >> 25
   164  	q = (h[0] + q) >> 26
   165  	q = (h[1] + q) >> 25
   166  	q = (h[2] + q) >> 26
   167  	q = (h[3] + q) >> 25
   168  	q = (h[4] + q) >> 26
   169  	q = (h[5] + q) >> 25
   170  	q = (h[6] + q) >> 26
   171  	q = (h[7] + q) >> 25
   172  	q = (h[8] + q) >> 26
   173  	q = (h[9] + q) >> 25
   174  
   175  	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
   176  	h[0] += 19 * q
   177  	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
   178  
   179  	carry[0] = h[0] >> 26
   180  	h[1] += carry[0]
   181  	h[0] -= carry[0] << 26
   182  	carry[1] = h[1] >> 25
   183  	h[2] += carry[1]
   184  	h[1] -= carry[1] << 25
   185  	carry[2] = h[2] >> 26
   186  	h[3] += carry[2]
   187  	h[2] -= carry[2] << 26
   188  	carry[3] = h[3] >> 25
   189  	h[4] += carry[3]
   190  	h[3] -= carry[3] << 25
   191  	carry[4] = h[4] >> 26
   192  	h[5] += carry[4]
   193  	h[4] -= carry[4] << 26
   194  	carry[5] = h[5] >> 25
   195  	h[6] += carry[5]
   196  	h[5] -= carry[5] << 25
   197  	carry[6] = h[6] >> 26
   198  	h[7] += carry[6]
   199  	h[6] -= carry[6] << 26
   200  	carry[7] = h[7] >> 25
   201  	h[8] += carry[7]
   202  	h[7] -= carry[7] << 25
   203  	carry[8] = h[8] >> 26
   204  	h[9] += carry[8]
   205  	h[8] -= carry[8] << 26
   206  	carry[9] = h[9] >> 25
   207  	h[9] -= carry[9] << 25
   208  	// h10 = carry9
   209  
   210  	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
   211  	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
   212  	// evidently 2^255 h10-2^255 q = 0.
   213  	// Goal: Output h[0]+...+2^230 h[9].
   214  
   215  	s[0] = byte(h[0] >> 0)
   216  	s[1] = byte(h[0] >> 8)
   217  	s[2] = byte(h[0] >> 16)
   218  	s[3] = byte((h[0] >> 24) | (h[1] << 2))
   219  	s[4] = byte(h[1] >> 6)
   220  	s[5] = byte(h[1] >> 14)
   221  	s[6] = byte((h[1] >> 22) | (h[2] << 3))
   222  	s[7] = byte(h[2] >> 5)
   223  	s[8] = byte(h[2] >> 13)
   224  	s[9] = byte((h[2] >> 21) | (h[3] << 5))
   225  	s[10] = byte(h[3] >> 3)
   226  	s[11] = byte(h[3] >> 11)
   227  	s[12] = byte((h[3] >> 19) | (h[4] << 6))
   228  	s[13] = byte(h[4] >> 2)
   229  	s[14] = byte(h[4] >> 10)
   230  	s[15] = byte(h[4] >> 18)
   231  	s[16] = byte(h[5] >> 0)
   232  	s[17] = byte(h[5] >> 8)
   233  	s[18] = byte(h[5] >> 16)
   234  	s[19] = byte((h[5] >> 24) | (h[6] << 1))
   235  	s[20] = byte(h[6] >> 7)
   236  	s[21] = byte(h[6] >> 15)
   237  	s[22] = byte((h[6] >> 23) | (h[7] << 3))
   238  	s[23] = byte(h[7] >> 5)
   239  	s[24] = byte(h[7] >> 13)
   240  	s[25] = byte((h[7] >> 21) | (h[8] << 4))
   241  	s[26] = byte(h[8] >> 4)
   242  	s[27] = byte(h[8] >> 12)
   243  	s[28] = byte((h[8] >> 20) | (h[9] << 6))
   244  	s[29] = byte(h[9] >> 2)
   245  	s[30] = byte(h[9] >> 10)
   246  	s[31] = byte(h[9] >> 18)
   247  }
   248  
   249  func (f *FieldElement) IsNegative() byte {
   250  	var s Key
   251  	FeToBytes(&s, f)
   252  	return s[0] & 1
   253  }
   254  
   255  func (f *FieldElement) IsNonZero() int32 {
   256  	var s Key
   257  	FeToBytes(&s, f)
   258  	var x uint8
   259  	for _, b := range s {
   260  		x |= b
   261  	}
   262  	x |= x >> 4
   263  	x |= x >> 2
   264  	x |= x >> 1
   265  	return int32(x & 1)
   266  }
   267  
   268  // FeNeg sets h = -f
   269  //
   270  // Preconditions:
   271  //    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   272  //
   273  // Postconditions:
   274  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   275  func FeNeg(h, f *FieldElement) {
   276  	h[0] = -f[0]
   277  	h[1] = -f[1]
   278  	h[2] = -f[2]
   279  	h[3] = -f[3]
   280  	h[4] = -f[4]
   281  	h[5] = -f[5]
   282  	h[6] = -f[6]
   283  	h[7] = -f[7]
   284  	h[8] = -f[8]
   285  	h[9] = -f[9]
   286  }
   287  
   288  func FeCombine(h *FieldElement, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
   289  	var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64
   290  
   291  	/*
   292  	  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
   293  	    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
   294  	  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
   295  	    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
   296  	*/
   297  
   298  	c0 = (h0 + (1 << 25)) >> 26
   299  	h1 += c0
   300  	h0 -= c0 << 26
   301  	c4 = (h4 + (1 << 25)) >> 26
   302  	h5 += c4
   303  	h4 -= c4 << 26
   304  	/* |h0| <= 2^25 */
   305  	/* |h4| <= 2^25 */
   306  	/* |h1| <= 1.51*2^58 */
   307  	/* |h5| <= 1.51*2^58 */
   308  
   309  	c1 = (h1 + (1 << 24)) >> 25
   310  	h2 += c1
   311  	h1 -= c1 << 25
   312  	c5 = (h5 + (1 << 24)) >> 25
   313  	h6 += c5
   314  	h5 -= c5 << 25
   315  	/* |h1| <= 2^24; from now on fits into int32 */
   316  	/* |h5| <= 2^24; from now on fits into int32 */
   317  	/* |h2| <= 1.21*2^59 */
   318  	/* |h6| <= 1.21*2^59 */
   319  
   320  	c2 = (h2 + (1 << 25)) >> 26
   321  	h3 += c2
   322  	h2 -= c2 << 26
   323  	c6 = (h6 + (1 << 25)) >> 26
   324  	h7 += c6
   325  	h6 -= c6 << 26
   326  	/* |h2| <= 2^25; from now on fits into int32 unchanged */
   327  	/* |h6| <= 2^25; from now on fits into int32 unchanged */
   328  	/* |h3| <= 1.51*2^58 */
   329  	/* |h7| <= 1.51*2^58 */
   330  
   331  	c3 = (h3 + (1 << 24)) >> 25
   332  	h4 += c3
   333  	h3 -= c3 << 25
   334  	c7 = (h7 + (1 << 24)) >> 25
   335  	h8 += c7
   336  	h7 -= c7 << 25
   337  	/* |h3| <= 2^24; from now on fits into int32 unchanged */
   338  	/* |h7| <= 2^24; from now on fits into int32 unchanged */
   339  	/* |h4| <= 1.52*2^33 */
   340  	/* |h8| <= 1.52*2^33 */
   341  
   342  	c4 = (h4 + (1 << 25)) >> 26
   343  	h5 += c4
   344  	h4 -= c4 << 26
   345  	c8 = (h8 + (1 << 25)) >> 26
   346  	h9 += c8
   347  	h8 -= c8 << 26
   348  	/* |h4| <= 2^25; from now on fits into int32 unchanged */
   349  	/* |h8| <= 2^25; from now on fits into int32 unchanged */
   350  	/* |h5| <= 1.01*2^24 */
   351  	/* |h9| <= 1.51*2^58 */
   352  
   353  	c9 = (h9 + (1 << 24)) >> 25
   354  	h0 += c9 * 19
   355  	h9 -= c9 << 25
   356  	/* |h9| <= 2^24; from now on fits into int32 unchanged */
   357  	/* |h0| <= 1.8*2^37 */
   358  
   359  	c0 = (h0 + (1 << 25)) >> 26
   360  	h1 += c0
   361  	h0 -= c0 << 26
   362  	/* |h0| <= 2^25; from now on fits into int32 unchanged */
   363  	/* |h1| <= 1.01*2^24 */
   364  
   365  	h[0] = int32(h0)
   366  	h[1] = int32(h1)
   367  	h[2] = int32(h2)
   368  	h[3] = int32(h3)
   369  	h[4] = int32(h4)
   370  	h[5] = int32(h5)
   371  	h[6] = int32(h6)
   372  	h[7] = int32(h7)
   373  	h[8] = int32(h8)
   374  	h[9] = int32(h9)
   375  }
   376  
   377  // FeMul calculates h = f * g
   378  // Can overlap h with f or g.
   379  //
   380  // Preconditions:
   381  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   382  //    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   383  //
   384  // Postconditions:
   385  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   386  //
   387  // Notes on implementation strategy:
   388  //
   389  // Using schoolbook multiplication.
   390  // Karatsuba would save a little in some cost models.
   391  //
   392  // Most multiplications by 2 and 19 are 32-bit precomputations;
   393  // cheaper than 64-bit postcomputations.
   394  //
   395  // There is one remaining multiplication by 19 in the carry chain;
   396  // one *19 precomputation can be merged into this,
   397  // but the resulting data flow is considerably less clean.
   398  //
   399  // There are 12 carries below.
   400  // 10 of them are 2-way parallelizable and vectorizable.
   401  // Can get away with 11 carries, but then data flow is much deeper.
   402  //
   403  // With tighter constraints on inputs can squeeze carries into int32.
   404  func FeMul(h, f, g *FieldElement) {
   405  	f0 := int64(f[0])
   406  	f1 := int64(f[1])
   407  	f2 := int64(f[2])
   408  	f3 := int64(f[3])
   409  	f4 := int64(f[4])
   410  	f5 := int64(f[5])
   411  	f6 := int64(f[6])
   412  	f7 := int64(f[7])
   413  	f8 := int64(f[8])
   414  	f9 := int64(f[9])
   415  
   416  	f1_2 := int64(2 * f[1])
   417  	f3_2 := int64(2 * f[3])
   418  	f5_2 := int64(2 * f[5])
   419  	f7_2 := int64(2 * f[7])
   420  	f9_2 := int64(2 * f[9])
   421  
   422  	g0 := int64(g[0])
   423  	g1 := int64(g[1])
   424  	g2 := int64(g[2])
   425  	g3 := int64(g[3])
   426  	g4 := int64(g[4])
   427  	g5 := int64(g[5])
   428  	g6 := int64(g[6])
   429  	g7 := int64(g[7])
   430  	g8 := int64(g[8])
   431  	g9 := int64(g[9])
   432  
   433  	g1_19 := int64(19 * g[1]) /* 1.4*2^29 */
   434  	g2_19 := int64(19 * g[2]) /* 1.4*2^30; still ok */
   435  	g3_19 := int64(19 * g[3])
   436  	g4_19 := int64(19 * g[4])
   437  	g5_19 := int64(19 * g[5])
   438  	g6_19 := int64(19 * g[6])
   439  	g7_19 := int64(19 * g[7])
   440  	g8_19 := int64(19 * g[8])
   441  	g9_19 := int64(19 * g[9])
   442  
   443  	h0 := f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19
   444  	h1 := f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19
   445  	h2 := f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19
   446  	h3 := f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19
   447  	h4 := f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19
   448  	h5 := f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19
   449  	h6 := f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19
   450  	h7 := f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19
   451  	h8 := f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19
   452  	h9 := f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0
   453  
   454  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   455  }
   456  
   457  func feSquare(f *FieldElement) (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
   458  	f0 := int64(f[0])
   459  	f1 := int64(f[1])
   460  	f2 := int64(f[2])
   461  	f3 := int64(f[3])
   462  	f4 := int64(f[4])
   463  	f5 := int64(f[5])
   464  	f6 := int64(f[6])
   465  	f7 := int64(f[7])
   466  	f8 := int64(f[8])
   467  	f9 := int64(f[9])
   468  	f0_2 := int64(2 * f[0])
   469  	f1_2 := int64(2 * f[1])
   470  	f2_2 := int64(2 * f[2])
   471  	f3_2 := int64(2 * f[3])
   472  	f4_2 := int64(2 * f[4])
   473  	f5_2 := int64(2 * f[5])
   474  	f6_2 := int64(2 * f[6])
   475  	f7_2 := int64(2 * f[7])
   476  	f5_38 := 38 * f5 // 1.31*2^30
   477  	f6_19 := 19 * f6 // 1.31*2^30
   478  	f7_38 := 38 * f7 // 1.31*2^30
   479  	f8_19 := 19 * f8 // 1.31*2^30
   480  	f9_38 := 38 * f9 // 1.31*2^30
   481  
   482  	h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38
   483  	h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19
   484  	h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19
   485  	h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38
   486  	h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38
   487  	h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19
   488  	h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19
   489  	h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38
   490  	h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38
   491  	h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5
   492  
   493  	return
   494  }
   495  
   496  // FeSquare calculates h = f*f. Can overlap h with f.
   497  //
   498  // Preconditions:
   499  //    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   500  //
   501  // Postconditions:
   502  //    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   503  func FeSquare(h, f *FieldElement) {
   504  	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
   505  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   506  }
   507  
   508  // FeSquare2 sets h = 2 * f * f
   509  //
   510  // Can overlap h with f.
   511  //
   512  // Preconditions:
   513  //    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
   514  //
   515  // Postconditions:
   516  //    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
   517  // See fe_mul.c for discussion of implementation strategy.
   518  func FeSquare2(h, f *FieldElement) {
   519  	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
   520  
   521  	h0 += h0
   522  	h1 += h1
   523  	h2 += h2
   524  	h3 += h3
   525  	h4 += h4
   526  	h5 += h5
   527  	h6 += h6
   528  	h7 += h7
   529  	h8 += h8
   530  	h9 += h9
   531  
   532  	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
   533  }
   534  
   535  func FeInvert(out, z *FieldElement) {
   536  	var t0, t1, t2, t3 FieldElement
   537  	var i int
   538  
   539  	FeSquare(&t0, z)        // 2^1
   540  	FeSquare(&t1, &t0)      // 2^2
   541  	for i = 1; i < 2; i++ { // 2^3
   542  		FeSquare(&t1, &t1)
   543  	}
   544  	FeMul(&t1, z, &t1)      // 2^3 + 2^0
   545  	FeMul(&t0, &t0, &t1)    // 2^3 + 2^1 + 2^0
   546  	FeSquare(&t2, &t0)      // 2^4 + 2^2 + 2^1
   547  	FeMul(&t1, &t1, &t2)    // 2^4 + 2^3 + 2^2 + 2^1 + 2^0
   548  	FeSquare(&t2, &t1)      // 5,4,3,2,1
   549  	for i = 1; i < 5; i++ { // 9,8,7,6,5
   550  		FeSquare(&t2, &t2)
   551  	}
   552  	FeMul(&t1, &t2, &t1)     // 9,8,7,6,5,4,3,2,1,0
   553  	FeSquare(&t2, &t1)       // 10..1
   554  	for i = 1; i < 10; i++ { // 19..10
   555  		FeSquare(&t2, &t2)
   556  	}
   557  	FeMul(&t2, &t2, &t1)     // 19..0
   558  	FeSquare(&t3, &t2)       // 20..1
   559  	for i = 1; i < 20; i++ { // 39..20
   560  		FeSquare(&t3, &t3)
   561  	}
   562  	FeMul(&t2, &t3, &t2)     // 39..0
   563  	FeSquare(&t2, &t2)       // 40..1
   564  	for i = 1; i < 10; i++ { // 49..10
   565  		FeSquare(&t2, &t2)
   566  	}
   567  	FeMul(&t1, &t2, &t1)     // 49..0
   568  	FeSquare(&t2, &t1)       // 50..1
   569  	for i = 1; i < 50; i++ { // 99..50
   570  		FeSquare(&t2, &t2)
   571  	}
   572  	FeMul(&t2, &t2, &t1)      // 99..0
   573  	FeSquare(&t3, &t2)        // 100..1
   574  	for i = 1; i < 100; i++ { // 199..100
   575  		FeSquare(&t3, &t3)
   576  	}
   577  	FeMul(&t2, &t3, &t2)     // 199..0
   578  	FeSquare(&t2, &t2)       // 200..1
   579  	for i = 1; i < 50; i++ { // 249..50
   580  		FeSquare(&t2, &t2)
   581  	}
   582  	FeMul(&t1, &t2, &t1)    // 249..0
   583  	FeSquare(&t1, &t1)      // 250..1
   584  	for i = 1; i < 5; i++ { // 254..5
   585  		FeSquare(&t1, &t1)
   586  	}
   587  	FeMul(out, &t1, &t0) // 254..5,3,1,0
   588  }
   589  
   590  func fePow22523(out, z *FieldElement) {
   591  	var t0, t1, t2 FieldElement
   592  	var i int
   593  
   594  	FeSquare(&t0, z)
   595  	for i = 1; i < 1; i++ {
   596  		FeSquare(&t0, &t0)
   597  	}
   598  	FeSquare(&t1, &t0)
   599  	for i = 1; i < 2; i++ {
   600  		FeSquare(&t1, &t1)
   601  	}
   602  	FeMul(&t1, z, &t1)
   603  	FeMul(&t0, &t0, &t1)
   604  	FeSquare(&t0, &t0)
   605  	for i = 1; i < 1; i++ {
   606  		FeSquare(&t0, &t0)
   607  	}
   608  	FeMul(&t0, &t1, &t0)
   609  	FeSquare(&t1, &t0)
   610  	for i = 1; i < 5; i++ {
   611  		FeSquare(&t1, &t1)
   612  	}
   613  	FeMul(&t0, &t1, &t0)
   614  	FeSquare(&t1, &t0)
   615  	for i = 1; i < 10; i++ {
   616  		FeSquare(&t1, &t1)
   617  	}
   618  	FeMul(&t1, &t1, &t0)
   619  	FeSquare(&t2, &t1)
   620  	for i = 1; i < 20; i++ {
   621  		FeSquare(&t2, &t2)
   622  	}
   623  	FeMul(&t1, &t2, &t1)
   624  	FeSquare(&t1, &t1)
   625  	for i = 1; i < 10; i++ {
   626  		FeSquare(&t1, &t1)
   627  	}
   628  	FeMul(&t0, &t1, &t0)
   629  	FeSquare(&t1, &t0)
   630  	for i = 1; i < 50; i++ {
   631  		FeSquare(&t1, &t1)
   632  	}
   633  	FeMul(&t1, &t1, &t0)
   634  	FeSquare(&t2, &t1)
   635  	for i = 1; i < 100; i++ {
   636  		FeSquare(&t2, &t2)
   637  	}
   638  	FeMul(&t1, &t2, &t1)
   639  	FeSquare(&t1, &t1)
   640  	for i = 1; i < 50; i++ {
   641  		FeSquare(&t1, &t1)
   642  	}
   643  	FeMul(&t0, &t1, &t0)
   644  	FeSquare(&t0, &t0)
   645  	for i = 1; i < 2; i++ {
   646  		FeSquare(&t0, &t0)
   647  	}
   648  	FeMul(out, &t0, z)
   649  }
   650  
   651  func FeDivPowM1(out, u, v *FieldElement) {
   652  	var v3, uv7, t0 FieldElement
   653  
   654  	FeSquare(&v3, v)
   655  	FeMul(&v3, &v3, v) /* v3 = v^3 */
   656  	FeSquare(&uv7, &v3)
   657  	FeMul(&uv7, &uv7, v)
   658  	FeMul(&uv7, &uv7, u) /* uv7 = uv^7 */
   659  
   660  	fePow22523(&t0, &uv7)
   661  	/* t0 = (uv^7)^((q-5)/8) */
   662  	FeMul(&t0, &t0, &v3)
   663  	FeMul(out, &t0, u) /* u^(m+1)v^(-(m+1)) */
   664  }
   665  
   666  // Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
   667  // y^2 where d = -121665/121666.
   668  //
   669  // Several representations are used:
   670  //   ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
   671  //   ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
   672  //   CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
   673  //   PreComputedGroupElement: (y+x,y-x,2dxy)
   674  
   675  type ProjectiveGroupElement struct {
   676  	X, Y, Z FieldElement
   677  }
   678  
   679  type ExtendedGroupElement struct {
   680  	X, Y, Z, T FieldElement
   681  }
   682  
   683  type CompletedGroupElement struct {
   684  	X, Y, Z, T FieldElement
   685  }
   686  
   687  type PreComputedGroupElement struct {
   688  	yPlusX, yMinusX, xy2d FieldElement
   689  }
   690  
   691  type CachedGroupElement struct {
   692  	yPlusX, yMinusX, Z, T2d FieldElement
   693  }
   694  
   695  func (p *ProjectiveGroupElement) Zero() {
   696  	p.X.Zero()
   697  	p.Y.One()
   698  	p.Z.One()
   699  }
   700  
   701  func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) {
   702  	var t0 FieldElement
   703  
   704  	FeSquare(&r.X, &p.X)
   705  	FeSquare(&r.Z, &p.Y)
   706  	FeSquare2(&r.T, &p.Z)
   707  	FeAdd(&r.Y, &p.X, &p.Y)
   708  	FeSquare(&t0, &r.Y)
   709  	FeAdd(&r.Y, &r.Z, &r.X)
   710  	FeSub(&r.Z, &r.Z, &r.X)
   711  	FeSub(&r.X, &t0, &r.Y)
   712  	FeSub(&r.T, &r.T, &r.Z)
   713  }
   714  
   715  func (p *ProjectiveGroupElement) ToBytes(s *Key) {
   716  	var recip, x, y FieldElement
   717  
   718  	FeInvert(&recip, &p.Z)
   719  	FeMul(&x, &p.X, &recip)
   720  	FeMul(&y, &p.Y, &recip)
   721  	FeToBytes(s, &y)
   722  	s[31] ^= x.IsNegative() << 7
   723  }
   724  
   725  // this is equivalent to ge_fromfe_frombytes_vartime
   726  func (p *ProjectiveGroupElement) FromBytes(s *Key) {
   727  	h0 := load4(s[:])
   728  	h1 := load3(s[4:]) << 6
   729  	h2 := load3(s[7:]) << 5
   730  	h3 := load3(s[10:]) << 3
   731  	h4 := load3(s[13:]) << 2
   732  	h5 := load4(s[16:])
   733  	h6 := load3(s[20:]) << 7
   734  	h7 := load3(s[23:]) << 5
   735  	h8 := load3(s[26:]) << 4
   736  	h9 := load3(s[29:]) << 2
   737  	var carry [10]int64
   738  	carry[9] = (h9 + int64(1<<24)) >> 25
   739  	h0 += carry[9] * 19
   740  	h9 -= carry[9] << 25
   741  	carry[1] = (h1 + int64(1<<24)) >> 25
   742  	h2 += carry[1]
   743  	h1 -= carry[1] << 25
   744  	carry[3] = (h3 + int64(1<<24)) >> 25
   745  	h4 += carry[3]
   746  	h3 -= carry[3] << 25
   747  	carry[5] = (h5 + int64(1<<24)) >> 25
   748  	h6 += carry[5]
   749  	h5 -= carry[5] << 25
   750  	carry[7] = (h7 + int64(1<<24)) >> 25
   751  	h8 += carry[7]
   752  	h7 -= carry[7] << 25
   753  
   754  	carry[0] = (h0 + int64(1<<25)) >> 26
   755  	h1 += carry[0]
   756  	h0 -= carry[0] << 26
   757  	carry[2] = (h2 + int64(1<<25)) >> 26
   758  	h3 += carry[2]
   759  	h2 -= carry[2] << 26
   760  	carry[4] = (h4 + int64(1<<25)) >> 26
   761  	h5 += carry[4]
   762  	h4 -= carry[4] << 26
   763  	carry[6] = (h6 + int64(1<<25)) >> 26
   764  	h7 += carry[6]
   765  	h6 -= carry[6] << 26
   766  	carry[8] = (h8 + int64(1<<25)) >> 26
   767  	h9 += carry[8]
   768  	h8 -= carry[8] << 26
   769  
   770  	var u, v, w, x, y, z FieldElement
   771  	u[0] = int32(h0)
   772  	u[1] = int32(h1)
   773  	u[2] = int32(h2)
   774  	u[3] = int32(h3)
   775  	u[4] = int32(h4)
   776  	u[5] = int32(h5)
   777  	u[6] = int32(h6)
   778  	u[7] = int32(h7)
   779  	u[8] = int32(h8)
   780  	u[9] = int32(h9)
   781  	FeSquare2(&v, &u) /* 2 * u^2 */
   782  	w.One()
   783  	FeAdd(&w, &v, &w)        /* w = 2 * u^2 + 1 */
   784  	FeSquare(&x, &w)         /* w^2 */
   785  	FeMul(&y, &FeMa2, &v)    /* -2 * A^2 * u^2 */
   786  	FeAdd(&x, &x, &y)        /* x = w^2 - 2 * A^2 * u^2 */
   787  	FeDivPowM1(&p.X, &w, &x) /* (w / x)^(m + 1) */
   788  	FeSquare(&y, &p.X)
   789  	FeMul(&x, &y, &x)
   790  	FeSub(&y, &w, &x)
   791  	FeCopy(&z, &FeMa)
   792  	isNegative := false
   793  	var sign byte
   794  	if y.IsNonZero() != 0 {
   795  		FeAdd(&y, &w, &x)
   796  		if y.IsNonZero() != 0 {
   797  			isNegative = true
   798  		} else {
   799  			FeMul(&p.X, &p.X, &FeFffb1)
   800  		}
   801  	} else {
   802  		FeMul(&p.X, &p.X, &FeFffb2)
   803  	}
   804  	if isNegative {
   805  		FeMul(&x, &x, &FeSqrtM1)
   806  		FeSub(&y, &w, &x)
   807  		if y.IsNonZero() != 0 {
   808  			FeAdd(&y, &w, &x)
   809  			FeMul(&p.X, &p.X, &FeFffb3)
   810  		} else {
   811  			FeMul(&p.X, &p.X, &FeFffb4)
   812  		}
   813  		/* p.X = sqrt(A * (A + 2) * w / x) */
   814  		/* z = -A */
   815  		sign = 1
   816  	} else {
   817  		FeMul(&p.X, &p.X, &u) /* u * sqrt(2 * A * (A + 2) * w / x) */
   818  		FeMul(&z, &z, &v)     /* -2 * A * u^2 */
   819  		sign = 0
   820  	}
   821  	if p.X.IsNegative() != sign {
   822  		FeNeg(&p.X, &p.X)
   823  	}
   824  	FeAdd(&p.Z, &z, &w)
   825  	FeSub(&p.Y, &z, &w)
   826  	FeMul(&p.X, &p.X, &p.Z)
   827  }
   828  
   829  func (p *ExtendedGroupElement) Zero() {
   830  	p.X.Zero()
   831  	p.Y.One()
   832  	p.Z.One()
   833  	p.T.Zero()
   834  }
   835  
   836  func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) {
   837  	var q ProjectiveGroupElement
   838  	p.ToProjective(&q)
   839  	q.Double(r)
   840  }
   841  
   842  func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) {
   843  	FeAdd(&r.yPlusX, &p.Y, &p.X)
   844  	FeSub(&r.yMinusX, &p.Y, &p.X)
   845  	FeCopy(&r.Z, &p.Z)
   846  	FeMul(&r.T2d, &p.T, &d2)
   847  }
   848  
   849  func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) {
   850  	FeCopy(&r.X, &p.X)
   851  	FeCopy(&r.Y, &p.Y)
   852  	FeCopy(&r.Z, &p.Z)
   853  }
   854  
   855  func (p *ExtendedGroupElement) ToBytes(s *Key) {
   856  	var recip, x, y FieldElement
   857  
   858  	FeInvert(&recip, &p.Z)
   859  	FeMul(&x, &p.X, &recip)
   860  	FeMul(&y, &p.Y, &recip)
   861  	FeToBytes(s, &y)
   862  	s[31] ^= x.IsNegative() << 7
   863  }
   864  
// FromBytes decodes the 32-byte encoding s into p and reports whether s
// describes a point on the curve. It is the counterpart of ref10's
// ge_frombytes_vartime, with the field-element decoding expanded inline so
// that a non-canonical y can be rejected.
//
// s holds the y coordinate in its low 255 bits (little endian); the top bit
// of s[31] carries the sign of x.
//
// The function returns false when
//   - y is not canonical, i.e. the 255-bit value is >= 2^255-19, or
//   - neither v*x^2-u nor v*x^2+u is zero for the candidate x, i.e. no x
//     exists for this y.
//
// When it returns false after the canonical check, p.Y, p.Z and p.X have
// already been overwritten, so p must not be used. The running time depends
// on s.
//
// NOTE(review): an encoding with x == 0 and the sign bit set is accepted; the
// check that would reject it is present below but commented out.
func (p *ExtendedGroupElement) FromBytes(s *Key) bool {
	/* Original implementation without checks
	var u, v, v3, vxx, check FieldElement

	FeFromBytes(&p.Y, s)
	p.Z.One()
	FeSquare(&u, &p.Y)
	FeMul(&v, &u, &d)
	FeSub(&u, &u, &p.Z) // y = y^2-1
	FeAdd(&v, &v, &p.Z) // v = dy^2+1

	FeSquare(&v3, &v)
	FeMul(&v3, &v3, &v) // v3 = v^3
	FeSquare(&p.X, &v3)
	FeMul(&p.X, &p.X, &v)
	FeMul(&p.X, &p.X, &u) // x = uv^7

	fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
	FeMul(&p.X, &p.X, &v3)
	FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)

	var tmpX, tmp2 Key

	FeSquare(&vxx, &p.X)
	FeMul(&vxx, &vxx, &v)
	FeSub(&check, &vxx, &u) // vx^2-u
	if check.IsNonZero() == 1 {
		FeAdd(&check, &vxx, &u) // vx^2+u
		if check.IsNonZero() == 1 {
			return false
		}
		FeMul(&p.X, &p.X, &SqrtM1)

		FeToBytes(&tmpX, &p.X)
		for i, v := range tmpX {
			tmp2[31-i] = v
		}
	}

	if p.X.IsNegative() != (s[31] >> 7) {
		FeNeg(&p.X, &p.X)
	}

	FeMul(&p.T, &p.X, &p.Y)
	return true

	*/

	var u, v, vxx, check FieldElement

	// Expanded FeFromBytes (with canonical check): split s into ten limbs.
	// Each limb is loaded from its byte offset and pre-shifted; the mask on
	// h9 (2^23-1) drops the top bit of s[31], which is the sign of x and not
	// part of y.
	h0 := load4(s[:])
	h1 := load3(s[4:]) << 6
	h2 := load3(s[7:]) << 5
	h3 := load3(s[10:]) << 3
	h4 := load3(s[13:]) << 2
	h5 := load4(s[16:])
	h6 := load3(s[20:]) << 7
	h7 := load3(s[23:]) << 5
	h8 := load3(s[26:]) << 4
	h9 := (load3(s[29:]) & 8388607) << 2

	// Validate the number to be canonical: h1..h9 at their maximum values
	// together with h0 >= 2^32-19 means the encoded y is >= 2^255-19.
	if h9 == 33554428 && h8 == 268435440 && h7 == 536870880 && h6 == 2147483520 &&
		h5 == 4294967295 && h4 == 67108860 && h3 == 134217720 && h2 == 536870880 &&
		h1 == 1073741760 && h0 >= 4294967277 {
		return false
	}

	// Carry propagation: odd limbs are rounded to 25 bits first, then even
	// limbs to 26 bits. The carry out of h9 wraps into h0 multiplied by 19.
	var carry [10]int64
	carry[9] = (h9 + 1<<24) >> 25
	h0 += carry[9] * 19
	h9 -= carry[9] << 25
	carry[1] = (h1 + 1<<24) >> 25
	h2 += carry[1]
	h1 -= carry[1] << 25
	carry[3] = (h3 + 1<<24) >> 25
	h4 += carry[3]
	h3 -= carry[3] << 25
	carry[5] = (h5 + 1<<24) >> 25
	h6 += carry[5]
	h5 -= carry[5] << 25
	carry[7] = (h7 + 1<<24) >> 25
	h8 += carry[7]
	h7 -= carry[7] << 25

	carry[0] = (h0 + 1<<25) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	carry[2] = (h2 + 1<<25) >> 26
	h3 += carry[2]
	h2 -= carry[2] << 26
	carry[4] = (h4 + 1<<25) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	carry[6] = (h6 + 1<<25) >> 26
	h7 += carry[6]
	h6 -= carry[6] << 26
	carry[8] = (h8 + 1<<25) >> 26
	h9 += carry[8]
	h8 -= carry[8] << 26

	p.Y[0] = int32(h0)
	p.Y[1] = int32(h1)
	p.Y[2] = int32(h2)
	p.Y[3] = int32(h3)
	p.Y[4] = int32(h4)
	p.Y[5] = int32(h5)
	p.Y[6] = int32(h6)
	p.Y[7] = int32(h7)
	p.Y[8] = int32(h8)
	p.Y[9] = int32(h9)
	// Finished: FeFromBytes

	// Recover x from y: with u = y^2-1 and v = d*y^2+1, x^2 = u/v.
	p.Z.One() // = FeOne(&p.Z)
	FeSquare(&u, &p.Y)
	FeMul(&v, &u, &edD)
	FeSub(&u, &u, &p.Z) // u = y^2-1
	FeAdd(&v, &v, &p.Z) // v = dy^2+1

	FeDivPowM1(&p.X, &u, &v) // x = uv^3(uv^7)^((q-5)/8)

	// The candidate x is accepted if v*x^2 == u. If instead v*x^2 == -u the
	// candidate is multiplied by edSqrtM1; if neither holds, s is rejected.
	FeSquare(&vxx, &p.X)
	FeMul(&vxx, &vxx, &v)
	FeSub(&check, &vxx, &u) // vx^2-u
	if check.IsNonZero() == 1 {
		FeAdd(&check, &vxx, &u) // vx^2+u
		if check.IsNonZero() == 1 {
			return false
		}
		FeMul(&p.X, &p.X, &edSqrtM1)
	}

	// Select the root whose sign matches the sign bit stored in s[31].
	if p.X.IsNegative() != (s[31] >> 7) {
		// If x = 0, the sign must be positive
		if p.X.IsNonZero() == 0 {
			// NOTE(review): rejection of this encoding is disabled, so the
			// branch has no effect and x = 0 is simply negated below.
			// Confirm whether this is intentional before re-enabling.
			//return false
		}
		FeNeg(&p.X, &p.X)
	}

	FeMul(&p.T, &p.X, &p.Y) // T = x*y (Z is one)
	return true
}
  1013  
  1014  func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) {
  1015  	FeMul(&r.X, &p.X, &p.T)
  1016  	FeMul(&r.Y, &p.Y, &p.Z)
  1017  	FeMul(&r.Z, &p.Z, &p.T)
  1018  }
  1019  
  1020  func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) {
  1021  	FeMul(&r.X, &p.X, &p.T)
  1022  	FeMul(&r.Y, &p.Y, &p.Z)
  1023  	FeMul(&r.Z, &p.Z, &p.T)
  1024  	FeMul(&r.T, &p.X, &p.Y)
  1025  }
  1026  
  1027  func (p *PreComputedGroupElement) Zero() {
  1028  	p.yPlusX.One()
  1029  	p.yMinusX.One()
  1030  	p.xy2d.Zero()
  1031  }
  1032  
  1033  func (c *CachedGroupElement) Zero() {
  1034  	c.yPlusX.One()
  1035  	c.yMinusX.One()
  1036  	c.Z.One()
  1037  	c.T2d.Zero()
  1038  }
  1039  
  1040  func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
  1041  	var t0 FieldElement
  1042  
  1043  	FeAdd(&r.X, &p.Y, &p.X)
  1044  	FeSub(&r.Y, &p.Y, &p.X)
  1045  	FeMul(&r.Z, &r.X, &q.yPlusX)
  1046  	FeMul(&r.Y, &r.Y, &q.yMinusX)
  1047  	FeMul(&r.T, &q.T2d, &p.T)
  1048  	FeMul(&r.X, &p.Z, &q.Z)
  1049  	FeAdd(&t0, &r.X, &r.X)
  1050  	FeSub(&r.X, &r.Z, &r.Y)
  1051  	FeAdd(&r.Y, &r.Z, &r.Y)
  1052  	FeAdd(&r.Z, &t0, &r.T)
  1053  	FeSub(&r.T, &t0, &r.T)
  1054  }
  1055  
  1056  func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
  1057  	var t0 FieldElement
  1058  
  1059  	FeAdd(&r.X, &p.Y, &p.X)
  1060  	FeSub(&r.Y, &p.Y, &p.X)
  1061  	FeMul(&r.Z, &r.X, &q.yPlusX)
  1062  	FeMul(&r.Y, &r.Y, &q.yMinusX)
  1063  	FeMul(&r.T, &q.xy2d, &p.T)
  1064  	FeAdd(&t0, &p.Z, &p.Z)
  1065  	FeSub(&r.X, &r.Z, &r.Y)
  1066  	FeAdd(&r.Y, &r.Z, &r.Y)
  1067  	FeAdd(&r.Z, &t0, &r.T)
  1068  	FeSub(&r.T, &t0, &r.T)
  1069  }
  1070  
  1071  func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
  1072  	var t0 FieldElement
  1073  
  1074  	FeAdd(&r.X, &p.Y, &p.X)
  1075  	FeSub(&r.Y, &p.Y, &p.X)
  1076  	FeMul(&r.Z, &r.X, &q.yMinusX)
  1077  	FeMul(&r.Y, &r.Y, &q.yPlusX)
  1078  	FeMul(&r.T, &q.T2d, &p.T)
  1079  	FeMul(&r.X, &p.Z, &q.Z)
  1080  	FeAdd(&t0, &r.X, &r.X)
  1081  	FeSub(&r.X, &r.Z, &r.Y)
  1082  	FeAdd(&r.Y, &r.Z, &r.Y)
  1083  	FeSub(&r.Z, &t0, &r.T)
  1084  	FeAdd(&r.T, &t0, &r.T)
  1085  }
  1086  
  1087  func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
  1088  	var t0 FieldElement
  1089  
  1090  	FeAdd(&r.X, &p.Y, &p.X)
  1091  	FeSub(&r.Y, &p.Y, &p.X)
  1092  	FeMul(&r.Z, &r.X, &q.yMinusX)
  1093  	FeMul(&r.Y, &r.Y, &q.yPlusX)
  1094  	FeMul(&r.T, &q.xy2d, &p.T)
  1095  	FeAdd(&t0, &p.Z, &p.Z)
  1096  	FeSub(&r.X, &r.Z, &r.Y)
  1097  	FeAdd(&r.Y, &r.Z, &r.Y)
  1098  	FeSub(&r.Z, &t0, &r.T)
  1099  	FeAdd(&r.T, &t0, &r.T)
  1100  }
  1101  
  1102  // r = 8 * t
  1103  func GeMul8(r *CompletedGroupElement, t *ProjectiveGroupElement) {
  1104  	var u ProjectiveGroupElement
  1105  	t.Double(r)
  1106  	r.ToProjective(&u)
  1107  	u.Double(r)
  1108  	r.ToProjective(&u)
  1109  	u.Double(r)
  1110  }
  1111  
  1112  // caches s into an array of CachedGroupElements for scalar multiplication later
  1113  func GePrecompute(r *[8]CachedGroupElement, s *ExtendedGroupElement) {
  1114  	var t CompletedGroupElement
  1115  	var s2, u ExtendedGroupElement
  1116  	s.ToCached(&r[0])
  1117  	s.Double(&t)
  1118  	t.ToExtended(&s2)
  1119  	for i := 0; i < 7; i++ {
  1120  		geAdd(&t, &s2, &r[i])
  1121  		t.ToExtended(&u)
  1122  		u.ToCached(&r[i+1])
  1123  	}
  1124  }
  1125  
  1126  func slide(r *[256]int8, a *Key) {
  1127  	for i := range r {
  1128  		r[i] = int8(1 & (a[i>>3] >> uint(i&7)))
  1129  	}
  1130  
  1131  	for i := range r {
  1132  		if r[i] != 0 {
  1133  			for b := 1; b <= 6 && i+b < 256; b++ {
  1134  				if r[i+b] != 0 {
  1135  					if r[i]+(r[i+b]<<uint(b)) <= 15 {
  1136  						r[i] += r[i+b] << uint(b)
  1137  						r[i+b] = 0
  1138  					} else if r[i]-(r[i+b]<<uint(b)) >= -15 {
  1139  						r[i] -= r[i+b] << uint(b)
  1140  						for k := i + b; k < 256; k++ {
  1141  							if r[k] == 0 {
  1142  								r[k] = 1
  1143  								break
  1144  							}
  1145  							r[k] = 0
  1146  						}
  1147  					} else {
  1148  						break
  1149  					}
  1150  				}
  1151  			}
  1152  		}
  1153  	}
  1154  }
  1155  
  1156  // GeDoubleScalarMultVartime sets r = a*A + b*B
  1157  // where a = a[0]+256*a[1]+...+256^31 a[31].
  1158  // and b = b[0]+256*b[1]+...+256^31 b[31].
  1159  // B is the Ed25519 base point (x,4/5) with x positive.
  1160  func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *Key, A *ExtendedGroupElement, b *Key) {
  1161  	var aSlide, bSlide [256]int8
  1162  	var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
  1163  	var t CompletedGroupElement
  1164  	var u ExtendedGroupElement
  1165  	var i int
  1166  
  1167  	slide(&aSlide, a)
  1168  	slide(&bSlide, b)
  1169  	GePrecompute(&Ai, A)
  1170  
  1171  	r.Zero()
  1172  
  1173  	for i = 255; i >= 0; i-- {
  1174  		if aSlide[i] != 0 || bSlide[i] != 0 {
  1175  			break
  1176  		}
  1177  	}
  1178  
  1179  	for ; i >= 0; i-- {
  1180  		r.Double(&t)
  1181  
  1182  		if aSlide[i] > 0 {
  1183  			t.ToExtended(&u)
  1184  			geAdd(&t, &u, &Ai[aSlide[i]/2])
  1185  		} else if aSlide[i] < 0 {
  1186  			t.ToExtended(&u)
  1187  			geSub(&t, &u, &Ai[(-aSlide[i])/2])
  1188  		}
  1189  
  1190  		if bSlide[i] > 0 {
  1191  			t.ToExtended(&u)
  1192  			geMixedAdd(&t, &u, &bi[bSlide[i]/2])
  1193  		} else if bSlide[i] < 0 {
  1194  			t.ToExtended(&u)
  1195  			geMixedSub(&t, &u, &bi[(-bSlide[i])/2])
  1196  		}
  1197  
  1198  		t.ToProjective(r)
  1199  	}
  1200  }
  1201  
  1202  // sets r = a*A + b*B
  1203  // where Bi is the [8]CachedGroupElement consisting of
  1204  // B,3B,5B,7B,9B,11B,13B,15B
  1205  func GeDoubleScalarMultPrecompVartime2(r *ProjectiveGroupElement, a *Key, Ai *[8]CachedGroupElement, b *Key, Bi *[8]CachedGroupElement) {
  1206  	var aSlide, bSlide [256]int8
  1207  	//var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
  1208  	var t CompletedGroupElement
  1209  	var u ExtendedGroupElement
  1210  	var i int
  1211  	slide(&aSlide, a)
  1212  	slide(&bSlide, b)
  1213  	//GePrecompute(&Ai, A)
  1214  	r.Zero()
  1215  	for i = 255; i >= 0; i-- {
  1216  		if aSlide[i] != 0 || bSlide[i] != 0 {
  1217  			break
  1218  		}
  1219  	}
  1220  	for ; i >= 0; i-- {
  1221  		r.Double(&t)
  1222  		if aSlide[i] > 0 {
  1223  			t.ToExtended(&u)
  1224  			geAdd(&t, &u, &Ai[aSlide[i]/2])
  1225  		} else if aSlide[i] < 0 {
  1226  			t.ToExtended(&u)
  1227  			geSub(&t, &u, &Ai[(-aSlide[i])/2])
  1228  		}
  1229  		if bSlide[i] > 0 {
  1230  			t.ToExtended(&u)
  1231  			geAdd(&t, &u, &Bi[bSlide[i]/2])
  1232  		} else if bSlide[i] < 0 {
  1233  			t.ToExtended(&u)
  1234  			geSub(&t, &u, &Bi[(-bSlide[i])/2])
  1235  		}
  1236  		t.ToProjective(r)
  1237  	}
  1238  	return
  1239  }
  1240  
  1241  // sets r = a*A + b*B
  1242  // where Bi is the [8]CachedGroupElement consisting of
  1243  // B,3B,5B,7B,9B,11B,13B,15B
  1244  func GeDoubleScalarMultPrecompVartime(r *ProjectiveGroupElement, a *Key, A *ExtendedGroupElement, b *Key, Bi *[8]CachedGroupElement) {
  1245  	var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
  1246  	GePrecompute(&Ai, A)
  1247  	GeDoubleScalarMultPrecompVartime2(r, a, &Ai, b, Bi)
  1248  }
  1249  
  1250  // equal returns 1 if b == c and 0 otherwise.
  1251  func equal(b, c int32) int32 {
  1252  	x := uint32(b ^ c)
  1253  	x--
  1254  	return int32(x >> 31)
  1255  }
  1256  
  1257  // negative returns 1 if b < 0 and 0 otherwise.
  1258  func negative(b int32) int32 {
  1259  	return (b >> 31) & 1
  1260  }
  1261  
  1262  func CachedGroupElementCMove(t, u *CachedGroupElement, b int32) {
  1263  	FeCMove(&t.yPlusX, &u.yPlusX, b)
  1264  	FeCMove(&t.yMinusX, &u.yMinusX, b)
  1265  	FeCMove(&t.Z, &u.Z, b)
  1266  	FeCMove(&t.T2d, &u.T2d, b)
  1267  }
  1268  
  1269  func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) {
  1270  	FeCMove(&t.yPlusX, &u.yPlusX, b)
  1271  	FeCMove(&t.yMinusX, &u.yMinusX, b)
  1272  	FeCMove(&t.xy2d, &u.xy2d, b)
  1273  }
  1274  
  1275  func selectPoint(t *PreComputedGroupElement, pos int32, b int32) {
  1276  	var minusT PreComputedGroupElement
  1277  	bNegative := negative(b)
  1278  	bAbs := b - (((-bNegative) & b) << 1)
  1279  
  1280  	t.Zero()
  1281  	for i := int32(0); i < 8; i++ {
  1282  		PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1))
  1283  	}
  1284  	FeCopy(&minusT.yPlusX, &t.yMinusX)
  1285  	FeCopy(&minusT.yMinusX, &t.yPlusX)
  1286  	FeNeg(&minusT.xy2d, &t.xy2d)
  1287  	PreComputedGroupElementCMove(t, &minusT, bNegative)
  1288  }
  1289  
  1290  // GeScalarMult computes h = a*A, where
  1291  //   a = a[0]+256*a[1]+...+256^31 a[31]
  1292  //   A is a point on the curve
  1293  //
  1294  // Preconditions:
  1295  //   a[31] <= 127
  1296  func GeScalarMult(r *ProjectiveGroupElement, a *Key, A *ExtendedGroupElement) {
  1297  	var e [64]int32
  1298  	var carry, carry2 int32
  1299  	for i := 0; i < 31; i++ {
  1300  		carry += int32(a[i])             /* 0..256 */
  1301  		carry2 = (carry + 8) >> 4        /* 0..16 */
  1302  		e[2*i] = carry - (carry2 << 4)   /* -8..7 */
  1303  		carry = (carry2 + 8) >> 4        /* 0..1 */
  1304  		e[2*i+1] = carry2 - (carry << 4) /* -8..7 */
  1305  	}
  1306  	carry += int32(a[31])         /* 0..128 */
  1307  	carry2 = (carry + 8) >> 4     /* 0..8 */
  1308  	e[62] = carry - (carry2 << 4) /* -8..7 */
  1309  	e[63] = carry2                /* 0..8 */
  1310  
  1311  	var Ai [8]CachedGroupElement // A,2A,3A,4A,5A,6A,7A,8A
  1312  	t := new(CompletedGroupElement)
  1313  	u := new(ExtendedGroupElement)
  1314  	A.ToCached(&Ai[0])
  1315  	for i := 0; i < 7; i++ {
  1316  		geAdd(t, A, &Ai[i])
  1317  		t.ToExtended(u)
  1318  		u.ToCached(&Ai[i+1])
  1319  	}
  1320  	r.Zero()
  1321  	cur := new(CachedGroupElement)
  1322  	minusCur := new(CachedGroupElement)
  1323  	for i := 63; i >= 0; i-- {
  1324  		b := e[i]
  1325  		bNegative := negative(b)
  1326  		bAbs := b - (((-bNegative) & b) << 1)
  1327  		r.Double(t)
  1328  		t.ToProjective(r)
  1329  		r.Double(t)
  1330  		t.ToProjective(r)
  1331  		r.Double(t)
  1332  		t.ToProjective(r)
  1333  		r.Double(t)
  1334  		t.ToExtended(u)
  1335  		cur.Zero()
  1336  		for j := int32(0); j < 8; j++ {
  1337  			CachedGroupElementCMove(cur, &Ai[j], equal(bAbs, j+1))
  1338  		}
  1339  		FeCopy(&minusCur.yPlusX, &cur.yMinusX)
  1340  		FeCopy(&minusCur.yMinusX, &cur.yPlusX)
  1341  		FeCopy(&minusCur.Z, &cur.Z)
  1342  		FeNeg(&minusCur.T2d, &cur.T2d)
  1343  		CachedGroupElementCMove(cur, minusCur, bNegative)
  1344  		geAdd(t, u, cur)
  1345  		t.ToProjective(r)
  1346  	}
  1347  }
  1348  
  1349  // GeScalarMultBase computes h = a*B, where
  1350  //   a = a[0]+256*a[1]+...+256^31 a[31]
  1351  //   B is the Ed25519 base point (x,4/5) with x positive.
  1352  //
  1353  // Preconditions:
  1354  //   a[31] <= 127
  1355  func GeScalarMultBase(h *ExtendedGroupElement, a *Key) {
  1356  	var e [64]int8
  1357  
  1358  	for i, v := range a {
  1359  		e[2*i] = int8(v & 15)
  1360  		e[2*i+1] = int8((v >> 4) & 15)
  1361  	}
  1362  
  1363  	// each e[i] is between 0 and 15 and e[63] is between 0 and 7.
  1364  
  1365  	carry := int8(0)
  1366  	for i := 0; i < 63; i++ {
  1367  		e[i] += carry
  1368  		carry = (e[i] + 8) >> 4
  1369  		e[i] -= carry << 4
  1370  	}
  1371  	e[63] += carry
  1372  	// each e[i] is between -8 and 8.
  1373  
  1374  	h.Zero()
  1375  	var t PreComputedGroupElement
  1376  	var r CompletedGroupElement
  1377  	for i := int32(1); i < 64; i += 2 {
  1378  		selectPoint(&t, i/2, int32(e[i]))
  1379  		geMixedAdd(&r, h, &t)
  1380  		r.ToExtended(h)
  1381  	}
  1382  
  1383  	var s ProjectiveGroupElement
  1384  
  1385  	h.Double(&r)
  1386  	r.ToProjective(&s)
  1387  	s.Double(&r)
  1388  	r.ToProjective(&s)
  1389  	s.Double(&r)
  1390  	r.ToProjective(&s)
  1391  	s.Double(&r)
  1392  	r.ToExtended(h)
  1393  
  1394  	for i := int32(0); i < 64; i += 2 {
  1395  		selectPoint(&t, i/2, int32(e[i]))
  1396  		geMixedAdd(&r, h, &t)
  1397  		r.ToExtended(h)
  1398  	}
  1399  }
  1400  
  1401  func ScAdd(s, a, b *Key) {
  1402  	a0 := 2097151 & load3(a[:])
  1403  	a1 := 2097151 & (load4(a[2:]) >> 5)
  1404  	a2 := 2097151 & (load3(a[5:]) >> 2)
  1405  	a3 := 2097151 & (load4(a[7:]) >> 7)
  1406  	a4 := 2097151 & (load4(a[10:]) >> 4)
  1407  	a5 := 2097151 & (load3(a[13:]) >> 1)
  1408  	a6 := 2097151 & (load4(a[15:]) >> 6)
  1409  	a7 := 2097151 & (load3(a[18:]) >> 3)
  1410  	a8 := 2097151 & load3(a[21:])
  1411  	a9 := 2097151 & (load4(a[23:]) >> 5)
  1412  	a10 := 2097151 & (load3(a[26:]) >> 2)
  1413  	a11 := (load4(a[28:]) >> 7)
  1414  	b0 := 2097151 & load3(b[:])
  1415  	b1 := 2097151 & (load4(b[2:]) >> 5)
  1416  	b2 := 2097151 & (load3(b[5:]) >> 2)
  1417  	b3 := 2097151 & (load4(b[7:]) >> 7)
  1418  	b4 := 2097151 & (load4(b[10:]) >> 4)
  1419  	b5 := 2097151 & (load3(b[13:]) >> 1)
  1420  	b6 := 2097151 & (load4(b[15:]) >> 6)
  1421  	b7 := 2097151 & (load3(b[18:]) >> 3)
  1422  	b8 := 2097151 & load3(b[21:])
  1423  	b9 := 2097151 & (load4(b[23:]) >> 5)
  1424  	b10 := 2097151 & (load3(b[26:]) >> 2)
  1425  	b11 := (load4(b[28:]) >> 7)
  1426  	s0 := a0 + b0
  1427  	s1 := a1 + b1
  1428  	s2 := a2 + b2
  1429  	s3 := a3 + b3
  1430  	s4 := a4 + b4
  1431  	s5 := a5 + b5
  1432  	s6 := a6 + b6
  1433  	s7 := a7 + b7
  1434  	s8 := a8 + b8
  1435  	s9 := a9 + b9
  1436  	s10 := a10 + b10
  1437  	s11 := a11 + b11
  1438  	s12 := int64(0)
  1439  	var carry [12]int64
  1440  
  1441  	carry[0] = (s0 + (1 << 20)) >> 21
  1442  	s1 += carry[0]
  1443  	s0 -= carry[0] << 21
  1444  	carry[2] = (s2 + (1 << 20)) >> 21
  1445  	s3 += carry[2]
  1446  	s2 -= carry[2] << 21
  1447  	carry[4] = (s4 + (1 << 20)) >> 21
  1448  	s5 += carry[4]
  1449  	s4 -= carry[4] << 21
  1450  	carry[6] = (s6 + (1 << 20)) >> 21
  1451  	s7 += carry[6]
  1452  	s6 -= carry[6] << 21
  1453  	carry[8] = (s8 + (1 << 20)) >> 21
  1454  	s9 += carry[8]
  1455  	s8 -= carry[8] << 21
  1456  	carry[10] = (s10 + (1 << 20)) >> 21
  1457  	s11 += carry[10]
  1458  	s10 -= carry[10] << 21
  1459  
  1460  	carry[1] = (s1 + (1 << 20)) >> 21
  1461  	s2 += carry[1]
  1462  	s1 -= carry[1] << 21
  1463  	carry[3] = (s3 + (1 << 20)) >> 21
  1464  	s4 += carry[3]
  1465  	s3 -= carry[3] << 21
  1466  	carry[5] = (s5 + (1 << 20)) >> 21
  1467  	s6 += carry[5]
  1468  	s5 -= carry[5] << 21
  1469  	carry[7] = (s7 + (1 << 20)) >> 21
  1470  	s8 += carry[7]
  1471  	s7 -= carry[7] << 21
  1472  	carry[9] = (s9 + (1 << 20)) >> 21
  1473  	s10 += carry[9]
  1474  	s9 -= carry[9] << 21
  1475  	carry[11] = (s11 + (1 << 20)) >> 21
  1476  	s12 += carry[11]
  1477  	s11 -= carry[11] << 21
  1478  
  1479  	s0 += s12 * 666643
  1480  	s1 += s12 * 470296
  1481  	s2 += s12 * 654183
  1482  	s3 -= s12 * 997805
  1483  	s4 += s12 * 136657
  1484  	s5 -= s12 * 683901
  1485  	s12 = 0
  1486  
  1487  	carry[0] = s0 >> 21
  1488  	s1 += carry[0]
  1489  	s0 -= carry[0] << 21
  1490  	carry[1] = s1 >> 21
  1491  	s2 += carry[1]
  1492  	s1 -= carry[1] << 21
  1493  	carry[2] = s2 >> 21
  1494  	s3 += carry[2]
  1495  	s2 -= carry[2] << 21
  1496  	carry[3] = s3 >> 21
  1497  	s4 += carry[3]
  1498  	s3 -= carry[3] << 21
  1499  	carry[4] = s4 >> 21
  1500  	s5 += carry[4]
  1501  	s4 -= carry[4] << 21
  1502  	carry[5] = s5 >> 21
  1503  	s6 += carry[5]
  1504  	s5 -= carry[5] << 21
  1505  	carry[6] = s6 >> 21
  1506  	s7 += carry[6]
  1507  	s6 -= carry[6] << 21
  1508  	carry[7] = s7 >> 21
  1509  	s8 += carry[7]
  1510  	s7 -= carry[7] << 21
  1511  	carry[8] = s8 >> 21
  1512  	s9 += carry[8]
  1513  	s8 -= carry[8] << 21
  1514  	carry[9] = s9 >> 21
  1515  	s10 += carry[9]
  1516  	s9 -= carry[9] << 21
  1517  	carry[10] = s10 >> 21
  1518  	s11 += carry[10]
  1519  	s10 -= carry[10] << 21
  1520  	carry[11] = s11 >> 21
  1521  	s12 += carry[11]
  1522  	s11 -= carry[11] << 21
  1523  
  1524  	s0 += s12 * 666643
  1525  	s1 += s12 * 470296
  1526  	s2 += s12 * 654183
  1527  	s3 -= s12 * 997805
  1528  	s4 += s12 * 136657
  1529  	s5 -= s12 * 683901
  1530  
  1531  	carry[0] = s0 >> 21
  1532  	s1 += carry[0]
  1533  	s0 -= carry[0] << 21
  1534  	carry[1] = s1 >> 21
  1535  	s2 += carry[1]
  1536  	s1 -= carry[1] << 21
  1537  	carry[2] = s2 >> 21
  1538  	s3 += carry[2]
  1539  	s2 -= carry[2] << 21
  1540  	carry[3] = s3 >> 21
  1541  	s4 += carry[3]
  1542  	s3 -= carry[3] << 21
  1543  	carry[4] = s4 >> 21
  1544  	s5 += carry[4]
  1545  	s4 -= carry[4] << 21
  1546  	carry[5] = s5 >> 21
  1547  	s6 += carry[5]
  1548  	s5 -= carry[5] << 21
  1549  	carry[6] = s6 >> 21
  1550  	s7 += carry[6]
  1551  	s6 -= carry[6] << 21
  1552  	carry[7] = s7 >> 21
  1553  	s8 += carry[7]
  1554  	s7 -= carry[7] << 21
  1555  	carry[8] = s8 >> 21
  1556  	s9 += carry[8]
  1557  	s8 -= carry[8] << 21
  1558  	carry[9] = s9 >> 21
  1559  	s10 += carry[9]
  1560  	s9 -= carry[9] << 21
  1561  	carry[10] = s10 >> 21
  1562  	s11 += carry[10]
  1563  	s10 -= carry[10] << 21
  1564  
  1565  	s[0] = byte(s0 >> 0)
  1566  	s[1] = byte(s0 >> 8)
  1567  	s[2] = byte((s0 >> 16) | (s1 << 5))
  1568  	s[3] = byte(s1 >> 3)
  1569  	s[4] = byte(s1 >> 11)
  1570  	s[5] = byte((s1 >> 19) | (s2 << 2))
  1571  	s[6] = byte(s2 >> 6)
  1572  	s[7] = byte((s2 >> 14) | (s3 << 7))
  1573  	s[8] = byte(s3 >> 1)
  1574  	s[9] = byte(s3 >> 9)
  1575  	s[10] = byte((s3 >> 17) | (s4 << 4))
  1576  	s[11] = byte(s4 >> 4)
  1577  	s[12] = byte(s4 >> 12)
  1578  	s[13] = byte((s4 >> 20) | (s5 << 1))
  1579  	s[14] = byte(s5 >> 7)
  1580  	s[15] = byte((s5 >> 15) | (s6 << 6))
  1581  	s[16] = byte(s6 >> 2)
  1582  	s[17] = byte(s6 >> 10)
  1583  	s[18] = byte((s6 >> 18) | (s7 << 3))
  1584  	s[19] = byte(s7 >> 5)
  1585  	s[20] = byte(s7 >> 13)
  1586  	s[21] = byte(s8 >> 0)
  1587  	s[22] = byte(s8 >> 8)
  1588  	s[23] = byte((s8 >> 16) | (s9 << 5))
  1589  	s[24] = byte(s9 >> 3)
  1590  	s[25] = byte(s9 >> 11)
  1591  	s[26] = byte((s9 >> 19) | (s10 << 2))
  1592  	s[27] = byte(s10 >> 6)
  1593  	s[28] = byte((s10 >> 14) | (s11 << 7))
  1594  	s[29] = byte(s11 >> 1)
  1595  	s[30] = byte(s11 >> 9)
  1596  	s[31] = byte(s11 >> 17)
  1597  }
  1598  
  1599  func ScSub(s, a, b *Key) {
  1600  	a0 := 2097151 & load3(a[:])
  1601  	a1 := 2097151 & (load4(a[2:]) >> 5)
  1602  	a2 := 2097151 & (load3(a[5:]) >> 2)
  1603  	a3 := 2097151 & (load4(a[7:]) >> 7)
  1604  	a4 := 2097151 & (load4(a[10:]) >> 4)
  1605  	a5 := 2097151 & (load3(a[13:]) >> 1)
  1606  	a6 := 2097151 & (load4(a[15:]) >> 6)
  1607  	a7 := 2097151 & (load3(a[18:]) >> 3)
  1608  	a8 := 2097151 & load3(a[21:])
  1609  	a9 := 2097151 & (load4(a[23:]) >> 5)
  1610  	a10 := 2097151 & (load3(a[26:]) >> 2)
  1611  	a11 := (load4(a[28:]) >> 7)
  1612  	b0 := 2097151 & load3(b[:])
  1613  	b1 := 2097151 & (load4(b[2:]) >> 5)
  1614  	b2 := 2097151 & (load3(b[5:]) >> 2)
  1615  	b3 := 2097151 & (load4(b[7:]) >> 7)
  1616  	b4 := 2097151 & (load4(b[10:]) >> 4)
  1617  	b5 := 2097151 & (load3(b[13:]) >> 1)
  1618  	b6 := 2097151 & (load4(b[15:]) >> 6)
  1619  	b7 := 2097151 & (load3(b[18:]) >> 3)
  1620  	b8 := 2097151 & load3(b[21:])
  1621  	b9 := 2097151 & (load4(b[23:]) >> 5)
  1622  	b10 := 2097151 & (load3(b[26:]) >> 2)
  1623  	b11 := (load4(b[28:]) >> 7)
  1624  	s0 := a0 - b0
  1625  	s1 := a1 - b1
  1626  	s2 := a2 - b2
  1627  	s3 := a3 - b3
  1628  	s4 := a4 - b4
  1629  	s5 := a5 - b5
  1630  	s6 := a6 - b6
  1631  	s7 := a7 - b7
  1632  	s8 := a8 - b8
  1633  	s9 := a9 - b9
  1634  	s10 := a10 - b10
  1635  	s11 := a11 - b11
  1636  	s12 := int64(0)
  1637  	var carry [12]int64
  1638  
  1639  	carry[0] = (s0 + (1 << 20)) >> 21
  1640  	s1 += carry[0]
  1641  	s0 -= carry[0] << 21
  1642  	carry[2] = (s2 + (1 << 20)) >> 21
  1643  	s3 += carry[2]
  1644  	s2 -= carry[2] << 21
  1645  	carry[4] = (s4 + (1 << 20)) >> 21
  1646  	s5 += carry[4]
  1647  	s4 -= carry[4] << 21
  1648  	carry[6] = (s6 + (1 << 20)) >> 21
  1649  	s7 += carry[6]
  1650  	s6 -= carry[6] << 21
  1651  	carry[8] = (s8 + (1 << 20)) >> 21
  1652  	s9 += carry[8]
  1653  	s8 -= carry[8] << 21
  1654  	carry[10] = (s10 + (1 << 20)) >> 21
  1655  	s11 += carry[10]
  1656  	s10 -= carry[10] << 21
  1657  
  1658  	carry[1] = (s1 + (1 << 20)) >> 21
  1659  	s2 += carry[1]
  1660  	s1 -= carry[1] << 21
  1661  	carry[3] = (s3 + (1 << 20)) >> 21
  1662  	s4 += carry[3]
  1663  	s3 -= carry[3] << 21
  1664  	carry[5] = (s5 + (1 << 20)) >> 21
  1665  	s6 += carry[5]
  1666  	s5 -= carry[5] << 21
  1667  	carry[7] = (s7 + (1 << 20)) >> 21
  1668  	s8 += carry[7]
  1669  	s7 -= carry[7] << 21
  1670  	carry[9] = (s9 + (1 << 20)) >> 21
  1671  	s10 += carry[9]
  1672  	s9 -= carry[9] << 21
  1673  	carry[11] = (s11 + (1 << 20)) >> 21
  1674  	s12 += carry[11]
  1675  	s11 -= carry[11] << 21
  1676  
  1677  	s0 += s12 * 666643
  1678  	s1 += s12 * 470296
  1679  	s2 += s12 * 654183
  1680  	s3 -= s12 * 997805
  1681  	s4 += s12 * 136657
  1682  	s5 -= s12 * 683901
  1683  	s12 = 0
  1684  
  1685  	carry[0] = s0 >> 21
  1686  	s1 += carry[0]
  1687  	s0 -= carry[0] << 21
  1688  	carry[1] = s1 >> 21
  1689  	s2 += carry[1]
  1690  	s1 -= carry[1] << 21
  1691  	carry[2] = s2 >> 21
  1692  	s3 += carry[2]
  1693  	s2 -= carry[2] << 21
  1694  	carry[3] = s3 >> 21
  1695  	s4 += carry[3]
  1696  	s3 -= carry[3] << 21
  1697  	carry[4] = s4 >> 21
  1698  	s5 += carry[4]
  1699  	s4 -= carry[4] << 21
  1700  	carry[5] = s5 >> 21
  1701  	s6 += carry[5]
  1702  	s5 -= carry[5] << 21
  1703  	carry[6] = s6 >> 21
  1704  	s7 += carry[6]
  1705  	s6 -= carry[6] << 21
  1706  	carry[7] = s7 >> 21
  1707  	s8 += carry[7]
  1708  	s7 -= carry[7] << 21
  1709  	carry[8] = s8 >> 21
  1710  	s9 += carry[8]
  1711  	s8 -= carry[8] << 21
  1712  	carry[9] = s9 >> 21
  1713  	s10 += carry[9]
  1714  	s9 -= carry[9] << 21
  1715  	carry[10] = s10 >> 21
  1716  	s11 += carry[10]
  1717  	s10 -= carry[10] << 21
  1718  	carry[11] = s11 >> 21
  1719  	s12 += carry[11]
  1720  	s11 -= carry[11] << 21
  1721  
  1722  	s0 += s12 * 666643
  1723  	s1 += s12 * 470296
  1724  	s2 += s12 * 654183
  1725  	s3 -= s12 * 997805
  1726  	s4 += s12 * 136657
  1727  	s5 -= s12 * 683901
  1728  
  1729  	carry[0] = s0 >> 21
  1730  	s1 += carry[0]
  1731  	s0 -= carry[0] << 21
  1732  	carry[1] = s1 >> 21
  1733  	s2 += carry[1]
  1734  	s1 -= carry[1] << 21
  1735  	carry[2] = s2 >> 21
  1736  	s3 += carry[2]
  1737  	s2 -= carry[2] << 21
  1738  	carry[3] = s3 >> 21
  1739  	s4 += carry[3]
  1740  	s3 -= carry[3] << 21
  1741  	carry[4] = s4 >> 21
  1742  	s5 += carry[4]
  1743  	s4 -= carry[4] << 21
  1744  	carry[5] = s5 >> 21
  1745  	s6 += carry[5]
  1746  	s5 -= carry[5] << 21
  1747  	carry[6] = s6 >> 21
  1748  	s7 += carry[6]
  1749  	s6 -= carry[6] << 21
  1750  	carry[7] = s7 >> 21
  1751  	s8 += carry[7]
  1752  	s7 -= carry[7] << 21
  1753  	carry[8] = s8 >> 21
  1754  	s9 += carry[8]
  1755  	s8 -= carry[8] << 21
  1756  	carry[9] = s9 >> 21
  1757  	s10 += carry[9]
  1758  	s9 -= carry[9] << 21
  1759  	carry[10] = s10 >> 21
  1760  	s11 += carry[10]
  1761  	s10 -= carry[10] << 21
  1762  
  1763  	s[0] = byte(s0 >> 0)
  1764  	s[1] = byte(s0 >> 8)
  1765  	s[2] = byte((s0 >> 16) | (s1 << 5))
  1766  	s[3] = byte(s1 >> 3)
  1767  	s[4] = byte(s1 >> 11)
  1768  	s[5] = byte((s1 >> 19) | (s2 << 2))
  1769  	s[6] = byte(s2 >> 6)
  1770  	s[7] = byte((s2 >> 14) | (s3 << 7))
  1771  	s[8] = byte(s3 >> 1)
  1772  	s[9] = byte(s3 >> 9)
  1773  	s[10] = byte((s3 >> 17) | (s4 << 4))
  1774  	s[11] = byte(s4 >> 4)
  1775  	s[12] = byte(s4 >> 12)
  1776  	s[13] = byte((s4 >> 20) | (s5 << 1))
  1777  	s[14] = byte(s5 >> 7)
  1778  	s[15] = byte((s5 >> 15) | (s6 << 6))
  1779  	s[16] = byte(s6 >> 2)
  1780  	s[17] = byte(s6 >> 10)
  1781  	s[18] = byte((s6 >> 18) | (s7 << 3))
  1782  	s[19] = byte(s7 >> 5)
  1783  	s[20] = byte(s7 >> 13)
  1784  	s[21] = byte(s8 >> 0)
  1785  	s[22] = byte(s8 >> 8)
  1786  	s[23] = byte((s8 >> 16) | (s9 << 5))
  1787  	s[24] = byte(s9 >> 3)
  1788  	s[25] = byte(s9 >> 11)
  1789  	s[26] = byte((s9 >> 19) | (s10 << 2))
  1790  	s[27] = byte(s10 >> 6)
  1791  	s[28] = byte((s10 >> 14) | (s11 << 7))
  1792  	s[29] = byte(s11 >> 1)
  1793  	s[30] = byte(s11 >> 9)
  1794  	s[31] = byte(s11 >> 17)
  1795  }
  1796  
  1797  func signum(a int64) int64 {
  1798  	return a>>63 - ((-a) >> 63)
  1799  }
  1800  
  1801  // equivalent to sc_check
  1802  func Sc_check(s *Key) bool {
  1803  	return ScValid(s)
  1804  }
  1805  func ScValid(s *Key) bool {
  1806  	s0 := load4(s[:])
  1807  	s1 := load4(s[4:])
  1808  	s2 := load4(s[8:])
  1809  	s3 := load4(s[12:])
  1810  	s4 := load4(s[16:])
  1811  	s5 := load4(s[20:])
  1812  	s6 := load4(s[24:])
  1813  	s7 := load4(s[28:])
  1814  	return (signum(1559614444-s0)+(signum(1477600026-s1)<<1)+(signum(2734136534-s2)<<2)+(signum(350157278-s3)<<3)+(signum(-s4)<<4)+(signum(-s5)<<5)+(signum(-s6)<<6)+(signum(268435456-s7)<<7))>>8 == 0
  1815  
  1816  }
  1817  
  1818  func ScIsZero(s *Key) bool {
  1819  	return ((int(s[0]|s[1]|s[2]|s[3]|s[4]|s[5]|s[6]|s[7]|s[8]|
  1820  		s[9]|s[10]|s[11]|s[12]|s[13]|s[14]|s[15]|s[16]|s[17]|
  1821  		s[18]|s[19]|s[20]|s[21]|s[22]|s[23]|s[24]|s[25]|s[26]|
  1822  		s[27]|s[28]|s[29]|s[30]|s[31])-1)>>8)+1 == 0
  1823  }
  1824  
  1825  // The scalars are GF(2^252 + 27742317777372353535851937790883648493).
  1826  
  1827  // Input:
  1828  //   a[0]+256*a[1]+...+256^31*a[31] = a
  1829  //   b[0]+256*b[1]+...+256^31*b[31] = b
  1830  //   c[0]+256*c[1]+...+256^31*c[31] = c
  1831  //
  1832  // Output:
  1833  //   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
  1834  //   where l = 2^252 + 27742317777372353535851937790883648493.
  1835  func ScMulAdd(s, a, b, c *Key) {
  1836  	a0 := 2097151 & load3(a[:])
  1837  	a1 := 2097151 & (load4(a[2:]) >> 5)
  1838  	a2 := 2097151 & (load3(a[5:]) >> 2)
  1839  	a3 := 2097151 & (load4(a[7:]) >> 7)
  1840  	a4 := 2097151 & (load4(a[10:]) >> 4)
  1841  	a5 := 2097151 & (load3(a[13:]) >> 1)
  1842  	a6 := 2097151 & (load4(a[15:]) >> 6)
  1843  	a7 := 2097151 & (load3(a[18:]) >> 3)
  1844  	a8 := 2097151 & load3(a[21:])
  1845  	a9 := 2097151 & (load4(a[23:]) >> 5)
  1846  	a10 := 2097151 & (load3(a[26:]) >> 2)
  1847  	a11 := (load4(a[28:]) >> 7)
  1848  	b0 := 2097151 & load3(b[:])
  1849  	b1 := 2097151 & (load4(b[2:]) >> 5)
  1850  	b2 := 2097151 & (load3(b[5:]) >> 2)
  1851  	b3 := 2097151 & (load4(b[7:]) >> 7)
  1852  	b4 := 2097151 & (load4(b[10:]) >> 4)
  1853  	b5 := 2097151 & (load3(b[13:]) >> 1)
  1854  	b6 := 2097151 & (load4(b[15:]) >> 6)
  1855  	b7 := 2097151 & (load3(b[18:]) >> 3)
  1856  	b8 := 2097151 & load3(b[21:])
  1857  	b9 := 2097151 & (load4(b[23:]) >> 5)
  1858  	b10 := 2097151 & (load3(b[26:]) >> 2)
  1859  	b11 := (load4(b[28:]) >> 7)
  1860  	c0 := 2097151 & load3(c[:])
  1861  	c1 := 2097151 & (load4(c[2:]) >> 5)
  1862  	c2 := 2097151 & (load3(c[5:]) >> 2)
  1863  	c3 := 2097151 & (load4(c[7:]) >> 7)
  1864  	c4 := 2097151 & (load4(c[10:]) >> 4)
  1865  	c5 := 2097151 & (load3(c[13:]) >> 1)
  1866  	c6 := 2097151 & (load4(c[15:]) >> 6)
  1867  	c7 := 2097151 & (load3(c[18:]) >> 3)
  1868  	c8 := 2097151 & load3(c[21:])
  1869  	c9 := 2097151 & (load4(c[23:]) >> 5)
  1870  	c10 := 2097151 & (load3(c[26:]) >> 2)
  1871  	c11 := (load4(c[28:]) >> 7)
  1872  	var carry [23]int64
  1873  
  1874  	s0 := c0 + a0*b0
  1875  	s1 := c1 + a0*b1 + a1*b0
  1876  	s2 := c2 + a0*b2 + a1*b1 + a2*b0
  1877  	s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
  1878  	s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
  1879  	s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
  1880  	s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
  1881  	s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
  1882  	s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
  1883  	s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
  1884  	s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
  1885  	s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
  1886  	s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
  1887  	s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
  1888  	s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
  1889  	s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
  1890  	s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
  1891  	s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
  1892  	s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
  1893  	s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
  1894  	s20 := a9*b11 + a10*b10 + a11*b9
  1895  	s21 := a10*b11 + a11*b10
  1896  	s22 := a11 * b11
  1897  	s23 := int64(0)
  1898  
  1899  	carry[0] = (s0 + (1 << 20)) >> 21
  1900  	s1 += carry[0]
  1901  	s0 -= carry[0] << 21
  1902  	carry[2] = (s2 + (1 << 20)) >> 21
  1903  	s3 += carry[2]
  1904  	s2 -= carry[2] << 21
  1905  	carry[4] = (s4 + (1 << 20)) >> 21
  1906  	s5 += carry[4]
  1907  	s4 -= carry[4] << 21
  1908  	carry[6] = (s6 + (1 << 20)) >> 21
  1909  	s7 += carry[6]
  1910  	s6 -= carry[6] << 21
  1911  	carry[8] = (s8 + (1 << 20)) >> 21
  1912  	s9 += carry[8]
  1913  	s8 -= carry[8] << 21
  1914  	carry[10] = (s10 + (1 << 20)) >> 21
  1915  	s11 += carry[10]
  1916  	s10 -= carry[10] << 21
  1917  	carry[12] = (s12 + (1 << 20)) >> 21
  1918  	s13 += carry[12]
  1919  	s12 -= carry[12] << 21
  1920  	carry[14] = (s14 + (1 << 20)) >> 21
  1921  	s15 += carry[14]
  1922  	s14 -= carry[14] << 21
  1923  	carry[16] = (s16 + (1 << 20)) >> 21
  1924  	s17 += carry[16]
  1925  	s16 -= carry[16] << 21
  1926  	carry[18] = (s18 + (1 << 20)) >> 21
  1927  	s19 += carry[18]
  1928  	s18 -= carry[18] << 21
  1929  	carry[20] = (s20 + (1 << 20)) >> 21
  1930  	s21 += carry[20]
  1931  	s20 -= carry[20] << 21
  1932  	carry[22] = (s22 + (1 << 20)) >> 21
  1933  	s23 += carry[22]
  1934  	s22 -= carry[22] << 21
  1935  
  1936  	carry[1] = (s1 + (1 << 20)) >> 21
  1937  	s2 += carry[1]
  1938  	s1 -= carry[1] << 21
  1939  	carry[3] = (s3 + (1 << 20)) >> 21
  1940  	s4 += carry[3]
  1941  	s3 -= carry[3] << 21
  1942  	carry[5] = (s5 + (1 << 20)) >> 21
  1943  	s6 += carry[5]
  1944  	s5 -= carry[5] << 21
  1945  	carry[7] = (s7 + (1 << 20)) >> 21
  1946  	s8 += carry[7]
  1947  	s7 -= carry[7] << 21
  1948  	carry[9] = (s9 + (1 << 20)) >> 21
  1949  	s10 += carry[9]
  1950  	s9 -= carry[9] << 21
  1951  	carry[11] = (s11 + (1 << 20)) >> 21
  1952  	s12 += carry[11]
  1953  	s11 -= carry[11] << 21
  1954  	carry[13] = (s13 + (1 << 20)) >> 21
  1955  	s14 += carry[13]
  1956  	s13 -= carry[13] << 21
  1957  	carry[15] = (s15 + (1 << 20)) >> 21
  1958  	s16 += carry[15]
  1959  	s15 -= carry[15] << 21
  1960  	carry[17] = (s17 + (1 << 20)) >> 21
  1961  	s18 += carry[17]
  1962  	s17 -= carry[17] << 21
  1963  	carry[19] = (s19 + (1 << 20)) >> 21
  1964  	s20 += carry[19]
  1965  	s19 -= carry[19] << 21
  1966  	carry[21] = (s21 + (1 << 20)) >> 21
  1967  	s22 += carry[21]
  1968  	s21 -= carry[21] << 21
  1969  
  1970  	s11 += s23 * 666643
  1971  	s12 += s23 * 470296
  1972  	s13 += s23 * 654183
  1973  	s14 -= s23 * 997805
  1974  	s15 += s23 * 136657
  1975  	s16 -= s23 * 683901
  1976  	s23 = 0
  1977  
  1978  	s10 += s22 * 666643
  1979  	s11 += s22 * 470296
  1980  	s12 += s22 * 654183
  1981  	s13 -= s22 * 997805
  1982  	s14 += s22 * 136657
  1983  	s15 -= s22 * 683901
  1984  	s22 = 0
  1985  
  1986  	s9 += s21 * 666643
  1987  	s10 += s21 * 470296
  1988  	s11 += s21 * 654183
  1989  	s12 -= s21 * 997805
  1990  	s13 += s21 * 136657
  1991  	s14 -= s21 * 683901
  1992  	s21 = 0
  1993  
  1994  	s8 += s20 * 666643
  1995  	s9 += s20 * 470296
  1996  	s10 += s20 * 654183
  1997  	s11 -= s20 * 997805
  1998  	s12 += s20 * 136657
  1999  	s13 -= s20 * 683901
  2000  	s20 = 0
  2001  
  2002  	s7 += s19 * 666643
  2003  	s8 += s19 * 470296
  2004  	s9 += s19 * 654183
  2005  	s10 -= s19 * 997805
  2006  	s11 += s19 * 136657
  2007  	s12 -= s19 * 683901
  2008  	s19 = 0
  2009  
  2010  	s6 += s18 * 666643
  2011  	s7 += s18 * 470296
  2012  	s8 += s18 * 654183
  2013  	s9 -= s18 * 997805
  2014  	s10 += s18 * 136657
  2015  	s11 -= s18 * 683901
  2016  	s18 = 0
  2017  
  2018  	carry[6] = (s6 + (1 << 20)) >> 21
  2019  	s7 += carry[6]
  2020  	s6 -= carry[6] << 21
  2021  	carry[8] = (s8 + (1 << 20)) >> 21
  2022  	s9 += carry[8]
  2023  	s8 -= carry[8] << 21
  2024  	carry[10] = (s10 + (1 << 20)) >> 21
  2025  	s11 += carry[10]
  2026  	s10 -= carry[10] << 21
  2027  	carry[12] = (s12 + (1 << 20)) >> 21
  2028  	s13 += carry[12]
  2029  	s12 -= carry[12] << 21
  2030  	carry[14] = (s14 + (1 << 20)) >> 21
  2031  	s15 += carry[14]
  2032  	s14 -= carry[14] << 21
  2033  	carry[16] = (s16 + (1 << 20)) >> 21
  2034  	s17 += carry[16]
  2035  	s16 -= carry[16] << 21
  2036  
  2037  	carry[7] = (s7 + (1 << 20)) >> 21
  2038  	s8 += carry[7]
  2039  	s7 -= carry[7] << 21
  2040  	carry[9] = (s9 + (1 << 20)) >> 21
  2041  	s10 += carry[9]
  2042  	s9 -= carry[9] << 21
  2043  	carry[11] = (s11 + (1 << 20)) >> 21
  2044  	s12 += carry[11]
  2045  	s11 -= carry[11] << 21
  2046  	carry[13] = (s13 + (1 << 20)) >> 21
  2047  	s14 += carry[13]
  2048  	s13 -= carry[13] << 21
  2049  	carry[15] = (s15 + (1 << 20)) >> 21
  2050  	s16 += carry[15]
  2051  	s15 -= carry[15] << 21
  2052  
  2053  	s5 += s17 * 666643
  2054  	s6 += s17 * 470296
  2055  	s7 += s17 * 654183
  2056  	s8 -= s17 * 997805
  2057  	s9 += s17 * 136657
  2058  	s10 -= s17 * 683901
  2059  	s17 = 0
  2060  
  2061  	s4 += s16 * 666643
  2062  	s5 += s16 * 470296
  2063  	s6 += s16 * 654183
  2064  	s7 -= s16 * 997805
  2065  	s8 += s16 * 136657
  2066  	s9 -= s16 * 683901
  2067  	s16 = 0
  2068  
  2069  	s3 += s15 * 666643
  2070  	s4 += s15 * 470296
  2071  	s5 += s15 * 654183
  2072  	s6 -= s15 * 997805
  2073  	s7 += s15 * 136657
  2074  	s8 -= s15 * 683901
  2075  	s15 = 0
  2076  
  2077  	s2 += s14 * 666643
  2078  	s3 += s14 * 470296
  2079  	s4 += s14 * 654183
  2080  	s5 -= s14 * 997805
  2081  	s6 += s14 * 136657
  2082  	s7 -= s14 * 683901
  2083  	s14 = 0
  2084  
  2085  	s1 += s13 * 666643
  2086  	s2 += s13 * 470296
  2087  	s3 += s13 * 654183
  2088  	s4 -= s13 * 997805
  2089  	s5 += s13 * 136657
  2090  	s6 -= s13 * 683901
  2091  	s13 = 0
  2092  
  2093  	s0 += s12 * 666643
  2094  	s1 += s12 * 470296
  2095  	s2 += s12 * 654183
  2096  	s3 -= s12 * 997805
  2097  	s4 += s12 * 136657
  2098  	s5 -= s12 * 683901
  2099  	s12 = 0
  2100  
  2101  	carry[0] = (s0 + (1 << 20)) >> 21
  2102  	s1 += carry[0]
  2103  	s0 -= carry[0] << 21
  2104  	carry[2] = (s2 + (1 << 20)) >> 21
  2105  	s3 += carry[2]
  2106  	s2 -= carry[2] << 21
  2107  	carry[4] = (s4 + (1 << 20)) >> 21
  2108  	s5 += carry[4]
  2109  	s4 -= carry[4] << 21
  2110  	carry[6] = (s6 + (1 << 20)) >> 21
  2111  	s7 += carry[6]
  2112  	s6 -= carry[6] << 21
  2113  	carry[8] = (s8 + (1 << 20)) >> 21
  2114  	s9 += carry[8]
  2115  	s8 -= carry[8] << 21
  2116  	carry[10] = (s10 + (1 << 20)) >> 21
  2117  	s11 += carry[10]
  2118  	s10 -= carry[10] << 21
  2119  
  2120  	carry[1] = (s1 + (1 << 20)) >> 21
  2121  	s2 += carry[1]
  2122  	s1 -= carry[1] << 21
  2123  	carry[3] = (s3 + (1 << 20)) >> 21
  2124  	s4 += carry[3]
  2125  	s3 -= carry[3] << 21
  2126  	carry[5] = (s5 + (1 << 20)) >> 21
  2127  	s6 += carry[5]
  2128  	s5 -= carry[5] << 21
  2129  	carry[7] = (s7 + (1 << 20)) >> 21
  2130  	s8 += carry[7]
  2131  	s7 -= carry[7] << 21
  2132  	carry[9] = (s9 + (1 << 20)) >> 21
  2133  	s10 += carry[9]
  2134  	s9 -= carry[9] << 21
  2135  	carry[11] = (s11 + (1 << 20)) >> 21
  2136  	s12 += carry[11]
  2137  	s11 -= carry[11] << 21
  2138  
  2139  	s0 += s12 * 666643
  2140  	s1 += s12 * 470296
  2141  	s2 += s12 * 654183
  2142  	s3 -= s12 * 997805
  2143  	s4 += s12 * 136657
  2144  	s5 -= s12 * 683901
  2145  	s12 = 0
  2146  
  2147  	carry[0] = s0 >> 21
  2148  	s1 += carry[0]
  2149  	s0 -= carry[0] << 21
  2150  	carry[1] = s1 >> 21
  2151  	s2 += carry[1]
  2152  	s1 -= carry[1] << 21
  2153  	carry[2] = s2 >> 21
  2154  	s3 += carry[2]
  2155  	s2 -= carry[2] << 21
  2156  	carry[3] = s3 >> 21
  2157  	s4 += carry[3]
  2158  	s3 -= carry[3] << 21
  2159  	carry[4] = s4 >> 21
  2160  	s5 += carry[4]
  2161  	s4 -= carry[4] << 21
  2162  	carry[5] = s5 >> 21
  2163  	s6 += carry[5]
  2164  	s5 -= carry[5] << 21
  2165  	carry[6] = s6 >> 21
  2166  	s7 += carry[6]
  2167  	s6 -= carry[6] << 21
  2168  	carry[7] = s7 >> 21
  2169  	s8 += carry[7]
  2170  	s7 -= carry[7] << 21
  2171  	carry[8] = s8 >> 21
  2172  	s9 += carry[8]
  2173  	s8 -= carry[8] << 21
  2174  	carry[9] = s9 >> 21
  2175  	s10 += carry[9]
  2176  	s9 -= carry[9] << 21
  2177  	carry[10] = s10 >> 21
  2178  	s11 += carry[10]
  2179  	s10 -= carry[10] << 21
  2180  	carry[11] = s11 >> 21
  2181  	s12 += carry[11]
  2182  	s11 -= carry[11] << 21
  2183  
  2184  	s0 += s12 * 666643
  2185  	s1 += s12 * 470296
  2186  	s2 += s12 * 654183
  2187  	s3 -= s12 * 997805
  2188  	s4 += s12 * 136657
  2189  	s5 -= s12 * 683901
  2190  	s12 = 0
  2191  
  2192  	carry[0] = s0 >> 21
  2193  	s1 += carry[0]
  2194  	s0 -= carry[0] << 21
  2195  	carry[1] = s1 >> 21
  2196  	s2 += carry[1]
  2197  	s1 -= carry[1] << 21
  2198  	carry[2] = s2 >> 21
  2199  	s3 += carry[2]
  2200  	s2 -= carry[2] << 21
  2201  	carry[3] = s3 >> 21
  2202  	s4 += carry[3]
  2203  	s3 -= carry[3] << 21
  2204  	carry[4] = s4 >> 21
  2205  	s5 += carry[4]
  2206  	s4 -= carry[4] << 21
  2207  	carry[5] = s5 >> 21
  2208  	s6 += carry[5]
  2209  	s5 -= carry[5] << 21
  2210  	carry[6] = s6 >> 21
  2211  	s7 += carry[6]
  2212  	s6 -= carry[6] << 21
  2213  	carry[7] = s7 >> 21
  2214  	s8 += carry[7]
  2215  	s7 -= carry[7] << 21
  2216  	carry[8] = s8 >> 21
  2217  	s9 += carry[8]
  2218  	s8 -= carry[8] << 21
  2219  	carry[9] = s9 >> 21
  2220  	s10 += carry[9]
  2221  	s9 -= carry[9] << 21
  2222  	carry[10] = s10 >> 21
  2223  	s11 += carry[10]
  2224  	s10 -= carry[10] << 21
  2225  
  2226  	s[0] = byte(s0 >> 0)
  2227  	s[1] = byte(s0 >> 8)
  2228  	s[2] = byte((s0 >> 16) | (s1 << 5))
  2229  	s[3] = byte(s1 >> 3)
  2230  	s[4] = byte(s1 >> 11)
  2231  	s[5] = byte((s1 >> 19) | (s2 << 2))
  2232  	s[6] = byte(s2 >> 6)
  2233  	s[7] = byte((s2 >> 14) | (s3 << 7))
  2234  	s[8] = byte(s3 >> 1)
  2235  	s[9] = byte(s3 >> 9)
  2236  	s[10] = byte((s3 >> 17) | (s4 << 4))
  2237  	s[11] = byte(s4 >> 4)
  2238  	s[12] = byte(s4 >> 12)
  2239  	s[13] = byte((s4 >> 20) | (s5 << 1))
  2240  	s[14] = byte(s5 >> 7)
  2241  	s[15] = byte((s5 >> 15) | (s6 << 6))
  2242  	s[16] = byte(s6 >> 2)
  2243  	s[17] = byte(s6 >> 10)
  2244  	s[18] = byte((s6 >> 18) | (s7 << 3))
  2245  	s[19] = byte(s7 >> 5)
  2246  	s[20] = byte(s7 >> 13)
  2247  	s[21] = byte(s8 >> 0)
  2248  	s[22] = byte(s8 >> 8)
  2249  	s[23] = byte((s8 >> 16) | (s9 << 5))
  2250  	s[24] = byte(s9 >> 3)
  2251  	s[25] = byte(s9 >> 11)
  2252  	s[26] = byte((s9 >> 19) | (s10 << 2))
  2253  	s[27] = byte(s10 >> 6)
  2254  	s[28] = byte((s10 >> 14) | (s11 << 7))
  2255  	s[29] = byte(s11 >> 1)
  2256  	s[30] = byte(s11 >> 9)
  2257  	s[31] = byte(s11 >> 17)
  2258  }
  2259  
  2260  // Input:
  2261  //   a[0]+256*a[1]+...+256^31*a[31] = a
  2262  //   b[0]+256*b[1]+...+256^31*b[31] = b
  2263  //   c[0]+256*c[1]+...+256^31*c[31] = c
  2264  //
  2265  // Output:
  2266  //   s[0]+256*s[1]+...+256^31*s[31] = (c-ab) mod l
  2267  //   where l = 2^252 + 27742317777372353535851937790883648493.
  2268  func ScMulSub(s, a, b, c *Key) {
  2269  	a0 := 2097151 & load3(a[:])
  2270  	a1 := 2097151 & (load4(a[2:]) >> 5)
  2271  	a2 := 2097151 & (load3(a[5:]) >> 2)
  2272  	a3 := 2097151 & (load4(a[7:]) >> 7)
  2273  	a4 := 2097151 & (load4(a[10:]) >> 4)
  2274  	a5 := 2097151 & (load3(a[13:]) >> 1)
  2275  	a6 := 2097151 & (load4(a[15:]) >> 6)
  2276  	a7 := 2097151 & (load3(a[18:]) >> 3)
  2277  	a8 := 2097151 & load3(a[21:])
  2278  	a9 := 2097151 & (load4(a[23:]) >> 5)
  2279  	a10 := 2097151 & (load3(a[26:]) >> 2)
  2280  	a11 := (load4(a[28:]) >> 7)
  2281  	b0 := 2097151 & load3(b[:])
  2282  	b1 := 2097151 & (load4(b[2:]) >> 5)
  2283  	b2 := 2097151 & (load3(b[5:]) >> 2)
  2284  	b3 := 2097151 & (load4(b[7:]) >> 7)
  2285  	b4 := 2097151 & (load4(b[10:]) >> 4)
  2286  	b5 := 2097151 & (load3(b[13:]) >> 1)
  2287  	b6 := 2097151 & (load4(b[15:]) >> 6)
  2288  	b7 := 2097151 & (load3(b[18:]) >> 3)
  2289  	b8 := 2097151 & load3(b[21:])
  2290  	b9 := 2097151 & (load4(b[23:]) >> 5)
  2291  	b10 := 2097151 & (load3(b[26:]) >> 2)
  2292  	b11 := (load4(b[28:]) >> 7)
  2293  	c0 := 2097151 & load3(c[:])
  2294  	c1 := 2097151 & (load4(c[2:]) >> 5)
  2295  	c2 := 2097151 & (load3(c[5:]) >> 2)
  2296  	c3 := 2097151 & (load4(c[7:]) >> 7)
  2297  	c4 := 2097151 & (load4(c[10:]) >> 4)
  2298  	c5 := 2097151 & (load3(c[13:]) >> 1)
  2299  	c6 := 2097151 & (load4(c[15:]) >> 6)
  2300  	c7 := 2097151 & (load3(c[18:]) >> 3)
  2301  	c8 := 2097151 & load3(c[21:])
  2302  	c9 := 2097151 & (load4(c[23:]) >> 5)
  2303  	c10 := 2097151 & (load3(c[26:]) >> 2)
  2304  	c11 := (load4(c[28:]) >> 7)
  2305  	var carry [23]int64
  2306  
  2307  	s0 := c0 - a0*b0
  2308  	s1 := c1 - a0*b1 - a1*b0
  2309  	s2 := c2 - a0*b2 - a1*b1 - a2*b0
  2310  	s3 := c3 - a0*b3 - a1*b2 - a2*b1 - a3*b0
  2311  	s4 := c4 - a0*b4 - a1*b3 - a2*b2 - a3*b1 - a4*b0
  2312  	s5 := c5 - a0*b5 - a1*b4 - a2*b3 - a3*b2 - a4*b1 - a5*b0
  2313  	s6 := c6 - a0*b6 - a1*b5 - a2*b4 - a3*b3 - a4*b2 - a5*b1 - a6*b0
  2314  	s7 := c7 - a0*b7 - a1*b6 - a2*b5 - a3*b4 - a4*b3 - a5*b2 - a6*b1 - a7*b0
  2315  	s8 := c8 - a0*b8 - a1*b7 - a2*b6 - a3*b5 - a4*b4 - a5*b3 - a6*b2 - a7*b1 - a8*b0
  2316  	s9 := c9 - a0*b9 - a1*b8 - a2*b7 - a3*b6 - a4*b5 - a5*b4 - a6*b3 - a7*b2 - a8*b1 - a9*b0
  2317  	s10 := c10 - a0*b10 - a1*b9 - a2*b8 - a3*b7 - a4*b6 - a5*b5 - a6*b4 - a7*b3 - a8*b2 - a9*b1 - a10*b0
  2318  	s11 := c11 - a0*b11 - a1*b10 - a2*b9 - a3*b8 - a4*b7 - a5*b6 - a6*b5 - a7*b4 - a8*b3 - a9*b2 - a10*b1 - a11*b0
  2319  	s12 := -a1*b11 - a2*b10 - a3*b9 - a4*b8 - a5*b7 - a6*b6 - a7*b5 - a8*b4 - a9*b3 - a10*b2 - a11*b1
  2320  	s13 := -a2*b11 - a3*b10 - a4*b9 - a5*b8 - a6*b7 - a7*b6 - a8*b5 - a9*b4 - a10*b3 - a11*b2
  2321  	s14 := -a3*b11 - a4*b10 - a5*b9 - a6*b8 - a7*b7 - a8*b6 - a9*b5 - a10*b4 - a11*b3
  2322  	s15 := -a4*b11 - a5*b10 - a6*b9 - a7*b8 - a8*b7 - a9*b6 - a10*b5 - a11*b4
  2323  	s16 := -a5*b11 - a6*b10 - a7*b9 - a8*b8 - a9*b7 - a10*b6 - a11*b5
  2324  	s17 := -a6*b11 - a7*b10 - a8*b9 - a9*b8 - a10*b7 - a11*b6
  2325  	s18 := -a7*b11 - a8*b10 - a9*b9 - a10*b8 - a11*b7
  2326  	s19 := -a8*b11 - a9*b10 - a10*b9 - a11*b8
  2327  	s20 := -a9*b11 - a10*b10 - a11*b9
  2328  	s21 := -a10*b11 - a11*b10
  2329  	s22 := -a11 * b11
  2330  	s23 := int64(0)
  2331  
  2332  	carry[0] = (s0 + (1 << 20)) >> 21
  2333  	s1 += carry[0]
  2334  	s0 -= carry[0] << 21
  2335  	carry[2] = (s2 + (1 << 20)) >> 21
  2336  	s3 += carry[2]
  2337  	s2 -= carry[2] << 21
  2338  	carry[4] = (s4 + (1 << 20)) >> 21
  2339  	s5 += carry[4]
  2340  	s4 -= carry[4] << 21
  2341  	carry[6] = (s6 + (1 << 20)) >> 21
  2342  	s7 += carry[6]
  2343  	s6 -= carry[6] << 21
  2344  	carry[8] = (s8 + (1 << 20)) >> 21
  2345  	s9 += carry[8]
  2346  	s8 -= carry[8] << 21
  2347  	carry[10] = (s10 + (1 << 20)) >> 21
  2348  	s11 += carry[10]
  2349  	s10 -= carry[10] << 21
  2350  	carry[12] = (s12 + (1 << 20)) >> 21
  2351  	s13 += carry[12]
  2352  	s12 -= carry[12] << 21
  2353  	carry[14] = (s14 + (1 << 20)) >> 21
  2354  	s15 += carry[14]
  2355  	s14 -= carry[14] << 21
  2356  	carry[16] = (s16 + (1 << 20)) >> 21
  2357  	s17 += carry[16]
  2358  	s16 -= carry[16] << 21
  2359  	carry[18] = (s18 + (1 << 20)) >> 21
  2360  	s19 += carry[18]
  2361  	s18 -= carry[18] << 21
  2362  	carry[20] = (s20 + (1 << 20)) >> 21
  2363  	s21 += carry[20]
  2364  	s20 -= carry[20] << 21
  2365  	carry[22] = (s22 + (1 << 20)) >> 21
  2366  	s23 += carry[22]
  2367  	s22 -= carry[22] << 21
  2368  
  2369  	carry[1] = (s1 + (1 << 20)) >> 21
  2370  	s2 += carry[1]
  2371  	s1 -= carry[1] << 21
  2372  	carry[3] = (s3 + (1 << 20)) >> 21
  2373  	s4 += carry[3]
  2374  	s3 -= carry[3] << 21
  2375  	carry[5] = (s5 + (1 << 20)) >> 21
  2376  	s6 += carry[5]
  2377  	s5 -= carry[5] << 21
  2378  	carry[7] = (s7 + (1 << 20)) >> 21
  2379  	s8 += carry[7]
  2380  	s7 -= carry[7] << 21
  2381  	carry[9] = (s9 + (1 << 20)) >> 21
  2382  	s10 += carry[9]
  2383  	s9 -= carry[9] << 21
  2384  	carry[11] = (s11 + (1 << 20)) >> 21
  2385  	s12 += carry[11]
  2386  	s11 -= carry[11] << 21
  2387  	carry[13] = (s13 + (1 << 20)) >> 21
  2388  	s14 += carry[13]
  2389  	s13 -= carry[13] << 21
  2390  	carry[15] = (s15 + (1 << 20)) >> 21
  2391  	s16 += carry[15]
  2392  	s15 -= carry[15] << 21
  2393  	carry[17] = (s17 + (1 << 20)) >> 21
  2394  	s18 += carry[17]
  2395  	s17 -= carry[17] << 21
  2396  	carry[19] = (s19 + (1 << 20)) >> 21
  2397  	s20 += carry[19]
  2398  	s19 -= carry[19] << 21
  2399  	carry[21] = (s21 + (1 << 20)) >> 21
  2400  	s22 += carry[21]
  2401  	s21 -= carry[21] << 21
  2402  
  2403  	s11 += s23 * 666643
  2404  	s12 += s23 * 470296
  2405  	s13 += s23 * 654183
  2406  	s14 -= s23 * 997805
  2407  	s15 += s23 * 136657
  2408  	s16 -= s23 * 683901
  2409  	s23 = 0
  2410  
  2411  	s10 += s22 * 666643
  2412  	s11 += s22 * 470296
  2413  	s12 += s22 * 654183
  2414  	s13 -= s22 * 997805
  2415  	s14 += s22 * 136657
  2416  	s15 -= s22 * 683901
  2417  	s22 = 0
  2418  
  2419  	s9 += s21 * 666643
  2420  	s10 += s21 * 470296
  2421  	s11 += s21 * 654183
  2422  	s12 -= s21 * 997805
  2423  	s13 += s21 * 136657
  2424  	s14 -= s21 * 683901
  2425  	s21 = 0
  2426  
  2427  	s8 += s20 * 666643
  2428  	s9 += s20 * 470296
  2429  	s10 += s20 * 654183
  2430  	s11 -= s20 * 997805
  2431  	s12 += s20 * 136657
  2432  	s13 -= s20 * 683901
  2433  	s20 = 0
  2434  
  2435  	s7 += s19 * 666643
  2436  	s8 += s19 * 470296
  2437  	s9 += s19 * 654183
  2438  	s10 -= s19 * 997805
  2439  	s11 += s19 * 136657
  2440  	s12 -= s19 * 683901
  2441  	s19 = 0
  2442  
  2443  	s6 += s18 * 666643
  2444  	s7 += s18 * 470296
  2445  	s8 += s18 * 654183
  2446  	s9 -= s18 * 997805
  2447  	s10 += s18 * 136657
  2448  	s11 -= s18 * 683901
  2449  	s18 = 0
  2450  
  2451  	carry[6] = (s6 + (1 << 20)) >> 21
  2452  	s7 += carry[6]
  2453  	s6 -= carry[6] << 21
  2454  	carry[8] = (s8 + (1 << 20)) >> 21
  2455  	s9 += carry[8]
  2456  	s8 -= carry[8] << 21
  2457  	carry[10] = (s10 + (1 << 20)) >> 21
  2458  	s11 += carry[10]
  2459  	s10 -= carry[10] << 21
  2460  	carry[12] = (s12 + (1 << 20)) >> 21
  2461  	s13 += carry[12]
  2462  	s12 -= carry[12] << 21
  2463  	carry[14] = (s14 + (1 << 20)) >> 21
  2464  	s15 += carry[14]
  2465  	s14 -= carry[14] << 21
  2466  	carry[16] = (s16 + (1 << 20)) >> 21
  2467  	s17 += carry[16]
  2468  	s16 -= carry[16] << 21
  2469  
  2470  	carry[7] = (s7 + (1 << 20)) >> 21
  2471  	s8 += carry[7]
  2472  	s7 -= carry[7] << 21
  2473  	carry[9] = (s9 + (1 << 20)) >> 21
  2474  	s10 += carry[9]
  2475  	s9 -= carry[9] << 21
  2476  	carry[11] = (s11 + (1 << 20)) >> 21
  2477  	s12 += carry[11]
  2478  	s11 -= carry[11] << 21
  2479  	carry[13] = (s13 + (1 << 20)) >> 21
  2480  	s14 += carry[13]
  2481  	s13 -= carry[13] << 21
  2482  	carry[15] = (s15 + (1 << 20)) >> 21
  2483  	s16 += carry[15]
  2484  	s15 -= carry[15] << 21
  2485  
  2486  	s5 += s17 * 666643
  2487  	s6 += s17 * 470296
  2488  	s7 += s17 * 654183
  2489  	s8 -= s17 * 997805
  2490  	s9 += s17 * 136657
  2491  	s10 -= s17 * 683901
  2492  	s17 = 0
  2493  
  2494  	s4 += s16 * 666643
  2495  	s5 += s16 * 470296
  2496  	s6 += s16 * 654183
  2497  	s7 -= s16 * 997805
  2498  	s8 += s16 * 136657
  2499  	s9 -= s16 * 683901
  2500  	s16 = 0
  2501  
  2502  	s3 += s15 * 666643
  2503  	s4 += s15 * 470296
  2504  	s5 += s15 * 654183
  2505  	s6 -= s15 * 997805
  2506  	s7 += s15 * 136657
  2507  	s8 -= s15 * 683901
  2508  	s15 = 0
  2509  
  2510  	s2 += s14 * 666643
  2511  	s3 += s14 * 470296
  2512  	s4 += s14 * 654183
  2513  	s5 -= s14 * 997805
  2514  	s6 += s14 * 136657
  2515  	s7 -= s14 * 683901
  2516  	s14 = 0
  2517  
  2518  	s1 += s13 * 666643
  2519  	s2 += s13 * 470296
  2520  	s3 += s13 * 654183
  2521  	s4 -= s13 * 997805
  2522  	s5 += s13 * 136657
  2523  	s6 -= s13 * 683901
  2524  	s13 = 0
  2525  
  2526  	s0 += s12 * 666643
  2527  	s1 += s12 * 470296
  2528  	s2 += s12 * 654183
  2529  	s3 -= s12 * 997805
  2530  	s4 += s12 * 136657
  2531  	s5 -= s12 * 683901
  2532  	s12 = 0
  2533  
  2534  	carry[0] = (s0 + (1 << 20)) >> 21
  2535  	s1 += carry[0]
  2536  	s0 -= carry[0] << 21
  2537  	carry[2] = (s2 + (1 << 20)) >> 21
  2538  	s3 += carry[2]
  2539  	s2 -= carry[2] << 21
  2540  	carry[4] = (s4 + (1 << 20)) >> 21
  2541  	s5 += carry[4]
  2542  	s4 -= carry[4] << 21
  2543  	carry[6] = (s6 + (1 << 20)) >> 21
  2544  	s7 += carry[6]
  2545  	s6 -= carry[6] << 21
  2546  	carry[8] = (s8 + (1 << 20)) >> 21
  2547  	s9 += carry[8]
  2548  	s8 -= carry[8] << 21
  2549  	carry[10] = (s10 + (1 << 20)) >> 21
  2550  	s11 += carry[10]
  2551  	s10 -= carry[10] << 21
  2552  
  2553  	carry[1] = (s1 + (1 << 20)) >> 21
  2554  	s2 += carry[1]
  2555  	s1 -= carry[1] << 21
  2556  	carry[3] = (s3 + (1 << 20)) >> 21
  2557  	s4 += carry[3]
  2558  	s3 -= carry[3] << 21
  2559  	carry[5] = (s5 + (1 << 20)) >> 21
  2560  	s6 += carry[5]
  2561  	s5 -= carry[5] << 21
  2562  	carry[7] = (s7 + (1 << 20)) >> 21
  2563  	s8 += carry[7]
  2564  	s7 -= carry[7] << 21
  2565  	carry[9] = (s9 + (1 << 20)) >> 21
  2566  	s10 += carry[9]
  2567  	s9 -= carry[9] << 21
  2568  	carry[11] = (s11 + (1 << 20)) >> 21
  2569  	s12 += carry[11]
  2570  	s11 -= carry[11] << 21
  2571  
  2572  	s0 += s12 * 666643
  2573  	s1 += s12 * 470296
  2574  	s2 += s12 * 654183
  2575  	s3 -= s12 * 997805
  2576  	s4 += s12 * 136657
  2577  	s5 -= s12 * 683901
  2578  	s12 = 0
  2579  
  2580  	carry[0] = s0 >> 21
  2581  	s1 += carry[0]
  2582  	s0 -= carry[0] << 21
  2583  	carry[1] = s1 >> 21
  2584  	s2 += carry[1]
  2585  	s1 -= carry[1] << 21
  2586  	carry[2] = s2 >> 21
  2587  	s3 += carry[2]
  2588  	s2 -= carry[2] << 21
  2589  	carry[3] = s3 >> 21
  2590  	s4 += carry[3]
  2591  	s3 -= carry[3] << 21
  2592  	carry[4] = s4 >> 21
  2593  	s5 += carry[4]
  2594  	s4 -= carry[4] << 21
  2595  	carry[5] = s5 >> 21
  2596  	s6 += carry[5]
  2597  	s5 -= carry[5] << 21
  2598  	carry[6] = s6 >> 21
  2599  	s7 += carry[6]
  2600  	s6 -= carry[6] << 21
  2601  	carry[7] = s7 >> 21
  2602  	s8 += carry[7]
  2603  	s7 -= carry[7] << 21
  2604  	carry[8] = s8 >> 21
  2605  	s9 += carry[8]
  2606  	s8 -= carry[8] << 21
  2607  	carry[9] = s9 >> 21
  2608  	s10 += carry[9]
  2609  	s9 -= carry[9] << 21
  2610  	carry[10] = s10 >> 21
  2611  	s11 += carry[10]
  2612  	s10 -= carry[10] << 21
  2613  	carry[11] = s11 >> 21
  2614  	s12 += carry[11]
  2615  	s11 -= carry[11] << 21
  2616  
  2617  	s0 += s12 * 666643
  2618  	s1 += s12 * 470296
  2619  	s2 += s12 * 654183
  2620  	s3 -= s12 * 997805
  2621  	s4 += s12 * 136657
  2622  	s5 -= s12 * 683901
  2623  	s12 = 0
  2624  
  2625  	carry[0] = s0 >> 21
  2626  	s1 += carry[0]
  2627  	s0 -= carry[0] << 21
  2628  	carry[1] = s1 >> 21
  2629  	s2 += carry[1]
  2630  	s1 -= carry[1] << 21
  2631  	carry[2] = s2 >> 21
  2632  	s3 += carry[2]
  2633  	s2 -= carry[2] << 21
  2634  	carry[3] = s3 >> 21
  2635  	s4 += carry[3]
  2636  	s3 -= carry[3] << 21
  2637  	carry[4] = s4 >> 21
  2638  	s5 += carry[4]
  2639  	s4 -= carry[4] << 21
  2640  	carry[5] = s5 >> 21
  2641  	s6 += carry[5]
  2642  	s5 -= carry[5] << 21
  2643  	carry[6] = s6 >> 21
  2644  	s7 += carry[6]
  2645  	s6 -= carry[6] << 21
  2646  	carry[7] = s7 >> 21
  2647  	s8 += carry[7]
  2648  	s7 -= carry[7] << 21
  2649  	carry[8] = s8 >> 21
  2650  	s9 += carry[8]
  2651  	s8 -= carry[8] << 21
  2652  	carry[9] = s9 >> 21
  2653  	s10 += carry[9]
  2654  	s9 -= carry[9] << 21
  2655  	carry[10] = s10 >> 21
  2656  	s11 += carry[10]
  2657  	s10 -= carry[10] << 21
  2658  
  2659  	s[0] = byte(s0 >> 0)
  2660  	s[1] = byte(s0 >> 8)
  2661  	s[2] = byte((s0 >> 16) | (s1 << 5))
  2662  	s[3] = byte(s1 >> 3)
  2663  	s[4] = byte(s1 >> 11)
  2664  	s[5] = byte((s1 >> 19) | (s2 << 2))
  2665  	s[6] = byte(s2 >> 6)
  2666  	s[7] = byte((s2 >> 14) | (s3 << 7))
  2667  	s[8] = byte(s3 >> 1)
  2668  	s[9] = byte(s3 >> 9)
  2669  	s[10] = byte((s3 >> 17) | (s4 << 4))
  2670  	s[11] = byte(s4 >> 4)
  2671  	s[12] = byte(s4 >> 12)
  2672  	s[13] = byte((s4 >> 20) | (s5 << 1))
  2673  	s[14] = byte(s5 >> 7)
  2674  	s[15] = byte((s5 >> 15) | (s6 << 6))
  2675  	s[16] = byte(s6 >> 2)
  2676  	s[17] = byte(s6 >> 10)
  2677  	s[18] = byte((s6 >> 18) | (s7 << 3))
  2678  	s[19] = byte(s7 >> 5)
  2679  	s[20] = byte(s7 >> 13)
  2680  	s[21] = byte(s8 >> 0)
  2681  	s[22] = byte(s8 >> 8)
  2682  	s[23] = byte((s8 >> 16) | (s9 << 5))
  2683  	s[24] = byte(s9 >> 3)
  2684  	s[25] = byte(s9 >> 11)
  2685  	s[26] = byte((s9 >> 19) | (s10 << 2))
  2686  	s[27] = byte(s10 >> 6)
  2687  	s[28] = byte((s10 >> 14) | (s11 << 7))
  2688  	s[29] = byte(s11 >> 1)
  2689  	s[30] = byte(s11 >> 9)
  2690  	s[31] = byte(s11 >> 17)
  2691  }
  2692  
  2693  //copied from above and modified
  2694  /*Input:
  2695    a[0]+256*a[1]+...+256^31*a[31] = a
  2696    b[0]+256*b[1]+...+256^31*b[31] = b
  2697  
  2698  Output:
  2699    s[0]+256*s[1]+...+256^31*s[31] = (ab) mod l
  2700    where l = 2^252 + 27742317777372353535851937790883648493.
  2701  */
  2702  func ScMul(s, a, b *Key) {
  2703  	a0 := 2097151 & load3(a[:])
  2704  	a1 := 2097151 & (load4(a[2:]) >> 5)
  2705  	a2 := 2097151 & (load3(a[5:]) >> 2)
  2706  	a3 := 2097151 & (load4(a[7:]) >> 7)
  2707  	a4 := 2097151 & (load4(a[10:]) >> 4)
  2708  	a5 := 2097151 & (load3(a[13:]) >> 1)
  2709  	a6 := 2097151 & (load4(a[15:]) >> 6)
  2710  	a7 := 2097151 & (load3(a[18:]) >> 3)
  2711  	a8 := 2097151 & load3(a[21:])
  2712  	a9 := 2097151 & (load4(a[23:]) >> 5)
  2713  	a10 := 2097151 & (load3(a[26:]) >> 2)
  2714  	a11 := (load4(a[28:]) >> 7)
  2715  	b0 := 2097151 & load3(b[:])
  2716  	b1 := 2097151 & (load4(b[2:]) >> 5)
  2717  	b2 := 2097151 & (load3(b[5:]) >> 2)
  2718  	b3 := 2097151 & (load4(b[7:]) >> 7)
  2719  	b4 := 2097151 & (load4(b[10:]) >> 4)
  2720  	b5 := 2097151 & (load3(b[13:]) >> 1)
  2721  	b6 := 2097151 & (load4(b[15:]) >> 6)
  2722  	b7 := 2097151 & (load3(b[18:]) >> 3)
  2723  	b8 := 2097151 & load3(b[21:])
  2724  	b9 := 2097151 & (load4(b[23:]) >> 5)
  2725  	b10 := 2097151 & (load3(b[26:]) >> 2)
  2726  	b11 := (load4(b[28:]) >> 7)
  2727  
  2728  	var carry [23]int64
  2729  
  2730  	s0 := a0 * b0
  2731  	s1 := (a0*b1 + a1*b0)
  2732  	s2 := (a0*b2 + a1*b1 + a2*b0)
  2733  	s3 := (a0*b3 + a1*b2 + a2*b1 + a3*b0)
  2734  	s4 := (a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0)
  2735  	s5 := (a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0)
  2736  	s6 := (a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0)
  2737  	s7 := (a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0)
  2738  	s8 := (a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0)
  2739  	s9 := (a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0)
  2740  	s10 := (a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0)
  2741  	s11 := (a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0)
  2742  	s12 := (a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1)
  2743  	s13 := (a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2)
  2744  	s14 := (a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3)
  2745  	s15 := (a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4)
  2746  	s16 := (a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5)
  2747  	s17 := (a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6)
  2748  	s18 := (a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7)
  2749  	s19 := (a8*b11 + a9*b10 + a10*b9 + a11*b8)
  2750  	s20 := (a9*b11 + a10*b10 + a11*b9)
  2751  	s21 := (a10*b11 + a11*b10)
  2752  	s22 := a11 * b11
  2753  	s23 := int64(0)
  2754  
  2755  	carry[0] = (s0 + (1 << 20)) >> 21
  2756  	s1 += carry[0]
  2757  	s0 -= carry[0] << 21
  2758  	carry[2] = (s2 + (1 << 20)) >> 21
  2759  	s3 += carry[2]
  2760  	s2 -= carry[2] << 21
  2761  	carry[4] = (s4 + (1 << 20)) >> 21
  2762  	s5 += carry[4]
  2763  	s4 -= carry[4] << 21
  2764  	carry[6] = (s6 + (1 << 20)) >> 21
  2765  	s7 += carry[6]
  2766  	s6 -= carry[6] << 21
  2767  	carry[8] = (s8 + (1 << 20)) >> 21
  2768  	s9 += carry[8]
  2769  	s8 -= carry[8] << 21
  2770  	carry[10] = (s10 + (1 << 20)) >> 21
  2771  	s11 += carry[10]
  2772  	s10 -= carry[10] << 21
  2773  	carry[12] = (s12 + (1 << 20)) >> 21
  2774  	s13 += carry[12]
  2775  	s12 -= carry[12] << 21
  2776  	carry[14] = (s14 + (1 << 20)) >> 21
  2777  	s15 += carry[14]
  2778  	s14 -= carry[14] << 21
  2779  	carry[16] = (s16 + (1 << 20)) >> 21
  2780  	s17 += carry[16]
  2781  	s16 -= carry[16] << 21
  2782  	carry[18] = (s18 + (1 << 20)) >> 21
  2783  	s19 += carry[18]
  2784  	s18 -= carry[18] << 21
  2785  	carry[20] = (s20 + (1 << 20)) >> 21
  2786  	s21 += carry[20]
  2787  	s20 -= carry[20] << 21
  2788  	carry[22] = (s22 + (1 << 20)) >> 21
  2789  	s23 += carry[22]
  2790  	s22 -= carry[22] << 21
  2791  
  2792  	carry[1] = (s1 + (1 << 20)) >> 21
  2793  	s2 += carry[1]
  2794  	s1 -= carry[1] << 21
  2795  	carry[3] = (s3 + (1 << 20)) >> 21
  2796  	s4 += carry[3]
  2797  	s3 -= carry[3] << 21
  2798  	carry[5] = (s5 + (1 << 20)) >> 21
  2799  	s6 += carry[5]
  2800  	s5 -= carry[5] << 21
  2801  	carry[7] = (s7 + (1 << 20)) >> 21
  2802  	s8 += carry[7]
  2803  	s7 -= carry[7] << 21
  2804  	carry[9] = (s9 + (1 << 20)) >> 21
  2805  	s10 += carry[9]
  2806  	s9 -= carry[9] << 21
  2807  	carry[11] = (s11 + (1 << 20)) >> 21
  2808  	s12 += carry[11]
  2809  	s11 -= carry[11] << 21
  2810  	carry[13] = (s13 + (1 << 20)) >> 21
  2811  	s14 += carry[13]
  2812  	s13 -= carry[13] << 21
  2813  	carry[15] = (s15 + (1 << 20)) >> 21
  2814  	s16 += carry[15]
  2815  	s15 -= carry[15] << 21
  2816  	carry[17] = (s17 + (1 << 20)) >> 21
  2817  	s18 += carry[17]
  2818  	s17 -= carry[17] << 21
  2819  	carry[19] = (s19 + (1 << 20)) >> 21
  2820  	s20 += carry[19]
  2821  	s19 -= carry[19] << 21
  2822  	carry[21] = (s21 + (1 << 20)) >> 21
  2823  	s22 += carry[21]
  2824  	s21 -= carry[21] << 21
  2825  
  2826  	s11 += s23 * 666643
  2827  	s12 += s23 * 470296
  2828  	s13 += s23 * 654183
  2829  	s14 -= s23 * 997805
  2830  	s15 += s23 * 136657
  2831  	s16 -= s23 * 683901
  2832  	s23 = 0
  2833  
  2834  	s10 += s22 * 666643
  2835  	s11 += s22 * 470296
  2836  	s12 += s22 * 654183
  2837  	s13 -= s22 * 997805
  2838  	s14 += s22 * 136657
  2839  	s15 -= s22 * 683901
  2840  	s22 = 0
  2841  
  2842  	s9 += s21 * 666643
  2843  	s10 += s21 * 470296
  2844  	s11 += s21 * 654183
  2845  	s12 -= s21 * 997805
  2846  	s13 += s21 * 136657
  2847  	s14 -= s21 * 683901
  2848  	s21 = 0
  2849  
  2850  	s8 += s20 * 666643
  2851  	s9 += s20 * 470296
  2852  	s10 += s20 * 654183
  2853  	s11 -= s20 * 997805
  2854  	s12 += s20 * 136657
  2855  	s13 -= s20 * 683901
  2856  	s20 = 0
  2857  
  2858  	s7 += s19 * 666643
  2859  	s8 += s19 * 470296
  2860  	s9 += s19 * 654183
  2861  	s10 -= s19 * 997805
  2862  	s11 += s19 * 136657
  2863  	s12 -= s19 * 683901
  2864  	s19 = 0
  2865  
  2866  	s6 += s18 * 666643
  2867  	s7 += s18 * 470296
  2868  	s8 += s18 * 654183
  2869  	s9 -= s18 * 997805
  2870  	s10 += s18 * 136657
  2871  	s11 -= s18 * 683901
  2872  	s18 = 0
  2873  
  2874  	carry[6] = (s6 + (1 << 20)) >> 21
  2875  	s7 += carry[6]
  2876  	s6 -= carry[6] << 21
  2877  	carry[8] = (s8 + (1 << 20)) >> 21
  2878  	s9 += carry[8]
  2879  	s8 -= carry[8] << 21
  2880  	carry[10] = (s10 + (1 << 20)) >> 21
  2881  	s11 += carry[10]
  2882  	s10 -= carry[10] << 21
  2883  	carry[12] = (s12 + (1 << 20)) >> 21
  2884  	s13 += carry[12]
  2885  	s12 -= carry[12] << 21
  2886  	carry[14] = (s14 + (1 << 20)) >> 21
  2887  	s15 += carry[14]
  2888  	s14 -= carry[14] << 21
  2889  	carry[16] = (s16 + (1 << 20)) >> 21
  2890  	s17 += carry[16]
  2891  	s16 -= carry[16] << 21
  2892  
  2893  	carry[7] = (s7 + (1 << 20)) >> 21
  2894  	s8 += carry[7]
  2895  	s7 -= carry[7] << 21
  2896  	carry[9] = (s9 + (1 << 20)) >> 21
  2897  	s10 += carry[9]
  2898  	s9 -= carry[9] << 21
  2899  	carry[11] = (s11 + (1 << 20)) >> 21
  2900  	s12 += carry[11]
  2901  	s11 -= carry[11] << 21
  2902  	carry[13] = (s13 + (1 << 20)) >> 21
  2903  	s14 += carry[13]
  2904  	s13 -= carry[13] << 21
  2905  	carry[15] = (s15 + (1 << 20)) >> 21
  2906  	s16 += carry[15]
  2907  	s15 -= carry[15] << 21
  2908  
  2909  	s5 += s17 * 666643
  2910  	s6 += s17 * 470296
  2911  	s7 += s17 * 654183
  2912  	s8 -= s17 * 997805
  2913  	s9 += s17 * 136657
  2914  	s10 -= s17 * 683901
  2915  	s17 = 0
  2916  
  2917  	s4 += s16 * 666643
  2918  	s5 += s16 * 470296
  2919  	s6 += s16 * 654183
  2920  	s7 -= s16 * 997805
  2921  	s8 += s16 * 136657
  2922  	s9 -= s16 * 683901
  2923  	s16 = 0
  2924  
  2925  	s3 += s15 * 666643
  2926  	s4 += s15 * 470296
  2927  	s5 += s15 * 654183
  2928  	s6 -= s15 * 997805
  2929  	s7 += s15 * 136657
  2930  	s8 -= s15 * 683901
  2931  	s15 = 0
  2932  
  2933  	s2 += s14 * 666643
  2934  	s3 += s14 * 470296
  2935  	s4 += s14 * 654183
  2936  	s5 -= s14 * 997805
  2937  	s6 += s14 * 136657
  2938  	s7 -= s14 * 683901
  2939  	s14 = 0
  2940  
  2941  	s1 += s13 * 666643
  2942  	s2 += s13 * 470296
  2943  	s3 += s13 * 654183
  2944  	s4 -= s13 * 997805
  2945  	s5 += s13 * 136657
  2946  	s6 -= s13 * 683901
  2947  	s13 = 0
  2948  
  2949  	s0 += s12 * 666643
  2950  	s1 += s12 * 470296
  2951  	s2 += s12 * 654183
  2952  	s3 -= s12 * 997805
  2953  	s4 += s12 * 136657
  2954  	s5 -= s12 * 683901
  2955  	s12 = 0
  2956  
  2957  	carry[0] = (s0 + (1 << 20)) >> 21
  2958  	s1 += carry[0]
  2959  	s0 -= carry[0] << 21
  2960  	carry[2] = (s2 + (1 << 20)) >> 21
  2961  	s3 += carry[2]
  2962  	s2 -= carry[2] << 21
  2963  	carry[4] = (s4 + (1 << 20)) >> 21
  2964  	s5 += carry[4]
  2965  	s4 -= carry[4] << 21
  2966  	carry[6] = (s6 + (1 << 20)) >> 21
  2967  	s7 += carry[6]
  2968  	s6 -= carry[6] << 21
  2969  	carry[8] = (s8 + (1 << 20)) >> 21
  2970  	s9 += carry[8]
  2971  	s8 -= carry[8] << 21
  2972  	carry[10] = (s10 + (1 << 20)) >> 21
  2973  	s11 += carry[10]
  2974  	s10 -= carry[10] << 21
  2975  
  2976  	carry[1] = (s1 + (1 << 20)) >> 21
  2977  	s2 += carry[1]
  2978  	s1 -= carry[1] << 21
  2979  	carry[3] = (s3 + (1 << 20)) >> 21
  2980  	s4 += carry[3]
  2981  	s3 -= carry[3] << 21
  2982  	carry[5] = (s5 + (1 << 20)) >> 21
  2983  	s6 += carry[5]
  2984  	s5 -= carry[5] << 21
  2985  	carry[7] = (s7 + (1 << 20)) >> 21
  2986  	s8 += carry[7]
  2987  	s7 -= carry[7] << 21
  2988  	carry[9] = (s9 + (1 << 20)) >> 21
  2989  	s10 += carry[9]
  2990  	s9 -= carry[9] << 21
  2991  	carry[11] = (s11 + (1 << 20)) >> 21
  2992  	s12 += carry[11]
  2993  	s11 -= carry[11] << 21
  2994  
  2995  	s0 += s12 * 666643
  2996  	s1 += s12 * 470296
  2997  	s2 += s12 * 654183
  2998  	s3 -= s12 * 997805
  2999  	s4 += s12 * 136657
  3000  	s5 -= s12 * 683901
  3001  	s12 = 0
  3002  
  3003  	carry[0] = s0 >> 21
  3004  	s1 += carry[0]
  3005  	s0 -= carry[0] << 21
  3006  	carry[1] = s1 >> 21
  3007  	s2 += carry[1]
  3008  	s1 -= carry[1] << 21
  3009  	carry[2] = s2 >> 21
  3010  	s3 += carry[2]
  3011  	s2 -= carry[2] << 21
  3012  	carry[3] = s3 >> 21
  3013  	s4 += carry[3]
  3014  	s3 -= carry[3] << 21
  3015  	carry[4] = s4 >> 21
  3016  	s5 += carry[4]
  3017  	s4 -= carry[4] << 21
  3018  	carry[5] = s5 >> 21
  3019  	s6 += carry[5]
  3020  	s5 -= carry[5] << 21
  3021  	carry[6] = s6 >> 21
  3022  	s7 += carry[6]
  3023  	s6 -= carry[6] << 21
  3024  	carry[7] = s7 >> 21
  3025  	s8 += carry[7]
  3026  	s7 -= carry[7] << 21
  3027  	carry[8] = s8 >> 21
  3028  	s9 += carry[8]
  3029  	s8 -= carry[8] << 21
  3030  	carry[9] = s9 >> 21
  3031  	s10 += carry[9]
  3032  	s9 -= carry[9] << 21
  3033  	carry[10] = s10 >> 21
  3034  	s11 += carry[10]
  3035  	s10 -= carry[10] << 21
  3036  	carry[11] = s11 >> 21
  3037  	s12 += carry[11]
  3038  	s11 -= carry[11] << 21
  3039  
  3040  	s0 += s12 * 666643
  3041  	s1 += s12 * 470296
  3042  	s2 += s12 * 654183
  3043  	s3 -= s12 * 997805
  3044  	s4 += s12 * 136657
  3045  	s5 -= s12 * 683901
  3046  	s12 = 0
  3047  
  3048  	carry[0] = s0 >> 21
  3049  	s1 += carry[0]
  3050  	s0 -= carry[0] << 21
  3051  	carry[1] = s1 >> 21
  3052  	s2 += carry[1]
  3053  	s1 -= carry[1] << 21
  3054  	carry[2] = s2 >> 21
  3055  	s3 += carry[2]
  3056  	s2 -= carry[2] << 21
  3057  	carry[3] = s3 >> 21
  3058  	s4 += carry[3]
  3059  	s3 -= carry[3] << 21
  3060  	carry[4] = s4 >> 21
  3061  	s5 += carry[4]
  3062  	s4 -= carry[4] << 21
  3063  	carry[5] = s5 >> 21
  3064  	s6 += carry[5]
  3065  	s5 -= carry[5] << 21
  3066  	carry[6] = s6 >> 21
  3067  	s7 += carry[6]
  3068  	s6 -= carry[6] << 21
  3069  	carry[7] = s7 >> 21
  3070  	s8 += carry[7]
  3071  	s7 -= carry[7] << 21
  3072  	carry[8] = s8 >> 21
  3073  	s9 += carry[8]
  3074  	s8 -= carry[8] << 21
  3075  	carry[9] = s9 >> 21
  3076  	s10 += carry[9]
  3077  	s9 -= carry[9] << 21
  3078  	carry[10] = s10 >> 21
  3079  	s11 += carry[10]
  3080  	s10 -= carry[10] << 21
  3081  
  3082  	s[0] = byte(s0 >> 0)
  3083  	s[1] = byte(s0 >> 8)
  3084  	s[2] = byte((s0 >> 16) | (s1 << 5))
  3085  	s[3] = byte(s1 >> 3)
  3086  	s[4] = byte(s1 >> 11)
  3087  	s[5] = byte((s1 >> 19) | (s2 << 2))
  3088  	s[6] = byte(s2 >> 6)
  3089  	s[7] = byte((s2 >> 14) | (s3 << 7))
  3090  	s[8] = byte(s3 >> 1)
  3091  	s[9] = byte(s3 >> 9)
  3092  	s[10] = byte((s3 >> 17) | (s4 << 4))
  3093  	s[11] = byte(s4 >> 4)
  3094  	s[12] = byte(s4 >> 12)
  3095  	s[13] = byte((s4 >> 20) | (s5 << 1))
  3096  	s[14] = byte(s5 >> 7)
  3097  	s[15] = byte((s5 >> 15) | (s6 << 6))
  3098  	s[16] = byte(s6 >> 2)
  3099  	s[17] = byte(s6 >> 10)
  3100  	s[18] = byte((s6 >> 18) | (s7 << 3))
  3101  	s[19] = byte(s7 >> 5)
  3102  	s[20] = byte(s7 >> 13)
  3103  	s[21] = byte(s8 >> 0)
  3104  	s[22] = byte(s8 >> 8)
  3105  	s[23] = byte((s8 >> 16) | (s9 << 5))
  3106  	s[24] = byte(s9 >> 3)
  3107  	s[25] = byte(s9 >> 11)
  3108  	s[26] = byte((s9 >> 19) | (s10 << 2))
  3109  	s[27] = byte(s10 >> 6)
  3110  	s[28] = byte((s10 >> 14) | (s11 << 7))
  3111  	s[29] = byte(s11 >> 1)
  3112  	s[30] = byte(s11 >> 9)
  3113  	s[31] = byte(s11 >> 17)
  3114  
  3115  }
  3116  
  3117  // Input:
  3118  //   s[0]+256*s[1]+...+256^63*s[63] = s
  3119  //
  3120  // Output:
  3121  //   s[0]+256*s[1]+...+256^31*s[31] = s mod l
  3122  //   where l = 2^252 + 27742317777372353535851937790883648493.
  3123  func ScReduce(out *Key, s *[64]byte) {
  3124  	s0 := 2097151 & load3(s[:])
  3125  	s1 := 2097151 & (load4(s[2:]) >> 5)
  3126  	s2 := 2097151 & (load3(s[5:]) >> 2)
  3127  	s3 := 2097151 & (load4(s[7:]) >> 7)
  3128  	s4 := 2097151 & (load4(s[10:]) >> 4)
  3129  	s5 := 2097151 & (load3(s[13:]) >> 1)
  3130  	s6 := 2097151 & (load4(s[15:]) >> 6)
  3131  	s7 := 2097151 & (load3(s[18:]) >> 3)
  3132  	s8 := 2097151 & load3(s[21:])
  3133  	s9 := 2097151 & (load4(s[23:]) >> 5)
  3134  	s10 := 2097151 & (load3(s[26:]) >> 2)
  3135  	s11 := 2097151 & (load4(s[28:]) >> 7)
  3136  	s12 := 2097151 & (load4(s[31:]) >> 4)
  3137  	s13 := 2097151 & (load3(s[34:]) >> 1)
  3138  	s14 := 2097151 & (load4(s[36:]) >> 6)
  3139  	s15 := 2097151 & (load3(s[39:]) >> 3)
  3140  	s16 := 2097151 & load3(s[42:])
  3141  	s17 := 2097151 & (load4(s[44:]) >> 5)
  3142  	s18 := 2097151 & (load3(s[47:]) >> 2)
  3143  	s19 := 2097151 & (load4(s[49:]) >> 7)
  3144  	s20 := 2097151 & (load4(s[52:]) >> 4)
  3145  	s21 := 2097151 & (load3(s[55:]) >> 1)
  3146  	s22 := 2097151 & (load4(s[57:]) >> 6)
  3147  	s23 := (load4(s[60:]) >> 3)
  3148  
  3149  	s11 += s23 * 666643
  3150  	s12 += s23 * 470296
  3151  	s13 += s23 * 654183
  3152  	s14 -= s23 * 997805
  3153  	s15 += s23 * 136657
  3154  	s16 -= s23 * 683901
  3155  	s23 = 0
  3156  
  3157  	s10 += s22 * 666643
  3158  	s11 += s22 * 470296
  3159  	s12 += s22 * 654183
  3160  	s13 -= s22 * 997805
  3161  	s14 += s22 * 136657
  3162  	s15 -= s22 * 683901
  3163  	s22 = 0
  3164  
  3165  	s9 += s21 * 666643
  3166  	s10 += s21 * 470296
  3167  	s11 += s21 * 654183
  3168  	s12 -= s21 * 997805
  3169  	s13 += s21 * 136657
  3170  	s14 -= s21 * 683901
  3171  	s21 = 0
  3172  
  3173  	s8 += s20 * 666643
  3174  	s9 += s20 * 470296
  3175  	s10 += s20 * 654183
  3176  	s11 -= s20 * 997805
  3177  	s12 += s20 * 136657
  3178  	s13 -= s20 * 683901
  3179  	s20 = 0
  3180  
  3181  	s7 += s19 * 666643
  3182  	s8 += s19 * 470296
  3183  	s9 += s19 * 654183
  3184  	s10 -= s19 * 997805
  3185  	s11 += s19 * 136657
  3186  	s12 -= s19 * 683901
  3187  	s19 = 0
  3188  
  3189  	s6 += s18 * 666643
  3190  	s7 += s18 * 470296
  3191  	s8 += s18 * 654183
  3192  	s9 -= s18 * 997805
  3193  	s10 += s18 * 136657
  3194  	s11 -= s18 * 683901
  3195  	s18 = 0
  3196  
  3197  	var carry [17]int64
  3198  
  3199  	carry[6] = (s6 + (1 << 20)) >> 21
  3200  	s7 += carry[6]
  3201  	s6 -= carry[6] << 21
  3202  	carry[8] = (s8 + (1 << 20)) >> 21
  3203  	s9 += carry[8]
  3204  	s8 -= carry[8] << 21
  3205  	carry[10] = (s10 + (1 << 20)) >> 21
  3206  	s11 += carry[10]
  3207  	s10 -= carry[10] << 21
  3208  	carry[12] = (s12 + (1 << 20)) >> 21
  3209  	s13 += carry[12]
  3210  	s12 -= carry[12] << 21
  3211  	carry[14] = (s14 + (1 << 20)) >> 21
  3212  	s15 += carry[14]
  3213  	s14 -= carry[14] << 21
  3214  	carry[16] = (s16 + (1 << 20)) >> 21
  3215  	s17 += carry[16]
  3216  	s16 -= carry[16] << 21
  3217  
  3218  	carry[7] = (s7 + (1 << 20)) >> 21
  3219  	s8 += carry[7]
  3220  	s7 -= carry[7] << 21
  3221  	carry[9] = (s9 + (1 << 20)) >> 21
  3222  	s10 += carry[9]
  3223  	s9 -= carry[9] << 21
  3224  	carry[11] = (s11 + (1 << 20)) >> 21
  3225  	s12 += carry[11]
  3226  	s11 -= carry[11] << 21
  3227  	carry[13] = (s13 + (1 << 20)) >> 21
  3228  	s14 += carry[13]
  3229  	s13 -= carry[13] << 21
  3230  	carry[15] = (s15 + (1 << 20)) >> 21
  3231  	s16 += carry[15]
  3232  	s15 -= carry[15] << 21
  3233  
  3234  	s5 += s17 * 666643
  3235  	s6 += s17 * 470296
  3236  	s7 += s17 * 654183
  3237  	s8 -= s17 * 997805
  3238  	s9 += s17 * 136657
  3239  	s10 -= s17 * 683901
  3240  	s17 = 0
  3241  
  3242  	s4 += s16 * 666643
  3243  	s5 += s16 * 470296
  3244  	s6 += s16 * 654183
  3245  	s7 -= s16 * 997805
  3246  	s8 += s16 * 136657
  3247  	s9 -= s16 * 683901
  3248  	s16 = 0
  3249  
  3250  	s3 += s15 * 666643
  3251  	s4 += s15 * 470296
  3252  	s5 += s15 * 654183
  3253  	s6 -= s15 * 997805
  3254  	s7 += s15 * 136657
  3255  	s8 -= s15 * 683901
  3256  	s15 = 0
  3257  
  3258  	s2 += s14 * 666643
  3259  	s3 += s14 * 470296
  3260  	s4 += s14 * 654183
  3261  	s5 -= s14 * 997805
  3262  	s6 += s14 * 136657
  3263  	s7 -= s14 * 683901
  3264  	s14 = 0
  3265  
  3266  	s1 += s13 * 666643
  3267  	s2 += s13 * 470296
  3268  	s3 += s13 * 654183
  3269  	s4 -= s13 * 997805
  3270  	s5 += s13 * 136657
  3271  	s6 -= s13 * 683901
  3272  	s13 = 0
  3273  
  3274  	s0 += s12 * 666643
  3275  	s1 += s12 * 470296
  3276  	s2 += s12 * 654183
  3277  	s3 -= s12 * 997805
  3278  	s4 += s12 * 136657
  3279  	s5 -= s12 * 683901
  3280  	s12 = 0
  3281  
  3282  	carry[0] = (s0 + (1 << 20)) >> 21
  3283  	s1 += carry[0]
  3284  	s0 -= carry[0] << 21
  3285  	carry[2] = (s2 + (1 << 20)) >> 21
  3286  	s3 += carry[2]
  3287  	s2 -= carry[2] << 21
  3288  	carry[4] = (s4 + (1 << 20)) >> 21
  3289  	s5 += carry[4]
  3290  	s4 -= carry[4] << 21
  3291  	carry[6] = (s6 + (1 << 20)) >> 21
  3292  	s7 += carry[6]
  3293  	s6 -= carry[6] << 21
  3294  	carry[8] = (s8 + (1 << 20)) >> 21
  3295  	s9 += carry[8]
  3296  	s8 -= carry[8] << 21
  3297  	carry[10] = (s10 + (1 << 20)) >> 21
  3298  	s11 += carry[10]
  3299  	s10 -= carry[10] << 21
  3300  
  3301  	carry[1] = (s1 + (1 << 20)) >> 21
  3302  	s2 += carry[1]
  3303  	s1 -= carry[1] << 21
  3304  	carry[3] = (s3 + (1 << 20)) >> 21
  3305  	s4 += carry[3]
  3306  	s3 -= carry[3] << 21
  3307  	carry[5] = (s5 + (1 << 20)) >> 21
  3308  	s6 += carry[5]
  3309  	s5 -= carry[5] << 21
  3310  	carry[7] = (s7 + (1 << 20)) >> 21
  3311  	s8 += carry[7]
  3312  	s7 -= carry[7] << 21
  3313  	carry[9] = (s9 + (1 << 20)) >> 21
  3314  	s10 += carry[9]
  3315  	s9 -= carry[9] << 21
  3316  	carry[11] = (s11 + (1 << 20)) >> 21
  3317  	s12 += carry[11]
  3318  	s11 -= carry[11] << 21
  3319  
  3320  	s0 += s12 * 666643
  3321  	s1 += s12 * 470296
  3322  	s2 += s12 * 654183
  3323  	s3 -= s12 * 997805
  3324  	s4 += s12 * 136657
  3325  	s5 -= s12 * 683901
  3326  	s12 = 0
  3327  
  3328  	carry[0] = s0 >> 21
  3329  	s1 += carry[0]
  3330  	s0 -= carry[0] << 21
  3331  	carry[1] = s1 >> 21
  3332  	s2 += carry[1]
  3333  	s1 -= carry[1] << 21
  3334  	carry[2] = s2 >> 21
  3335  	s3 += carry[2]
  3336  	s2 -= carry[2] << 21
  3337  	carry[3] = s3 >> 21
  3338  	s4 += carry[3]
  3339  	s3 -= carry[3] << 21
  3340  	carry[4] = s4 >> 21
  3341  	s5 += carry[4]
  3342  	s4 -= carry[4] << 21
  3343  	carry[5] = s5 >> 21
  3344  	s6 += carry[5]
  3345  	s5 -= carry[5] << 21
  3346  	carry[6] = s6 >> 21
  3347  	s7 += carry[6]
  3348  	s6 -= carry[6] << 21
  3349  	carry[7] = s7 >> 21
  3350  	s8 += carry[7]
  3351  	s7 -= carry[7] << 21
  3352  	carry[8] = s8 >> 21
  3353  	s9 += carry[8]
  3354  	s8 -= carry[8] << 21
  3355  	carry[9] = s9 >> 21
  3356  	s10 += carry[9]
  3357  	s9 -= carry[9] << 21
  3358  	carry[10] = s10 >> 21
  3359  	s11 += carry[10]
  3360  	s10 -= carry[10] << 21
  3361  	carry[11] = s11 >> 21
  3362  	s12 += carry[11]
  3363  	s11 -= carry[11] << 21
  3364  
  3365  	s0 += s12 * 666643
  3366  	s1 += s12 * 470296
  3367  	s2 += s12 * 654183
  3368  	s3 -= s12 * 997805
  3369  	s4 += s12 * 136657
  3370  	s5 -= s12 * 683901
  3371  	s12 = 0
  3372  
  3373  	carry[0] = s0 >> 21
  3374  	s1 += carry[0]
  3375  	s0 -= carry[0] << 21
  3376  	carry[1] = s1 >> 21
  3377  	s2 += carry[1]
  3378  	s1 -= carry[1] << 21
  3379  	carry[2] = s2 >> 21
  3380  	s3 += carry[2]
  3381  	s2 -= carry[2] << 21
  3382  	carry[3] = s3 >> 21
  3383  	s4 += carry[3]
  3384  	s3 -= carry[3] << 21
  3385  	carry[4] = s4 >> 21
  3386  	s5 += carry[4]
  3387  	s4 -= carry[4] << 21
  3388  	carry[5] = s5 >> 21
  3389  	s6 += carry[5]
  3390  	s5 -= carry[5] << 21
  3391  	carry[6] = s6 >> 21
  3392  	s7 += carry[6]
  3393  	s6 -= carry[6] << 21
  3394  	carry[7] = s7 >> 21
  3395  	s8 += carry[7]
  3396  	s7 -= carry[7] << 21
  3397  	carry[8] = s8 >> 21
  3398  	s9 += carry[8]
  3399  	s8 -= carry[8] << 21
  3400  	carry[9] = s9 >> 21
  3401  	s10 += carry[9]
  3402  	s9 -= carry[9] << 21
  3403  	carry[10] = s10 >> 21
  3404  	s11 += carry[10]
  3405  	s10 -= carry[10] << 21
  3406  
  3407  	out[0] = byte(s0 >> 0)
  3408  	out[1] = byte(s0 >> 8)
  3409  	out[2] = byte((s0 >> 16) | (s1 << 5))
  3410  	out[3] = byte(s1 >> 3)
  3411  	out[4] = byte(s1 >> 11)
  3412  	out[5] = byte((s1 >> 19) | (s2 << 2))
  3413  	out[6] = byte(s2 >> 6)
  3414  	out[7] = byte((s2 >> 14) | (s3 << 7))
  3415  	out[8] = byte(s3 >> 1)
  3416  	out[9] = byte(s3 >> 9)
  3417  	out[10] = byte((s3 >> 17) | (s4 << 4))
  3418  	out[11] = byte(s4 >> 4)
  3419  	out[12] = byte(s4 >> 12)
  3420  	out[13] = byte((s4 >> 20) | (s5 << 1))
  3421  	out[14] = byte(s5 >> 7)
  3422  	out[15] = byte((s5 >> 15) | (s6 << 6))
  3423  	out[16] = byte(s6 >> 2)
  3424  	out[17] = byte(s6 >> 10)
  3425  	out[18] = byte((s6 >> 18) | (s7 << 3))
  3426  	out[19] = byte(s7 >> 5)
  3427  	out[20] = byte(s7 >> 13)
  3428  	out[21] = byte(s8 >> 0)
  3429  	out[22] = byte(s8 >> 8)
  3430  	out[23] = byte((s8 >> 16) | (s9 << 5))
  3431  	out[24] = byte(s9 >> 3)
  3432  	out[25] = byte(s9 >> 11)
  3433  	out[26] = byte((s9 >> 19) | (s10 << 2))
  3434  	out[27] = byte(s10 >> 6)
  3435  	out[28] = byte((s10 >> 14) | (s11 << 7))
  3436  	out[29] = byte(s11 >> 1)
  3437  	out[30] = byte(s11 >> 9)
  3438  	out[31] = byte(s11 >> 17)
  3439  }
  3440  
  3441  func ScReduce32(s *Key) {
  3442  	s0 := 2097151 & load3(s[:])
  3443  	s1 := 2097151 & (load4(s[2:]) >> 5)
  3444  	s2 := 2097151 & (load3(s[5:]) >> 2)
  3445  	s3 := 2097151 & (load4(s[7:]) >> 7)
  3446  	s4 := 2097151 & (load4(s[10:]) >> 4)
  3447  	s5 := 2097151 & (load3(s[13:]) >> 1)
  3448  	s6 := 2097151 & (load4(s[15:]) >> 6)
  3449  	s7 := 2097151 & (load3(s[18:]) >> 3)
  3450  	s8 := 2097151 & load3(s[21:])
  3451  	s9 := 2097151 & (load4(s[23:]) >> 5)
  3452  	s10 := 2097151 & (load3(s[26:]) >> 2)
  3453  	s11 := (load4(s[28:]) >> 7)
  3454  	s12 := int64(0)
  3455  	var carry [12]int64
  3456  	carry[0] = (s0 + (1 << 20)) >> 21
  3457  	s1 += carry[0]
  3458  	s0 -= carry[0] << 21
  3459  	carry[2] = (s2 + (1 << 20)) >> 21
  3460  	s3 += carry[2]
  3461  	s2 -= carry[2] << 21
  3462  	carry[4] = (s4 + (1 << 20)) >> 21
  3463  	s5 += carry[4]
  3464  	s4 -= carry[4] << 21
  3465  	carry[6] = (s6 + (1 << 20)) >> 21
  3466  	s7 += carry[6]
  3467  	s6 -= carry[6] << 21
  3468  	carry[8] = (s8 + (1 << 20)) >> 21
  3469  	s9 += carry[8]
  3470  	s8 -= carry[8] << 21
  3471  	carry[10] = (s10 + (1 << 20)) >> 21
  3472  	s11 += carry[10]
  3473  	s10 -= carry[10] << 21
  3474  	carry[1] = (s1 + (1 << 20)) >> 21
  3475  	s2 += carry[1]
  3476  	s1 -= carry[1] << 21
  3477  	carry[3] = (s3 + (1 << 20)) >> 21
  3478  	s4 += carry[3]
  3479  	s3 -= carry[3] << 21
  3480  	carry[5] = (s5 + (1 << 20)) >> 21
  3481  	s6 += carry[5]
  3482  	s5 -= carry[5] << 21
  3483  	carry[7] = (s7 + (1 << 20)) >> 21
  3484  	s8 += carry[7]
  3485  	s7 -= carry[7] << 21
  3486  	carry[9] = (s9 + (1 << 20)) >> 21
  3487  	s10 += carry[9]
  3488  	s9 -= carry[9] << 21
  3489  	carry[11] = (s11 + (1 << 20)) >> 21
  3490  	s12 += carry[11]
  3491  	s11 -= carry[11] << 21
  3492  
  3493  	s0 += s12 * 666643
  3494  	s1 += s12 * 470296
  3495  	s2 += s12 * 654183
  3496  	s3 -= s12 * 997805
  3497  	s4 += s12 * 136657
  3498  	s5 -= s12 * 683901
  3499  	s12 = 0
  3500  
  3501  	carry[0] = s0 >> 21
  3502  	s1 += carry[0]
  3503  	s0 -= carry[0] << 21
  3504  	carry[1] = s1 >> 21
  3505  	s2 += carry[1]
  3506  	s1 -= carry[1] << 21
  3507  	carry[2] = s2 >> 21
  3508  	s3 += carry[2]
  3509  	s2 -= carry[2] << 21
  3510  	carry[3] = s3 >> 21
  3511  	s4 += carry[3]
  3512  	s3 -= carry[3] << 21
  3513  	carry[4] = s4 >> 21
  3514  	s5 += carry[4]
  3515  	s4 -= carry[4] << 21
  3516  	carry[5] = s5 >> 21
  3517  	s6 += carry[5]
  3518  	s5 -= carry[5] << 21
  3519  	carry[6] = s6 >> 21
  3520  	s7 += carry[6]
  3521  	s6 -= carry[6] << 21
  3522  	carry[7] = s7 >> 21
  3523  	s8 += carry[7]
  3524  	s7 -= carry[7] << 21
  3525  	carry[8] = s8 >> 21
  3526  	s9 += carry[8]
  3527  	s8 -= carry[8] << 21
  3528  	carry[9] = s9 >> 21
  3529  	s10 += carry[9]
  3530  	s9 -= carry[9] << 21
  3531  	carry[10] = s10 >> 21
  3532  	s11 += carry[10]
  3533  	s10 -= carry[10] << 21
  3534  	carry[11] = s11 >> 21
  3535  	s12 += carry[11]
  3536  	s11 -= carry[11] << 21
  3537  
  3538  	s0 += s12 * 666643
  3539  	s1 += s12 * 470296
  3540  	s2 += s12 * 654183
  3541  	s3 -= s12 * 997805
  3542  	s4 += s12 * 136657
  3543  	s5 -= s12 * 683901
  3544  
  3545  	carry[0] = s0 >> 21
  3546  	s1 += carry[0]
  3547  	s0 -= carry[0] << 21
  3548  	carry[1] = s1 >> 21
  3549  	s2 += carry[1]
  3550  	s1 -= carry[1] << 21
  3551  	carry[2] = s2 >> 21
  3552  	s3 += carry[2]
  3553  	s2 -= carry[2] << 21
  3554  	carry[3] = s3 >> 21
  3555  	s4 += carry[3]
  3556  	s3 -= carry[3] << 21
  3557  	carry[4] = s4 >> 21
  3558  	s5 += carry[4]
  3559  	s4 -= carry[4] << 21
  3560  	carry[5] = s5 >> 21
  3561  	s6 += carry[5]
  3562  	s5 -= carry[5] << 21
  3563  	carry[6] = s6 >> 21
  3564  	s7 += carry[6]
  3565  	s6 -= carry[6] << 21
  3566  	carry[7] = s7 >> 21
  3567  	s8 += carry[7]
  3568  	s7 -= carry[7] << 21
  3569  	carry[8] = s8 >> 21
  3570  	s9 += carry[8]
  3571  	s8 -= carry[8] << 21
  3572  	carry[9] = s9 >> 21
  3573  	s10 += carry[9]
  3574  	s9 -= carry[9] << 21
  3575  	carry[10] = s10 >> 21
  3576  	s11 += carry[10]
  3577  	s10 -= carry[10] << 21
  3578  
  3579  	s[0] = byte(s0 >> 0)
  3580  	s[1] = byte(s0 >> 8)
  3581  	s[2] = byte((s0 >> 16) | (s1 << 5))
  3582  	s[3] = byte(s1 >> 3)
  3583  	s[4] = byte(s1 >> 11)
  3584  	s[5] = byte((s1 >> 19) | (s2 << 2))
  3585  	s[6] = byte(s2 >> 6)
  3586  	s[7] = byte((s2 >> 14) | (s3 << 7))
  3587  	s[8] = byte(s3 >> 1)
  3588  	s[9] = byte(s3 >> 9)
  3589  	s[10] = byte((s3 >> 17) | (s4 << 4))
  3590  	s[11] = byte(s4 >> 4)
  3591  	s[12] = byte(s4 >> 12)
  3592  	s[13] = byte((s4 >> 20) | (s5 << 1))
  3593  	s[14] = byte(s5 >> 7)
  3594  	s[15] = byte((s5 >> 15) | (s6 << 6))
  3595  	s[16] = byte(s6 >> 2)
  3596  	s[17] = byte(s6 >> 10)
  3597  	s[18] = byte((s6 >> 18) | (s7 << 3))
  3598  	s[19] = byte(s7 >> 5)
  3599  	s[20] = byte(s7 >> 13)
  3600  	s[21] = byte(s8 >> 0)
  3601  	s[22] = byte(s8 >> 8)
  3602  	s[23] = byte((s8 >> 16) | (s9 << 5))
  3603  	s[24] = byte(s9 >> 3)
  3604  	s[25] = byte(s9 >> 11)
  3605  	s[26] = byte((s9 >> 19) | (s10 << 2))
  3606  	s[27] = byte(s10 >> 6)
  3607  	s[28] = byte((s10 >> 14) | (s11 << 7))
  3608  	s[29] = byte(s11 >> 1)
  3609  	s[30] = byte(s11 >> 9)
  3610  	s[31] = byte(s11 >> 17)
  3611  }