github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fp/element_ops_purego.go (about)

     1  //go:build !amd64 || purego
     2  // +build !amd64 purego
     3  
     4  // Copyright 2020 ConsenSys Software Inc.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Code generated by consensys/gnark-crypto DO NOT EDIT
    19  
    20  package fp
    21  
    22  import "math/bits"
    23  
    24  // MulBy3 x *= 3 (mod q)
    25  func MulBy3(x *Element) {
    26  	_x := *x
    27  	x.Double(x).Add(x, &_x)
    28  }
    29  
    30  // MulBy5 x *= 5 (mod q)
    31  func MulBy5(x *Element) {
    32  	_x := *x
    33  	x.Double(x).Double(x).Add(x, &_x)
    34  }
    35  
    36  // MulBy13 x *= 13 (mod q)
    37  func MulBy13(x *Element) {
    38  	var y = Element{
    39  		4881606927653498122,
    40  		47978232019095094,
    41  		8555661377410121478,
    42  		17849732488791568215,
    43  		5227097555314997552,
    44  		839611732066804726,
    45  		5234648925333584632,
    46  		11936054402769696488,
    47  		1228498468693814883,
    48  		2857848702739380,
    49  	}
    50  	x.Mul(x, &y)
    51  }
    52  
// Butterfly sets
//
//	a = a + b (mod q)
//	b = a - b (mod q)
//
// This file is the pure-Go build (non-amd64, or the purego tag), so the work
// is simply forwarded to _butterflyGeneric.
//
// NOTE(review): presumably both results are computed from the values a and b
// held on entry (i.e. b becomes old a - old b) — confirm against
// _butterflyGeneric.
func Butterfly(a, b *Element) {
	_butterflyGeneric(a, b)
}
    60  
// fromMont forwards to _fromMontGeneric, the implementation used by this
// pure-Go build (non-amd64, or the purego tag).
//
// NOTE(review): presumably converts z out of Montgomery form in place —
// confirm against _fromMontGeneric.
func fromMont(z *Element) {
	_fromMontGeneric(z)
}
    64  
// reduce forwards to _reduceGeneric, the implementation used by this
// pure-Go build (non-amd64, or the purego tag).
//
// NOTE(review): presumably brings z back into the range [0, q) in place —
// confirm against _reduceGeneric.
func reduce(z *Element) {
	_reduceGeneric(z)
}
    68  
    69  // Mul z = x * y (mod q)
    70  //
    71  // x and y must be less than q
    72  func (z *Element) Mul(x, y *Element) *Element {
    73  
    74  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
    75  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
    76  	//
    77  	// The algorithm:
    78  	//
    79  	// for i=0 to N-1
    80  	// 		C := 0
    81  	// 		for j=0 to N-1
    82  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
    83  	// 		(t[N+1],t[N]) := t[N] + C
    84  	//
    85  	// 		C := 0
    86  	// 		m := t[0]*q'[0] mod D
    87  	// 		(C,_) := t[0] + m*q[0]
    88  	// 		for j=1 to N-1
    89  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
    90  	//
    91  	// 		(C,t[N-1]) := t[N] + C
    92  	// 		t[N] := t[N+1] + C
    93  	//
    94  	// → N is the number of machine words needed to store the modulus q
    95  	// → D is the word size. For example, on a 64-bit architecture D is 2	64
    96  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
    97  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
    98  	// → t is a temporary array of size N+2
    99  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
   100  	//
   101  	// As described here https://hackmd.io/@gnark/modular_multiplication we can get rid of one carry chain and simplify:
   102  	// (also described in https://eprint.iacr.org/2022/1400.pdf annex)
   103  	//
   104  	// for i=0 to N-1
   105  	// 		(A,t[0]) := t[0] + x[0]*y[i]
   106  	// 		m := t[0]*q'[0] mod W
   107  	// 		C,_ := t[0] + m*q[0]
   108  	// 		for j=1 to N-1
   109  	// 			(A,t[j])  := t[j] + x[j]*y[i] + A
   110  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   111  	//
   112  	// 		t[N-1] = C + A
   113  	//
   114  	// This optimization saves 5N + 2 additions in the algorithm, and can be used whenever the highest bit
   115  	// of the modulus is zero (and not all of the remaining bits are set).
   116  
   117  	var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 uint64
   118  	var u0, u1, u2, u3, u4, u5, u6, u7, u8, u9 uint64
   119  	{
   120  		var c0, c1, c2 uint64
   121  		v := x[0]
   122  		u0, t0 = bits.Mul64(v, y[0])
   123  		u1, t1 = bits.Mul64(v, y[1])
   124  		u2, t2 = bits.Mul64(v, y[2])
   125  		u3, t3 = bits.Mul64(v, y[3])
   126  		u4, t4 = bits.Mul64(v, y[4])
   127  		u5, t5 = bits.Mul64(v, y[5])
   128  		u6, t6 = bits.Mul64(v, y[6])
   129  		u7, t7 = bits.Mul64(v, y[7])
   130  		u8, t8 = bits.Mul64(v, y[8])
   131  		u9, t9 = bits.Mul64(v, y[9])
   132  		t1, c0 = bits.Add64(u0, t1, 0)
   133  		t2, c0 = bits.Add64(u1, t2, c0)
   134  		t3, c0 = bits.Add64(u2, t3, c0)
   135  		t4, c0 = bits.Add64(u3, t4, c0)
   136  		t5, c0 = bits.Add64(u4, t5, c0)
   137  		t6, c0 = bits.Add64(u5, t6, c0)
   138  		t7, c0 = bits.Add64(u6, t7, c0)
   139  		t8, c0 = bits.Add64(u7, t8, c0)
   140  		t9, c0 = bits.Add64(u8, t9, c0)
   141  		c2, _ = bits.Add64(u9, 0, c0)
   142  
   143  		m := qInvNeg * t0
   144  
   145  		u0, c1 = bits.Mul64(m, q0)
   146  		_, c0 = bits.Add64(t0, c1, 0)
   147  		u1, c1 = bits.Mul64(m, q1)
   148  		t0, c0 = bits.Add64(t1, c1, c0)
   149  		u2, c1 = bits.Mul64(m, q2)
   150  		t1, c0 = bits.Add64(t2, c1, c0)
   151  		u3, c1 = bits.Mul64(m, q3)
   152  		t2, c0 = bits.Add64(t3, c1, c0)
   153  		u4, c1 = bits.Mul64(m, q4)
   154  		t3, c0 = bits.Add64(t4, c1, c0)
   155  		u5, c1 = bits.Mul64(m, q5)
   156  		t4, c0 = bits.Add64(t5, c1, c0)
   157  		u6, c1 = bits.Mul64(m, q6)
   158  		t5, c0 = bits.Add64(t6, c1, c0)
   159  		u7, c1 = bits.Mul64(m, q7)
   160  		t6, c0 = bits.Add64(t7, c1, c0)
   161  		u8, c1 = bits.Mul64(m, q8)
   162  		t7, c0 = bits.Add64(t8, c1, c0)
   163  		u9, c1 = bits.Mul64(m, q9)
   164  
   165  		t8, c0 = bits.Add64(0, c1, c0)
   166  		u9, _ = bits.Add64(u9, 0, c0)
   167  		t0, c0 = bits.Add64(u0, t0, 0)
   168  		t1, c0 = bits.Add64(u1, t1, c0)
   169  		t2, c0 = bits.Add64(u2, t2, c0)
   170  		t3, c0 = bits.Add64(u3, t3, c0)
   171  		t4, c0 = bits.Add64(u4, t4, c0)
   172  		t5, c0 = bits.Add64(u5, t5, c0)
   173  		t6, c0 = bits.Add64(u6, t6, c0)
   174  		t7, c0 = bits.Add64(u7, t7, c0)
   175  		t8, c0 = bits.Add64(u8, t8, c0)
   176  		c2, _ = bits.Add64(c2, 0, c0)
   177  		t8, c0 = bits.Add64(t9, t8, 0)
   178  		t9, _ = bits.Add64(u9, c2, c0)
   179  
   180  	}
   181  	{
   182  		var c0, c1, c2 uint64
   183  		v := x[1]
   184  		u0, c1 = bits.Mul64(v, y[0])
   185  		t0, c0 = bits.Add64(c1, t0, 0)
   186  		u1, c1 = bits.Mul64(v, y[1])
   187  		t1, c0 = bits.Add64(c1, t1, c0)
   188  		u2, c1 = bits.Mul64(v, y[2])
   189  		t2, c0 = bits.Add64(c1, t2, c0)
   190  		u3, c1 = bits.Mul64(v, y[3])
   191  		t3, c0 = bits.Add64(c1, t3, c0)
   192  		u4, c1 = bits.Mul64(v, y[4])
   193  		t4, c0 = bits.Add64(c1, t4, c0)
   194  		u5, c1 = bits.Mul64(v, y[5])
   195  		t5, c0 = bits.Add64(c1, t5, c0)
   196  		u6, c1 = bits.Mul64(v, y[6])
   197  		t6, c0 = bits.Add64(c1, t6, c0)
   198  		u7, c1 = bits.Mul64(v, y[7])
   199  		t7, c0 = bits.Add64(c1, t7, c0)
   200  		u8, c1 = bits.Mul64(v, y[8])
   201  		t8, c0 = bits.Add64(c1, t8, c0)
   202  		u9, c1 = bits.Mul64(v, y[9])
   203  		t9, c0 = bits.Add64(c1, t9, c0)
   204  
   205  		c2, _ = bits.Add64(0, 0, c0)
   206  		t1, c0 = bits.Add64(u0, t1, 0)
   207  		t2, c0 = bits.Add64(u1, t2, c0)
   208  		t3, c0 = bits.Add64(u2, t3, c0)
   209  		t4, c0 = bits.Add64(u3, t4, c0)
   210  		t5, c0 = bits.Add64(u4, t5, c0)
   211  		t6, c0 = bits.Add64(u5, t6, c0)
   212  		t7, c0 = bits.Add64(u6, t7, c0)
   213  		t8, c0 = bits.Add64(u7, t8, c0)
   214  		t9, c0 = bits.Add64(u8, t9, c0)
   215  		c2, _ = bits.Add64(u9, c2, c0)
   216  
   217  		m := qInvNeg * t0
   218  
   219  		u0, c1 = bits.Mul64(m, q0)
   220  		_, c0 = bits.Add64(t0, c1, 0)
   221  		u1, c1 = bits.Mul64(m, q1)
   222  		t0, c0 = bits.Add64(t1, c1, c0)
   223  		u2, c1 = bits.Mul64(m, q2)
   224  		t1, c0 = bits.Add64(t2, c1, c0)
   225  		u3, c1 = bits.Mul64(m, q3)
   226  		t2, c0 = bits.Add64(t3, c1, c0)
   227  		u4, c1 = bits.Mul64(m, q4)
   228  		t3, c0 = bits.Add64(t4, c1, c0)
   229  		u5, c1 = bits.Mul64(m, q5)
   230  		t4, c0 = bits.Add64(t5, c1, c0)
   231  		u6, c1 = bits.Mul64(m, q6)
   232  		t5, c0 = bits.Add64(t6, c1, c0)
   233  		u7, c1 = bits.Mul64(m, q7)
   234  		t6, c0 = bits.Add64(t7, c1, c0)
   235  		u8, c1 = bits.Mul64(m, q8)
   236  		t7, c0 = bits.Add64(t8, c1, c0)
   237  		u9, c1 = bits.Mul64(m, q9)
   238  
   239  		t8, c0 = bits.Add64(0, c1, c0)
   240  		u9, _ = bits.Add64(u9, 0, c0)
   241  		t0, c0 = bits.Add64(u0, t0, 0)
   242  		t1, c0 = bits.Add64(u1, t1, c0)
   243  		t2, c0 = bits.Add64(u2, t2, c0)
   244  		t3, c0 = bits.Add64(u3, t3, c0)
   245  		t4, c0 = bits.Add64(u4, t4, c0)
   246  		t5, c0 = bits.Add64(u5, t5, c0)
   247  		t6, c0 = bits.Add64(u6, t6, c0)
   248  		t7, c0 = bits.Add64(u7, t7, c0)
   249  		t8, c0 = bits.Add64(u8, t8, c0)
   250  		c2, _ = bits.Add64(c2, 0, c0)
   251  		t8, c0 = bits.Add64(t9, t8, 0)
   252  		t9, _ = bits.Add64(u9, c2, c0)
   253  
   254  	}
   255  	{
   256  		var c0, c1, c2 uint64
   257  		v := x[2]
   258  		u0, c1 = bits.Mul64(v, y[0])
   259  		t0, c0 = bits.Add64(c1, t0, 0)
   260  		u1, c1 = bits.Mul64(v, y[1])
   261  		t1, c0 = bits.Add64(c1, t1, c0)
   262  		u2, c1 = bits.Mul64(v, y[2])
   263  		t2, c0 = bits.Add64(c1, t2, c0)
   264  		u3, c1 = bits.Mul64(v, y[3])
   265  		t3, c0 = bits.Add64(c1, t3, c0)
   266  		u4, c1 = bits.Mul64(v, y[4])
   267  		t4, c0 = bits.Add64(c1, t4, c0)
   268  		u5, c1 = bits.Mul64(v, y[5])
   269  		t5, c0 = bits.Add64(c1, t5, c0)
   270  		u6, c1 = bits.Mul64(v, y[6])
   271  		t6, c0 = bits.Add64(c1, t6, c0)
   272  		u7, c1 = bits.Mul64(v, y[7])
   273  		t7, c0 = bits.Add64(c1, t7, c0)
   274  		u8, c1 = bits.Mul64(v, y[8])
   275  		t8, c0 = bits.Add64(c1, t8, c0)
   276  		u9, c1 = bits.Mul64(v, y[9])
   277  		t9, c0 = bits.Add64(c1, t9, c0)
   278  
   279  		c2, _ = bits.Add64(0, 0, c0)
   280  		t1, c0 = bits.Add64(u0, t1, 0)
   281  		t2, c0 = bits.Add64(u1, t2, c0)
   282  		t3, c0 = bits.Add64(u2, t3, c0)
   283  		t4, c0 = bits.Add64(u3, t4, c0)
   284  		t5, c0 = bits.Add64(u4, t5, c0)
   285  		t6, c0 = bits.Add64(u5, t6, c0)
   286  		t7, c0 = bits.Add64(u6, t7, c0)
   287  		t8, c0 = bits.Add64(u7, t8, c0)
   288  		t9, c0 = bits.Add64(u8, t9, c0)
   289  		c2, _ = bits.Add64(u9, c2, c0)
   290  
   291  		m := qInvNeg * t0
   292  
   293  		u0, c1 = bits.Mul64(m, q0)
   294  		_, c0 = bits.Add64(t0, c1, 0)
   295  		u1, c1 = bits.Mul64(m, q1)
   296  		t0, c0 = bits.Add64(t1, c1, c0)
   297  		u2, c1 = bits.Mul64(m, q2)
   298  		t1, c0 = bits.Add64(t2, c1, c0)
   299  		u3, c1 = bits.Mul64(m, q3)
   300  		t2, c0 = bits.Add64(t3, c1, c0)
   301  		u4, c1 = bits.Mul64(m, q4)
   302  		t3, c0 = bits.Add64(t4, c1, c0)
   303  		u5, c1 = bits.Mul64(m, q5)
   304  		t4, c0 = bits.Add64(t5, c1, c0)
   305  		u6, c1 = bits.Mul64(m, q6)
   306  		t5, c0 = bits.Add64(t6, c1, c0)
   307  		u7, c1 = bits.Mul64(m, q7)
   308  		t6, c0 = bits.Add64(t7, c1, c0)
   309  		u8, c1 = bits.Mul64(m, q8)
   310  		t7, c0 = bits.Add64(t8, c1, c0)
   311  		u9, c1 = bits.Mul64(m, q9)
   312  
   313  		t8, c0 = bits.Add64(0, c1, c0)
   314  		u9, _ = bits.Add64(u9, 0, c0)
   315  		t0, c0 = bits.Add64(u0, t0, 0)
   316  		t1, c0 = bits.Add64(u1, t1, c0)
   317  		t2, c0 = bits.Add64(u2, t2, c0)
   318  		t3, c0 = bits.Add64(u3, t3, c0)
   319  		t4, c0 = bits.Add64(u4, t4, c0)
   320  		t5, c0 = bits.Add64(u5, t5, c0)
   321  		t6, c0 = bits.Add64(u6, t6, c0)
   322  		t7, c0 = bits.Add64(u7, t7, c0)
   323  		t8, c0 = bits.Add64(u8, t8, c0)
   324  		c2, _ = bits.Add64(c2, 0, c0)
   325  		t8, c0 = bits.Add64(t9, t8, 0)
   326  		t9, _ = bits.Add64(u9, c2, c0)
   327  
   328  	}
   329  	{
   330  		var c0, c1, c2 uint64
   331  		v := x[3]
   332  		u0, c1 = bits.Mul64(v, y[0])
   333  		t0, c0 = bits.Add64(c1, t0, 0)
   334  		u1, c1 = bits.Mul64(v, y[1])
   335  		t1, c0 = bits.Add64(c1, t1, c0)
   336  		u2, c1 = bits.Mul64(v, y[2])
   337  		t2, c0 = bits.Add64(c1, t2, c0)
   338  		u3, c1 = bits.Mul64(v, y[3])
   339  		t3, c0 = bits.Add64(c1, t3, c0)
   340  		u4, c1 = bits.Mul64(v, y[4])
   341  		t4, c0 = bits.Add64(c1, t4, c0)
   342  		u5, c1 = bits.Mul64(v, y[5])
   343  		t5, c0 = bits.Add64(c1, t5, c0)
   344  		u6, c1 = bits.Mul64(v, y[6])
   345  		t6, c0 = bits.Add64(c1, t6, c0)
   346  		u7, c1 = bits.Mul64(v, y[7])
   347  		t7, c0 = bits.Add64(c1, t7, c0)
   348  		u8, c1 = bits.Mul64(v, y[8])
   349  		t8, c0 = bits.Add64(c1, t8, c0)
   350  		u9, c1 = bits.Mul64(v, y[9])
   351  		t9, c0 = bits.Add64(c1, t9, c0)
   352  
   353  		c2, _ = bits.Add64(0, 0, c0)
   354  		t1, c0 = bits.Add64(u0, t1, 0)
   355  		t2, c0 = bits.Add64(u1, t2, c0)
   356  		t3, c0 = bits.Add64(u2, t3, c0)
   357  		t4, c0 = bits.Add64(u3, t4, c0)
   358  		t5, c0 = bits.Add64(u4, t5, c0)
   359  		t6, c0 = bits.Add64(u5, t6, c0)
   360  		t7, c0 = bits.Add64(u6, t7, c0)
   361  		t8, c0 = bits.Add64(u7, t8, c0)
   362  		t9, c0 = bits.Add64(u8, t9, c0)
   363  		c2, _ = bits.Add64(u9, c2, c0)
   364  
   365  		m := qInvNeg * t0
   366  
   367  		u0, c1 = bits.Mul64(m, q0)
   368  		_, c0 = bits.Add64(t0, c1, 0)
   369  		u1, c1 = bits.Mul64(m, q1)
   370  		t0, c0 = bits.Add64(t1, c1, c0)
   371  		u2, c1 = bits.Mul64(m, q2)
   372  		t1, c0 = bits.Add64(t2, c1, c0)
   373  		u3, c1 = bits.Mul64(m, q3)
   374  		t2, c0 = bits.Add64(t3, c1, c0)
   375  		u4, c1 = bits.Mul64(m, q4)
   376  		t3, c0 = bits.Add64(t4, c1, c0)
   377  		u5, c1 = bits.Mul64(m, q5)
   378  		t4, c0 = bits.Add64(t5, c1, c0)
   379  		u6, c1 = bits.Mul64(m, q6)
   380  		t5, c0 = bits.Add64(t6, c1, c0)
   381  		u7, c1 = bits.Mul64(m, q7)
   382  		t6, c0 = bits.Add64(t7, c1, c0)
   383  		u8, c1 = bits.Mul64(m, q8)
   384  		t7, c0 = bits.Add64(t8, c1, c0)
   385  		u9, c1 = bits.Mul64(m, q9)
   386  
   387  		t8, c0 = bits.Add64(0, c1, c0)
   388  		u9, _ = bits.Add64(u9, 0, c0)
   389  		t0, c0 = bits.Add64(u0, t0, 0)
   390  		t1, c0 = bits.Add64(u1, t1, c0)
   391  		t2, c0 = bits.Add64(u2, t2, c0)
   392  		t3, c0 = bits.Add64(u3, t3, c0)
   393  		t4, c0 = bits.Add64(u4, t4, c0)
   394  		t5, c0 = bits.Add64(u5, t5, c0)
   395  		t6, c0 = bits.Add64(u6, t6, c0)
   396  		t7, c0 = bits.Add64(u7, t7, c0)
   397  		t8, c0 = bits.Add64(u8, t8, c0)
   398  		c2, _ = bits.Add64(c2, 0, c0)
   399  		t8, c0 = bits.Add64(t9, t8, 0)
   400  		t9, _ = bits.Add64(u9, c2, c0)
   401  
   402  	}
   403  	{
   404  		var c0, c1, c2 uint64
   405  		v := x[4]
   406  		u0, c1 = bits.Mul64(v, y[0])
   407  		t0, c0 = bits.Add64(c1, t0, 0)
   408  		u1, c1 = bits.Mul64(v, y[1])
   409  		t1, c0 = bits.Add64(c1, t1, c0)
   410  		u2, c1 = bits.Mul64(v, y[2])
   411  		t2, c0 = bits.Add64(c1, t2, c0)
   412  		u3, c1 = bits.Mul64(v, y[3])
   413  		t3, c0 = bits.Add64(c1, t3, c0)
   414  		u4, c1 = bits.Mul64(v, y[4])
   415  		t4, c0 = bits.Add64(c1, t4, c0)
   416  		u5, c1 = bits.Mul64(v, y[5])
   417  		t5, c0 = bits.Add64(c1, t5, c0)
   418  		u6, c1 = bits.Mul64(v, y[6])
   419  		t6, c0 = bits.Add64(c1, t6, c0)
   420  		u7, c1 = bits.Mul64(v, y[7])
   421  		t7, c0 = bits.Add64(c1, t7, c0)
   422  		u8, c1 = bits.Mul64(v, y[8])
   423  		t8, c0 = bits.Add64(c1, t8, c0)
   424  		u9, c1 = bits.Mul64(v, y[9])
   425  		t9, c0 = bits.Add64(c1, t9, c0)
   426  
   427  		c2, _ = bits.Add64(0, 0, c0)
   428  		t1, c0 = bits.Add64(u0, t1, 0)
   429  		t2, c0 = bits.Add64(u1, t2, c0)
   430  		t3, c0 = bits.Add64(u2, t3, c0)
   431  		t4, c0 = bits.Add64(u3, t4, c0)
   432  		t5, c0 = bits.Add64(u4, t5, c0)
   433  		t6, c0 = bits.Add64(u5, t6, c0)
   434  		t7, c0 = bits.Add64(u6, t7, c0)
   435  		t8, c0 = bits.Add64(u7, t8, c0)
   436  		t9, c0 = bits.Add64(u8, t9, c0)
   437  		c2, _ = bits.Add64(u9, c2, c0)
   438  
   439  		m := qInvNeg * t0
   440  
   441  		u0, c1 = bits.Mul64(m, q0)
   442  		_, c0 = bits.Add64(t0, c1, 0)
   443  		u1, c1 = bits.Mul64(m, q1)
   444  		t0, c0 = bits.Add64(t1, c1, c0)
   445  		u2, c1 = bits.Mul64(m, q2)
   446  		t1, c0 = bits.Add64(t2, c1, c0)
   447  		u3, c1 = bits.Mul64(m, q3)
   448  		t2, c0 = bits.Add64(t3, c1, c0)
   449  		u4, c1 = bits.Mul64(m, q4)
   450  		t3, c0 = bits.Add64(t4, c1, c0)
   451  		u5, c1 = bits.Mul64(m, q5)
   452  		t4, c0 = bits.Add64(t5, c1, c0)
   453  		u6, c1 = bits.Mul64(m, q6)
   454  		t5, c0 = bits.Add64(t6, c1, c0)
   455  		u7, c1 = bits.Mul64(m, q7)
   456  		t6, c0 = bits.Add64(t7, c1, c0)
   457  		u8, c1 = bits.Mul64(m, q8)
   458  		t7, c0 = bits.Add64(t8, c1, c0)
   459  		u9, c1 = bits.Mul64(m, q9)
   460  
   461  		t8, c0 = bits.Add64(0, c1, c0)
   462  		u9, _ = bits.Add64(u9, 0, c0)
   463  		t0, c0 = bits.Add64(u0, t0, 0)
   464  		t1, c0 = bits.Add64(u1, t1, c0)
   465  		t2, c0 = bits.Add64(u2, t2, c0)
   466  		t3, c0 = bits.Add64(u3, t3, c0)
   467  		t4, c0 = bits.Add64(u4, t4, c0)
   468  		t5, c0 = bits.Add64(u5, t5, c0)
   469  		t6, c0 = bits.Add64(u6, t6, c0)
   470  		t7, c0 = bits.Add64(u7, t7, c0)
   471  		t8, c0 = bits.Add64(u8, t8, c0)
   472  		c2, _ = bits.Add64(c2, 0, c0)
   473  		t8, c0 = bits.Add64(t9, t8, 0)
   474  		t9, _ = bits.Add64(u9, c2, c0)
   475  
   476  	}
   477  	{
   478  		var c0, c1, c2 uint64
   479  		v := x[5]
   480  		u0, c1 = bits.Mul64(v, y[0])
   481  		t0, c0 = bits.Add64(c1, t0, 0)
   482  		u1, c1 = bits.Mul64(v, y[1])
   483  		t1, c0 = bits.Add64(c1, t1, c0)
   484  		u2, c1 = bits.Mul64(v, y[2])
   485  		t2, c0 = bits.Add64(c1, t2, c0)
   486  		u3, c1 = bits.Mul64(v, y[3])
   487  		t3, c0 = bits.Add64(c1, t3, c0)
   488  		u4, c1 = bits.Mul64(v, y[4])
   489  		t4, c0 = bits.Add64(c1, t4, c0)
   490  		u5, c1 = bits.Mul64(v, y[5])
   491  		t5, c0 = bits.Add64(c1, t5, c0)
   492  		u6, c1 = bits.Mul64(v, y[6])
   493  		t6, c0 = bits.Add64(c1, t6, c0)
   494  		u7, c1 = bits.Mul64(v, y[7])
   495  		t7, c0 = bits.Add64(c1, t7, c0)
   496  		u8, c1 = bits.Mul64(v, y[8])
   497  		t8, c0 = bits.Add64(c1, t8, c0)
   498  		u9, c1 = bits.Mul64(v, y[9])
   499  		t9, c0 = bits.Add64(c1, t9, c0)
   500  
   501  		c2, _ = bits.Add64(0, 0, c0)
   502  		t1, c0 = bits.Add64(u0, t1, 0)
   503  		t2, c0 = bits.Add64(u1, t2, c0)
   504  		t3, c0 = bits.Add64(u2, t3, c0)
   505  		t4, c0 = bits.Add64(u3, t4, c0)
   506  		t5, c0 = bits.Add64(u4, t5, c0)
   507  		t6, c0 = bits.Add64(u5, t6, c0)
   508  		t7, c0 = bits.Add64(u6, t7, c0)
   509  		t8, c0 = bits.Add64(u7, t8, c0)
   510  		t9, c0 = bits.Add64(u8, t9, c0)
   511  		c2, _ = bits.Add64(u9, c2, c0)
   512  
   513  		m := qInvNeg * t0
   514  
   515  		u0, c1 = bits.Mul64(m, q0)
   516  		_, c0 = bits.Add64(t0, c1, 0)
   517  		u1, c1 = bits.Mul64(m, q1)
   518  		t0, c0 = bits.Add64(t1, c1, c0)
   519  		u2, c1 = bits.Mul64(m, q2)
   520  		t1, c0 = bits.Add64(t2, c1, c0)
   521  		u3, c1 = bits.Mul64(m, q3)
   522  		t2, c0 = bits.Add64(t3, c1, c0)
   523  		u4, c1 = bits.Mul64(m, q4)
   524  		t3, c0 = bits.Add64(t4, c1, c0)
   525  		u5, c1 = bits.Mul64(m, q5)
   526  		t4, c0 = bits.Add64(t5, c1, c0)
   527  		u6, c1 = bits.Mul64(m, q6)
   528  		t5, c0 = bits.Add64(t6, c1, c0)
   529  		u7, c1 = bits.Mul64(m, q7)
   530  		t6, c0 = bits.Add64(t7, c1, c0)
   531  		u8, c1 = bits.Mul64(m, q8)
   532  		t7, c0 = bits.Add64(t8, c1, c0)
   533  		u9, c1 = bits.Mul64(m, q9)
   534  
   535  		t8, c0 = bits.Add64(0, c1, c0)
   536  		u9, _ = bits.Add64(u9, 0, c0)
   537  		t0, c0 = bits.Add64(u0, t0, 0)
   538  		t1, c0 = bits.Add64(u1, t1, c0)
   539  		t2, c0 = bits.Add64(u2, t2, c0)
   540  		t3, c0 = bits.Add64(u3, t3, c0)
   541  		t4, c0 = bits.Add64(u4, t4, c0)
   542  		t5, c0 = bits.Add64(u5, t5, c0)
   543  		t6, c0 = bits.Add64(u6, t6, c0)
   544  		t7, c0 = bits.Add64(u7, t7, c0)
   545  		t8, c0 = bits.Add64(u8, t8, c0)
   546  		c2, _ = bits.Add64(c2, 0, c0)
   547  		t8, c0 = bits.Add64(t9, t8, 0)
   548  		t9, _ = bits.Add64(u9, c2, c0)
   549  
   550  	}
   551  	{
   552  		var c0, c1, c2 uint64
   553  		v := x[6]
   554  		u0, c1 = bits.Mul64(v, y[0])
   555  		t0, c0 = bits.Add64(c1, t0, 0)
   556  		u1, c1 = bits.Mul64(v, y[1])
   557  		t1, c0 = bits.Add64(c1, t1, c0)
   558  		u2, c1 = bits.Mul64(v, y[2])
   559  		t2, c0 = bits.Add64(c1, t2, c0)
   560  		u3, c1 = bits.Mul64(v, y[3])
   561  		t3, c0 = bits.Add64(c1, t3, c0)
   562  		u4, c1 = bits.Mul64(v, y[4])
   563  		t4, c0 = bits.Add64(c1, t4, c0)
   564  		u5, c1 = bits.Mul64(v, y[5])
   565  		t5, c0 = bits.Add64(c1, t5, c0)
   566  		u6, c1 = bits.Mul64(v, y[6])
   567  		t6, c0 = bits.Add64(c1, t6, c0)
   568  		u7, c1 = bits.Mul64(v, y[7])
   569  		t7, c0 = bits.Add64(c1, t7, c0)
   570  		u8, c1 = bits.Mul64(v, y[8])
   571  		t8, c0 = bits.Add64(c1, t8, c0)
   572  		u9, c1 = bits.Mul64(v, y[9])
   573  		t9, c0 = bits.Add64(c1, t9, c0)
   574  
   575  		c2, _ = bits.Add64(0, 0, c0)
   576  		t1, c0 = bits.Add64(u0, t1, 0)
   577  		t2, c0 = bits.Add64(u1, t2, c0)
   578  		t3, c0 = bits.Add64(u2, t3, c0)
   579  		t4, c0 = bits.Add64(u3, t4, c0)
   580  		t5, c0 = bits.Add64(u4, t5, c0)
   581  		t6, c0 = bits.Add64(u5, t6, c0)
   582  		t7, c0 = bits.Add64(u6, t7, c0)
   583  		t8, c0 = bits.Add64(u7, t8, c0)
   584  		t9, c0 = bits.Add64(u8, t9, c0)
   585  		c2, _ = bits.Add64(u9, c2, c0)
   586  
   587  		m := qInvNeg * t0
   588  
   589  		u0, c1 = bits.Mul64(m, q0)
   590  		_, c0 = bits.Add64(t0, c1, 0)
   591  		u1, c1 = bits.Mul64(m, q1)
   592  		t0, c0 = bits.Add64(t1, c1, c0)
   593  		u2, c1 = bits.Mul64(m, q2)
   594  		t1, c0 = bits.Add64(t2, c1, c0)
   595  		u3, c1 = bits.Mul64(m, q3)
   596  		t2, c0 = bits.Add64(t3, c1, c0)
   597  		u4, c1 = bits.Mul64(m, q4)
   598  		t3, c0 = bits.Add64(t4, c1, c0)
   599  		u5, c1 = bits.Mul64(m, q5)
   600  		t4, c0 = bits.Add64(t5, c1, c0)
   601  		u6, c1 = bits.Mul64(m, q6)
   602  		t5, c0 = bits.Add64(t6, c1, c0)
   603  		u7, c1 = bits.Mul64(m, q7)
   604  		t6, c0 = bits.Add64(t7, c1, c0)
   605  		u8, c1 = bits.Mul64(m, q8)
   606  		t7, c0 = bits.Add64(t8, c1, c0)
   607  		u9, c1 = bits.Mul64(m, q9)
   608  
   609  		t8, c0 = bits.Add64(0, c1, c0)
   610  		u9, _ = bits.Add64(u9, 0, c0)
   611  		t0, c0 = bits.Add64(u0, t0, 0)
   612  		t1, c0 = bits.Add64(u1, t1, c0)
   613  		t2, c0 = bits.Add64(u2, t2, c0)
   614  		t3, c0 = bits.Add64(u3, t3, c0)
   615  		t4, c0 = bits.Add64(u4, t4, c0)
   616  		t5, c0 = bits.Add64(u5, t5, c0)
   617  		t6, c0 = bits.Add64(u6, t6, c0)
   618  		t7, c0 = bits.Add64(u7, t7, c0)
   619  		t8, c0 = bits.Add64(u8, t8, c0)
   620  		c2, _ = bits.Add64(c2, 0, c0)
   621  		t8, c0 = bits.Add64(t9, t8, 0)
   622  		t9, _ = bits.Add64(u9, c2, c0)
   623  
   624  	}
   625  	{
   626  		var c0, c1, c2 uint64
   627  		v := x[7]
   628  		u0, c1 = bits.Mul64(v, y[0])
   629  		t0, c0 = bits.Add64(c1, t0, 0)
   630  		u1, c1 = bits.Mul64(v, y[1])
   631  		t1, c0 = bits.Add64(c1, t1, c0)
   632  		u2, c1 = bits.Mul64(v, y[2])
   633  		t2, c0 = bits.Add64(c1, t2, c0)
   634  		u3, c1 = bits.Mul64(v, y[3])
   635  		t3, c0 = bits.Add64(c1, t3, c0)
   636  		u4, c1 = bits.Mul64(v, y[4])
   637  		t4, c0 = bits.Add64(c1, t4, c0)
   638  		u5, c1 = bits.Mul64(v, y[5])
   639  		t5, c0 = bits.Add64(c1, t5, c0)
   640  		u6, c1 = bits.Mul64(v, y[6])
   641  		t6, c0 = bits.Add64(c1, t6, c0)
   642  		u7, c1 = bits.Mul64(v, y[7])
   643  		t7, c0 = bits.Add64(c1, t7, c0)
   644  		u8, c1 = bits.Mul64(v, y[8])
   645  		t8, c0 = bits.Add64(c1, t8, c0)
   646  		u9, c1 = bits.Mul64(v, y[9])
   647  		t9, c0 = bits.Add64(c1, t9, c0)
   648  
   649  		c2, _ = bits.Add64(0, 0, c0)
   650  		t1, c0 = bits.Add64(u0, t1, 0)
   651  		t2, c0 = bits.Add64(u1, t2, c0)
   652  		t3, c0 = bits.Add64(u2, t3, c0)
   653  		t4, c0 = bits.Add64(u3, t4, c0)
   654  		t5, c0 = bits.Add64(u4, t5, c0)
   655  		t6, c0 = bits.Add64(u5, t6, c0)
   656  		t7, c0 = bits.Add64(u6, t7, c0)
   657  		t8, c0 = bits.Add64(u7, t8, c0)
   658  		t9, c0 = bits.Add64(u8, t9, c0)
   659  		c2, _ = bits.Add64(u9, c2, c0)
   660  
   661  		m := qInvNeg * t0
   662  
   663  		u0, c1 = bits.Mul64(m, q0)
   664  		_, c0 = bits.Add64(t0, c1, 0)
   665  		u1, c1 = bits.Mul64(m, q1)
   666  		t0, c0 = bits.Add64(t1, c1, c0)
   667  		u2, c1 = bits.Mul64(m, q2)
   668  		t1, c0 = bits.Add64(t2, c1, c0)
   669  		u3, c1 = bits.Mul64(m, q3)
   670  		t2, c0 = bits.Add64(t3, c1, c0)
   671  		u4, c1 = bits.Mul64(m, q4)
   672  		t3, c0 = bits.Add64(t4, c1, c0)
   673  		u5, c1 = bits.Mul64(m, q5)
   674  		t4, c0 = bits.Add64(t5, c1, c0)
   675  		u6, c1 = bits.Mul64(m, q6)
   676  		t5, c0 = bits.Add64(t6, c1, c0)
   677  		u7, c1 = bits.Mul64(m, q7)
   678  		t6, c0 = bits.Add64(t7, c1, c0)
   679  		u8, c1 = bits.Mul64(m, q8)
   680  		t7, c0 = bits.Add64(t8, c1, c0)
   681  		u9, c1 = bits.Mul64(m, q9)
   682  
   683  		t8, c0 = bits.Add64(0, c1, c0)
   684  		u9, _ = bits.Add64(u9, 0, c0)
   685  		t0, c0 = bits.Add64(u0, t0, 0)
   686  		t1, c0 = bits.Add64(u1, t1, c0)
   687  		t2, c0 = bits.Add64(u2, t2, c0)
   688  		t3, c0 = bits.Add64(u3, t3, c0)
   689  		t4, c0 = bits.Add64(u4, t4, c0)
   690  		t5, c0 = bits.Add64(u5, t5, c0)
   691  		t6, c0 = bits.Add64(u6, t6, c0)
   692  		t7, c0 = bits.Add64(u7, t7, c0)
   693  		t8, c0 = bits.Add64(u8, t8, c0)
   694  		c2, _ = bits.Add64(c2, 0, c0)
   695  		t8, c0 = bits.Add64(t9, t8, 0)
   696  		t9, _ = bits.Add64(u9, c2, c0)
   697  
   698  	}
   699  	{
   700  		var c0, c1, c2 uint64
   701  		v := x[8]
   702  		u0, c1 = bits.Mul64(v, y[0])
   703  		t0, c0 = bits.Add64(c1, t0, 0)
   704  		u1, c1 = bits.Mul64(v, y[1])
   705  		t1, c0 = bits.Add64(c1, t1, c0)
   706  		u2, c1 = bits.Mul64(v, y[2])
   707  		t2, c0 = bits.Add64(c1, t2, c0)
   708  		u3, c1 = bits.Mul64(v, y[3])
   709  		t3, c0 = bits.Add64(c1, t3, c0)
   710  		u4, c1 = bits.Mul64(v, y[4])
   711  		t4, c0 = bits.Add64(c1, t4, c0)
   712  		u5, c1 = bits.Mul64(v, y[5])
   713  		t5, c0 = bits.Add64(c1, t5, c0)
   714  		u6, c1 = bits.Mul64(v, y[6])
   715  		t6, c0 = bits.Add64(c1, t6, c0)
   716  		u7, c1 = bits.Mul64(v, y[7])
   717  		t7, c0 = bits.Add64(c1, t7, c0)
   718  		u8, c1 = bits.Mul64(v, y[8])
   719  		t8, c0 = bits.Add64(c1, t8, c0)
   720  		u9, c1 = bits.Mul64(v, y[9])
   721  		t9, c0 = bits.Add64(c1, t9, c0)
   722  
   723  		c2, _ = bits.Add64(0, 0, c0)
   724  		t1, c0 = bits.Add64(u0, t1, 0)
   725  		t2, c0 = bits.Add64(u1, t2, c0)
   726  		t3, c0 = bits.Add64(u2, t3, c0)
   727  		t4, c0 = bits.Add64(u3, t4, c0)
   728  		t5, c0 = bits.Add64(u4, t5, c0)
   729  		t6, c0 = bits.Add64(u5, t6, c0)
   730  		t7, c0 = bits.Add64(u6, t7, c0)
   731  		t8, c0 = bits.Add64(u7, t8, c0)
   732  		t9, c0 = bits.Add64(u8, t9, c0)
   733  		c2, _ = bits.Add64(u9, c2, c0)
   734  
   735  		m := qInvNeg * t0
   736  
   737  		u0, c1 = bits.Mul64(m, q0)
   738  		_, c0 = bits.Add64(t0, c1, 0)
   739  		u1, c1 = bits.Mul64(m, q1)
   740  		t0, c0 = bits.Add64(t1, c1, c0)
   741  		u2, c1 = bits.Mul64(m, q2)
   742  		t1, c0 = bits.Add64(t2, c1, c0)
   743  		u3, c1 = bits.Mul64(m, q3)
   744  		t2, c0 = bits.Add64(t3, c1, c0)
   745  		u4, c1 = bits.Mul64(m, q4)
   746  		t3, c0 = bits.Add64(t4, c1, c0)
   747  		u5, c1 = bits.Mul64(m, q5)
   748  		t4, c0 = bits.Add64(t5, c1, c0)
   749  		u6, c1 = bits.Mul64(m, q6)
   750  		t5, c0 = bits.Add64(t6, c1, c0)
   751  		u7, c1 = bits.Mul64(m, q7)
   752  		t6, c0 = bits.Add64(t7, c1, c0)
   753  		u8, c1 = bits.Mul64(m, q8)
   754  		t7, c0 = bits.Add64(t8, c1, c0)
   755  		u9, c1 = bits.Mul64(m, q9)
   756  
   757  		t8, c0 = bits.Add64(0, c1, c0)
   758  		u9, _ = bits.Add64(u9, 0, c0)
   759  		t0, c0 = bits.Add64(u0, t0, 0)
   760  		t1, c0 = bits.Add64(u1, t1, c0)
   761  		t2, c0 = bits.Add64(u2, t2, c0)
   762  		t3, c0 = bits.Add64(u3, t3, c0)
   763  		t4, c0 = bits.Add64(u4, t4, c0)
   764  		t5, c0 = bits.Add64(u5, t5, c0)
   765  		t6, c0 = bits.Add64(u6, t6, c0)
   766  		t7, c0 = bits.Add64(u7, t7, c0)
   767  		t8, c0 = bits.Add64(u8, t8, c0)
   768  		c2, _ = bits.Add64(c2, 0, c0)
   769  		t8, c0 = bits.Add64(t9, t8, 0)
   770  		t9, _ = bits.Add64(u9, c2, c0)
   771  
   772  	}
   773  	{
   774  		var c0, c1, c2 uint64
   775  		v := x[9]
   776  		u0, c1 = bits.Mul64(v, y[0])
   777  		t0, c0 = bits.Add64(c1, t0, 0)
   778  		u1, c1 = bits.Mul64(v, y[1])
   779  		t1, c0 = bits.Add64(c1, t1, c0)
   780  		u2, c1 = bits.Mul64(v, y[2])
   781  		t2, c0 = bits.Add64(c1, t2, c0)
   782  		u3, c1 = bits.Mul64(v, y[3])
   783  		t3, c0 = bits.Add64(c1, t3, c0)
   784  		u4, c1 = bits.Mul64(v, y[4])
   785  		t4, c0 = bits.Add64(c1, t4, c0)
   786  		u5, c1 = bits.Mul64(v, y[5])
   787  		t5, c0 = bits.Add64(c1, t5, c0)
   788  		u6, c1 = bits.Mul64(v, y[6])
   789  		t6, c0 = bits.Add64(c1, t6, c0)
   790  		u7, c1 = bits.Mul64(v, y[7])
   791  		t7, c0 = bits.Add64(c1, t7, c0)
   792  		u8, c1 = bits.Mul64(v, y[8])
   793  		t8, c0 = bits.Add64(c1, t8, c0)
   794  		u9, c1 = bits.Mul64(v, y[9])
   795  		t9, c0 = bits.Add64(c1, t9, c0)
   796  
   797  		c2, _ = bits.Add64(0, 0, c0)
   798  		t1, c0 = bits.Add64(u0, t1, 0)
   799  		t2, c0 = bits.Add64(u1, t2, c0)
   800  		t3, c0 = bits.Add64(u2, t3, c0)
   801  		t4, c0 = bits.Add64(u3, t4, c0)
   802  		t5, c0 = bits.Add64(u4, t5, c0)
   803  		t6, c0 = bits.Add64(u5, t6, c0)
   804  		t7, c0 = bits.Add64(u6, t7, c0)
   805  		t8, c0 = bits.Add64(u7, t8, c0)
   806  		t9, c0 = bits.Add64(u8, t9, c0)
   807  		c2, _ = bits.Add64(u9, c2, c0)
   808  
   809  		m := qInvNeg * t0
   810  
   811  		u0, c1 = bits.Mul64(m, q0)
   812  		_, c0 = bits.Add64(t0, c1, 0)
   813  		u1, c1 = bits.Mul64(m, q1)
   814  		t0, c0 = bits.Add64(t1, c1, c0)
   815  		u2, c1 = bits.Mul64(m, q2)
   816  		t1, c0 = bits.Add64(t2, c1, c0)
   817  		u3, c1 = bits.Mul64(m, q3)
   818  		t2, c0 = bits.Add64(t3, c1, c0)
   819  		u4, c1 = bits.Mul64(m, q4)
   820  		t3, c0 = bits.Add64(t4, c1, c0)
   821  		u5, c1 = bits.Mul64(m, q5)
   822  		t4, c0 = bits.Add64(t5, c1, c0)
   823  		u6, c1 = bits.Mul64(m, q6)
   824  		t5, c0 = bits.Add64(t6, c1, c0)
   825  		u7, c1 = bits.Mul64(m, q7)
   826  		t6, c0 = bits.Add64(t7, c1, c0)
   827  		u8, c1 = bits.Mul64(m, q8)
   828  		t7, c0 = bits.Add64(t8, c1, c0)
   829  		u9, c1 = bits.Mul64(m, q9)
   830  
   831  		t8, c0 = bits.Add64(0, c1, c0)
   832  		u9, _ = bits.Add64(u9, 0, c0)
   833  		t0, c0 = bits.Add64(u0, t0, 0)
   834  		t1, c0 = bits.Add64(u1, t1, c0)
   835  		t2, c0 = bits.Add64(u2, t2, c0)
   836  		t3, c0 = bits.Add64(u3, t3, c0)
   837  		t4, c0 = bits.Add64(u4, t4, c0)
   838  		t5, c0 = bits.Add64(u5, t5, c0)
   839  		t6, c0 = bits.Add64(u6, t6, c0)
   840  		t7, c0 = bits.Add64(u7, t7, c0)
   841  		t8, c0 = bits.Add64(u8, t8, c0)
   842  		c2, _ = bits.Add64(c2, 0, c0)
   843  		t8, c0 = bits.Add64(t9, t8, 0)
   844  		t9, _ = bits.Add64(u9, c2, c0)
   845  
   846  	}
   847  	z[0] = t0
   848  	z[1] = t1
   849  	z[2] = t2
   850  	z[3] = t3
   851  	z[4] = t4
   852  	z[5] = t5
   853  	z[6] = t6
   854  	z[7] = t7
   855  	z[8] = t8
   856  	z[9] = t9
   857  
   858  	// if z ⩾ q → z -= q
   859  	if !z.smallerThanModulus() {
   860  		var b uint64
   861  		z[0], b = bits.Sub64(z[0], q0, 0)
   862  		z[1], b = bits.Sub64(z[1], q1, b)
   863  		z[2], b = bits.Sub64(z[2], q2, b)
   864  		z[3], b = bits.Sub64(z[3], q3, b)
   865  		z[4], b = bits.Sub64(z[4], q4, b)
   866  		z[5], b = bits.Sub64(z[5], q5, b)
   867  		z[6], b = bits.Sub64(z[6], q6, b)
   868  		z[7], b = bits.Sub64(z[7], q7, b)
   869  		z[8], b = bits.Sub64(z[8], q8, b)
   870  		z[9], _ = bits.Sub64(z[9], q9, b)
   871  	}
   872  	return z
   873  }
   874  
   875  // Square z = x * x (mod q)
   876  //
   877  // x must be less than q
   878  func (z *Element) Square(x *Element) *Element {
   879  	// see Mul for algorithm documentation
   880  
   881  	var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 uint64
   882  	var u0, u1, u2, u3, u4, u5, u6, u7, u8, u9 uint64
   883  	{
   884  		var c0, c1, c2 uint64
   885  		v := x[0]
   886  		u0, t0 = bits.Mul64(v, x[0])
   887  		u1, t1 = bits.Mul64(v, x[1])
   888  		u2, t2 = bits.Mul64(v, x[2])
   889  		u3, t3 = bits.Mul64(v, x[3])
   890  		u4, t4 = bits.Mul64(v, x[4])
   891  		u5, t5 = bits.Mul64(v, x[5])
   892  		u6, t6 = bits.Mul64(v, x[6])
   893  		u7, t7 = bits.Mul64(v, x[7])
   894  		u8, t8 = bits.Mul64(v, x[8])
   895  		u9, t9 = bits.Mul64(v, x[9])
   896  		t1, c0 = bits.Add64(u0, t1, 0)
   897  		t2, c0 = bits.Add64(u1, t2, c0)
   898  		t3, c0 = bits.Add64(u2, t3, c0)
   899  		t4, c0 = bits.Add64(u3, t4, c0)
   900  		t5, c0 = bits.Add64(u4, t5, c0)
   901  		t6, c0 = bits.Add64(u5, t6, c0)
   902  		t7, c0 = bits.Add64(u6, t7, c0)
   903  		t8, c0 = bits.Add64(u7, t8, c0)
   904  		t9, c0 = bits.Add64(u8, t9, c0)
   905  		c2, _ = bits.Add64(u9, 0, c0)
   906  
   907  		m := qInvNeg * t0
   908  
   909  		u0, c1 = bits.Mul64(m, q0)
   910  		_, c0 = bits.Add64(t0, c1, 0)
   911  		u1, c1 = bits.Mul64(m, q1)
   912  		t0, c0 = bits.Add64(t1, c1, c0)
   913  		u2, c1 = bits.Mul64(m, q2)
   914  		t1, c0 = bits.Add64(t2, c1, c0)
   915  		u3, c1 = bits.Mul64(m, q3)
   916  		t2, c0 = bits.Add64(t3, c1, c0)
   917  		u4, c1 = bits.Mul64(m, q4)
   918  		t3, c0 = bits.Add64(t4, c1, c0)
   919  		u5, c1 = bits.Mul64(m, q5)
   920  		t4, c0 = bits.Add64(t5, c1, c0)
   921  		u6, c1 = bits.Mul64(m, q6)
   922  		t5, c0 = bits.Add64(t6, c1, c0)
   923  		u7, c1 = bits.Mul64(m, q7)
   924  		t6, c0 = bits.Add64(t7, c1, c0)
   925  		u8, c1 = bits.Mul64(m, q8)
   926  		t7, c0 = bits.Add64(t8, c1, c0)
   927  		u9, c1 = bits.Mul64(m, q9)
   928  
   929  		t8, c0 = bits.Add64(0, c1, c0)
   930  		u9, _ = bits.Add64(u9, 0, c0)
   931  		t0, c0 = bits.Add64(u0, t0, 0)
   932  		t1, c0 = bits.Add64(u1, t1, c0)
   933  		t2, c0 = bits.Add64(u2, t2, c0)
   934  		t3, c0 = bits.Add64(u3, t3, c0)
   935  		t4, c0 = bits.Add64(u4, t4, c0)
   936  		t5, c0 = bits.Add64(u5, t5, c0)
   937  		t6, c0 = bits.Add64(u6, t6, c0)
   938  		t7, c0 = bits.Add64(u7, t7, c0)
   939  		t8, c0 = bits.Add64(u8, t8, c0)
   940  		c2, _ = bits.Add64(c2, 0, c0)
   941  		t8, c0 = bits.Add64(t9, t8, 0)
   942  		t9, _ = bits.Add64(u9, c2, c0)
   943  
   944  	}
   945  	{
   946  		var c0, c1, c2 uint64
   947  		v := x[1]
   948  		u0, c1 = bits.Mul64(v, x[0])
   949  		t0, c0 = bits.Add64(c1, t0, 0)
   950  		u1, c1 = bits.Mul64(v, x[1])
   951  		t1, c0 = bits.Add64(c1, t1, c0)
   952  		u2, c1 = bits.Mul64(v, x[2])
   953  		t2, c0 = bits.Add64(c1, t2, c0)
   954  		u3, c1 = bits.Mul64(v, x[3])
   955  		t3, c0 = bits.Add64(c1, t3, c0)
   956  		u4, c1 = bits.Mul64(v, x[4])
   957  		t4, c0 = bits.Add64(c1, t4, c0)
   958  		u5, c1 = bits.Mul64(v, x[5])
   959  		t5, c0 = bits.Add64(c1, t5, c0)
   960  		u6, c1 = bits.Mul64(v, x[6])
   961  		t6, c0 = bits.Add64(c1, t6, c0)
   962  		u7, c1 = bits.Mul64(v, x[7])
   963  		t7, c0 = bits.Add64(c1, t7, c0)
   964  		u8, c1 = bits.Mul64(v, x[8])
   965  		t8, c0 = bits.Add64(c1, t8, c0)
   966  		u9, c1 = bits.Mul64(v, x[9])
   967  		t9, c0 = bits.Add64(c1, t9, c0)
   968  
   969  		c2, _ = bits.Add64(0, 0, c0)
   970  		t1, c0 = bits.Add64(u0, t1, 0)
   971  		t2, c0 = bits.Add64(u1, t2, c0)
   972  		t3, c0 = bits.Add64(u2, t3, c0)
   973  		t4, c0 = bits.Add64(u3, t4, c0)
   974  		t5, c0 = bits.Add64(u4, t5, c0)
   975  		t6, c0 = bits.Add64(u5, t6, c0)
   976  		t7, c0 = bits.Add64(u6, t7, c0)
   977  		t8, c0 = bits.Add64(u7, t8, c0)
   978  		t9, c0 = bits.Add64(u8, t9, c0)
   979  		c2, _ = bits.Add64(u9, c2, c0)
   980  
   981  		m := qInvNeg * t0
   982  
   983  		u0, c1 = bits.Mul64(m, q0)
   984  		_, c0 = bits.Add64(t0, c1, 0)
   985  		u1, c1 = bits.Mul64(m, q1)
   986  		t0, c0 = bits.Add64(t1, c1, c0)
   987  		u2, c1 = bits.Mul64(m, q2)
   988  		t1, c0 = bits.Add64(t2, c1, c0)
   989  		u3, c1 = bits.Mul64(m, q3)
   990  		t2, c0 = bits.Add64(t3, c1, c0)
   991  		u4, c1 = bits.Mul64(m, q4)
   992  		t3, c0 = bits.Add64(t4, c1, c0)
   993  		u5, c1 = bits.Mul64(m, q5)
   994  		t4, c0 = bits.Add64(t5, c1, c0)
   995  		u6, c1 = bits.Mul64(m, q6)
   996  		t5, c0 = bits.Add64(t6, c1, c0)
   997  		u7, c1 = bits.Mul64(m, q7)
   998  		t6, c0 = bits.Add64(t7, c1, c0)
   999  		u8, c1 = bits.Mul64(m, q8)
  1000  		t7, c0 = bits.Add64(t8, c1, c0)
  1001  		u9, c1 = bits.Mul64(m, q9)
  1002  
  1003  		t8, c0 = bits.Add64(0, c1, c0)
  1004  		u9, _ = bits.Add64(u9, 0, c0)
  1005  		t0, c0 = bits.Add64(u0, t0, 0)
  1006  		t1, c0 = bits.Add64(u1, t1, c0)
  1007  		t2, c0 = bits.Add64(u2, t2, c0)
  1008  		t3, c0 = bits.Add64(u3, t3, c0)
  1009  		t4, c0 = bits.Add64(u4, t4, c0)
  1010  		t5, c0 = bits.Add64(u5, t5, c0)
  1011  		t6, c0 = bits.Add64(u6, t6, c0)
  1012  		t7, c0 = bits.Add64(u7, t7, c0)
  1013  		t8, c0 = bits.Add64(u8, t8, c0)
  1014  		c2, _ = bits.Add64(c2, 0, c0)
  1015  		t8, c0 = bits.Add64(t9, t8, 0)
  1016  		t9, _ = bits.Add64(u9, c2, c0)
  1017  
  1018  	}
  1019  	{
  1020  		var c0, c1, c2 uint64
  1021  		v := x[2]
  1022  		u0, c1 = bits.Mul64(v, x[0])
  1023  		t0, c0 = bits.Add64(c1, t0, 0)
  1024  		u1, c1 = bits.Mul64(v, x[1])
  1025  		t1, c0 = bits.Add64(c1, t1, c0)
  1026  		u2, c1 = bits.Mul64(v, x[2])
  1027  		t2, c0 = bits.Add64(c1, t2, c0)
  1028  		u3, c1 = bits.Mul64(v, x[3])
  1029  		t3, c0 = bits.Add64(c1, t3, c0)
  1030  		u4, c1 = bits.Mul64(v, x[4])
  1031  		t4, c0 = bits.Add64(c1, t4, c0)
  1032  		u5, c1 = bits.Mul64(v, x[5])
  1033  		t5, c0 = bits.Add64(c1, t5, c0)
  1034  		u6, c1 = bits.Mul64(v, x[6])
  1035  		t6, c0 = bits.Add64(c1, t6, c0)
  1036  		u7, c1 = bits.Mul64(v, x[7])
  1037  		t7, c0 = bits.Add64(c1, t7, c0)
  1038  		u8, c1 = bits.Mul64(v, x[8])
  1039  		t8, c0 = bits.Add64(c1, t8, c0)
  1040  		u9, c1 = bits.Mul64(v, x[9])
  1041  		t9, c0 = bits.Add64(c1, t9, c0)
  1042  
  1043  		c2, _ = bits.Add64(0, 0, c0)
  1044  		t1, c0 = bits.Add64(u0, t1, 0)
  1045  		t2, c0 = bits.Add64(u1, t2, c0)
  1046  		t3, c0 = bits.Add64(u2, t3, c0)
  1047  		t4, c0 = bits.Add64(u3, t4, c0)
  1048  		t5, c0 = bits.Add64(u4, t5, c0)
  1049  		t6, c0 = bits.Add64(u5, t6, c0)
  1050  		t7, c0 = bits.Add64(u6, t7, c0)
  1051  		t8, c0 = bits.Add64(u7, t8, c0)
  1052  		t9, c0 = bits.Add64(u8, t9, c0)
  1053  		c2, _ = bits.Add64(u9, c2, c0)
  1054  
  1055  		m := qInvNeg * t0
  1056  
  1057  		u0, c1 = bits.Mul64(m, q0)
  1058  		_, c0 = bits.Add64(t0, c1, 0)
  1059  		u1, c1 = bits.Mul64(m, q1)
  1060  		t0, c0 = bits.Add64(t1, c1, c0)
  1061  		u2, c1 = bits.Mul64(m, q2)
  1062  		t1, c0 = bits.Add64(t2, c1, c0)
  1063  		u3, c1 = bits.Mul64(m, q3)
  1064  		t2, c0 = bits.Add64(t3, c1, c0)
  1065  		u4, c1 = bits.Mul64(m, q4)
  1066  		t3, c0 = bits.Add64(t4, c1, c0)
  1067  		u5, c1 = bits.Mul64(m, q5)
  1068  		t4, c0 = bits.Add64(t5, c1, c0)
  1069  		u6, c1 = bits.Mul64(m, q6)
  1070  		t5, c0 = bits.Add64(t6, c1, c0)
  1071  		u7, c1 = bits.Mul64(m, q7)
  1072  		t6, c0 = bits.Add64(t7, c1, c0)
  1073  		u8, c1 = bits.Mul64(m, q8)
  1074  		t7, c0 = bits.Add64(t8, c1, c0)
  1075  		u9, c1 = bits.Mul64(m, q9)
  1076  
  1077  		t8, c0 = bits.Add64(0, c1, c0)
  1078  		u9, _ = bits.Add64(u9, 0, c0)
  1079  		t0, c0 = bits.Add64(u0, t0, 0)
  1080  		t1, c0 = bits.Add64(u1, t1, c0)
  1081  		t2, c0 = bits.Add64(u2, t2, c0)
  1082  		t3, c0 = bits.Add64(u3, t3, c0)
  1083  		t4, c0 = bits.Add64(u4, t4, c0)
  1084  		t5, c0 = bits.Add64(u5, t5, c0)
  1085  		t6, c0 = bits.Add64(u6, t6, c0)
  1086  		t7, c0 = bits.Add64(u7, t7, c0)
  1087  		t8, c0 = bits.Add64(u8, t8, c0)
  1088  		c2, _ = bits.Add64(c2, 0, c0)
  1089  		t8, c0 = bits.Add64(t9, t8, 0)
  1090  		t9, _ = bits.Add64(u9, c2, c0)
  1091  
  1092  	}
  1093  	{
  1094  		var c0, c1, c2 uint64
  1095  		v := x[3]
  1096  		u0, c1 = bits.Mul64(v, x[0])
  1097  		t0, c0 = bits.Add64(c1, t0, 0)
  1098  		u1, c1 = bits.Mul64(v, x[1])
  1099  		t1, c0 = bits.Add64(c1, t1, c0)
  1100  		u2, c1 = bits.Mul64(v, x[2])
  1101  		t2, c0 = bits.Add64(c1, t2, c0)
  1102  		u3, c1 = bits.Mul64(v, x[3])
  1103  		t3, c0 = bits.Add64(c1, t3, c0)
  1104  		u4, c1 = bits.Mul64(v, x[4])
  1105  		t4, c0 = bits.Add64(c1, t4, c0)
  1106  		u5, c1 = bits.Mul64(v, x[5])
  1107  		t5, c0 = bits.Add64(c1, t5, c0)
  1108  		u6, c1 = bits.Mul64(v, x[6])
  1109  		t6, c0 = bits.Add64(c1, t6, c0)
  1110  		u7, c1 = bits.Mul64(v, x[7])
  1111  		t7, c0 = bits.Add64(c1, t7, c0)
  1112  		u8, c1 = bits.Mul64(v, x[8])
  1113  		t8, c0 = bits.Add64(c1, t8, c0)
  1114  		u9, c1 = bits.Mul64(v, x[9])
  1115  		t9, c0 = bits.Add64(c1, t9, c0)
  1116  
  1117  		c2, _ = bits.Add64(0, 0, c0)
  1118  		t1, c0 = bits.Add64(u0, t1, 0)
  1119  		t2, c0 = bits.Add64(u1, t2, c0)
  1120  		t3, c0 = bits.Add64(u2, t3, c0)
  1121  		t4, c0 = bits.Add64(u3, t4, c0)
  1122  		t5, c0 = bits.Add64(u4, t5, c0)
  1123  		t6, c0 = bits.Add64(u5, t6, c0)
  1124  		t7, c0 = bits.Add64(u6, t7, c0)
  1125  		t8, c0 = bits.Add64(u7, t8, c0)
  1126  		t9, c0 = bits.Add64(u8, t9, c0)
  1127  		c2, _ = bits.Add64(u9, c2, c0)
  1128  
  1129  		m := qInvNeg * t0
  1130  
  1131  		u0, c1 = bits.Mul64(m, q0)
  1132  		_, c0 = bits.Add64(t0, c1, 0)
  1133  		u1, c1 = bits.Mul64(m, q1)
  1134  		t0, c0 = bits.Add64(t1, c1, c0)
  1135  		u2, c1 = bits.Mul64(m, q2)
  1136  		t1, c0 = bits.Add64(t2, c1, c0)
  1137  		u3, c1 = bits.Mul64(m, q3)
  1138  		t2, c0 = bits.Add64(t3, c1, c0)
  1139  		u4, c1 = bits.Mul64(m, q4)
  1140  		t3, c0 = bits.Add64(t4, c1, c0)
  1141  		u5, c1 = bits.Mul64(m, q5)
  1142  		t4, c0 = bits.Add64(t5, c1, c0)
  1143  		u6, c1 = bits.Mul64(m, q6)
  1144  		t5, c0 = bits.Add64(t6, c1, c0)
  1145  		u7, c1 = bits.Mul64(m, q7)
  1146  		t6, c0 = bits.Add64(t7, c1, c0)
  1147  		u8, c1 = bits.Mul64(m, q8)
  1148  		t7, c0 = bits.Add64(t8, c1, c0)
  1149  		u9, c1 = bits.Mul64(m, q9)
  1150  
  1151  		t8, c0 = bits.Add64(0, c1, c0)
  1152  		u9, _ = bits.Add64(u9, 0, c0)
  1153  		t0, c0 = bits.Add64(u0, t0, 0)
  1154  		t1, c0 = bits.Add64(u1, t1, c0)
  1155  		t2, c0 = bits.Add64(u2, t2, c0)
  1156  		t3, c0 = bits.Add64(u3, t3, c0)
  1157  		t4, c0 = bits.Add64(u4, t4, c0)
  1158  		t5, c0 = bits.Add64(u5, t5, c0)
  1159  		t6, c0 = bits.Add64(u6, t6, c0)
  1160  		t7, c0 = bits.Add64(u7, t7, c0)
  1161  		t8, c0 = bits.Add64(u8, t8, c0)
  1162  		c2, _ = bits.Add64(c2, 0, c0)
  1163  		t8, c0 = bits.Add64(t9, t8, 0)
  1164  		t9, _ = bits.Add64(u9, c2, c0)
  1165  
  1166  	}
  1167  	{
  1168  		var c0, c1, c2 uint64
  1169  		v := x[4]
  1170  		u0, c1 = bits.Mul64(v, x[0])
  1171  		t0, c0 = bits.Add64(c1, t0, 0)
  1172  		u1, c1 = bits.Mul64(v, x[1])
  1173  		t1, c0 = bits.Add64(c1, t1, c0)
  1174  		u2, c1 = bits.Mul64(v, x[2])
  1175  		t2, c0 = bits.Add64(c1, t2, c0)
  1176  		u3, c1 = bits.Mul64(v, x[3])
  1177  		t3, c0 = bits.Add64(c1, t3, c0)
  1178  		u4, c1 = bits.Mul64(v, x[4])
  1179  		t4, c0 = bits.Add64(c1, t4, c0)
  1180  		u5, c1 = bits.Mul64(v, x[5])
  1181  		t5, c0 = bits.Add64(c1, t5, c0)
  1182  		u6, c1 = bits.Mul64(v, x[6])
  1183  		t6, c0 = bits.Add64(c1, t6, c0)
  1184  		u7, c1 = bits.Mul64(v, x[7])
  1185  		t7, c0 = bits.Add64(c1, t7, c0)
  1186  		u8, c1 = bits.Mul64(v, x[8])
  1187  		t8, c0 = bits.Add64(c1, t8, c0)
  1188  		u9, c1 = bits.Mul64(v, x[9])
  1189  		t9, c0 = bits.Add64(c1, t9, c0)
  1190  
  1191  		c2, _ = bits.Add64(0, 0, c0)
  1192  		t1, c0 = bits.Add64(u0, t1, 0)
  1193  		t2, c0 = bits.Add64(u1, t2, c0)
  1194  		t3, c0 = bits.Add64(u2, t3, c0)
  1195  		t4, c0 = bits.Add64(u3, t4, c0)
  1196  		t5, c0 = bits.Add64(u4, t5, c0)
  1197  		t6, c0 = bits.Add64(u5, t6, c0)
  1198  		t7, c0 = bits.Add64(u6, t7, c0)
  1199  		t8, c0 = bits.Add64(u7, t8, c0)
  1200  		t9, c0 = bits.Add64(u8, t9, c0)
  1201  		c2, _ = bits.Add64(u9, c2, c0)
  1202  
  1203  		m := qInvNeg * t0
  1204  
  1205  		u0, c1 = bits.Mul64(m, q0)
  1206  		_, c0 = bits.Add64(t0, c1, 0)
  1207  		u1, c1 = bits.Mul64(m, q1)
  1208  		t0, c0 = bits.Add64(t1, c1, c0)
  1209  		u2, c1 = bits.Mul64(m, q2)
  1210  		t1, c0 = bits.Add64(t2, c1, c0)
  1211  		u3, c1 = bits.Mul64(m, q3)
  1212  		t2, c0 = bits.Add64(t3, c1, c0)
  1213  		u4, c1 = bits.Mul64(m, q4)
  1214  		t3, c0 = bits.Add64(t4, c1, c0)
  1215  		u5, c1 = bits.Mul64(m, q5)
  1216  		t4, c0 = bits.Add64(t5, c1, c0)
  1217  		u6, c1 = bits.Mul64(m, q6)
  1218  		t5, c0 = bits.Add64(t6, c1, c0)
  1219  		u7, c1 = bits.Mul64(m, q7)
  1220  		t6, c0 = bits.Add64(t7, c1, c0)
  1221  		u8, c1 = bits.Mul64(m, q8)
  1222  		t7, c0 = bits.Add64(t8, c1, c0)
  1223  		u9, c1 = bits.Mul64(m, q9)
  1224  
  1225  		t8, c0 = bits.Add64(0, c1, c0)
  1226  		u9, _ = bits.Add64(u9, 0, c0)
  1227  		t0, c0 = bits.Add64(u0, t0, 0)
  1228  		t1, c0 = bits.Add64(u1, t1, c0)
  1229  		t2, c0 = bits.Add64(u2, t2, c0)
  1230  		t3, c0 = bits.Add64(u3, t3, c0)
  1231  		t4, c0 = bits.Add64(u4, t4, c0)
  1232  		t5, c0 = bits.Add64(u5, t5, c0)
  1233  		t6, c0 = bits.Add64(u6, t6, c0)
  1234  		t7, c0 = bits.Add64(u7, t7, c0)
  1235  		t8, c0 = bits.Add64(u8, t8, c0)
  1236  		c2, _ = bits.Add64(c2, 0, c0)
  1237  		t8, c0 = bits.Add64(t9, t8, 0)
  1238  		t9, _ = bits.Add64(u9, c2, c0)
  1239  
  1240  	}
  1241  	{
  1242  		var c0, c1, c2 uint64
  1243  		v := x[5]
  1244  		u0, c1 = bits.Mul64(v, x[0])
  1245  		t0, c0 = bits.Add64(c1, t0, 0)
  1246  		u1, c1 = bits.Mul64(v, x[1])
  1247  		t1, c0 = bits.Add64(c1, t1, c0)
  1248  		u2, c1 = bits.Mul64(v, x[2])
  1249  		t2, c0 = bits.Add64(c1, t2, c0)
  1250  		u3, c1 = bits.Mul64(v, x[3])
  1251  		t3, c0 = bits.Add64(c1, t3, c0)
  1252  		u4, c1 = bits.Mul64(v, x[4])
  1253  		t4, c0 = bits.Add64(c1, t4, c0)
  1254  		u5, c1 = bits.Mul64(v, x[5])
  1255  		t5, c0 = bits.Add64(c1, t5, c0)
  1256  		u6, c1 = bits.Mul64(v, x[6])
  1257  		t6, c0 = bits.Add64(c1, t6, c0)
  1258  		u7, c1 = bits.Mul64(v, x[7])
  1259  		t7, c0 = bits.Add64(c1, t7, c0)
  1260  		u8, c1 = bits.Mul64(v, x[8])
  1261  		t8, c0 = bits.Add64(c1, t8, c0)
  1262  		u9, c1 = bits.Mul64(v, x[9])
  1263  		t9, c0 = bits.Add64(c1, t9, c0)
  1264  
  1265  		c2, _ = bits.Add64(0, 0, c0)
  1266  		t1, c0 = bits.Add64(u0, t1, 0)
  1267  		t2, c0 = bits.Add64(u1, t2, c0)
  1268  		t3, c0 = bits.Add64(u2, t3, c0)
  1269  		t4, c0 = bits.Add64(u3, t4, c0)
  1270  		t5, c0 = bits.Add64(u4, t5, c0)
  1271  		t6, c0 = bits.Add64(u5, t6, c0)
  1272  		t7, c0 = bits.Add64(u6, t7, c0)
  1273  		t8, c0 = bits.Add64(u7, t8, c0)
  1274  		t9, c0 = bits.Add64(u8, t9, c0)
  1275  		c2, _ = bits.Add64(u9, c2, c0)
  1276  
  1277  		m := qInvNeg * t0
  1278  
  1279  		u0, c1 = bits.Mul64(m, q0)
  1280  		_, c0 = bits.Add64(t0, c1, 0)
  1281  		u1, c1 = bits.Mul64(m, q1)
  1282  		t0, c0 = bits.Add64(t1, c1, c0)
  1283  		u2, c1 = bits.Mul64(m, q2)
  1284  		t1, c0 = bits.Add64(t2, c1, c0)
  1285  		u3, c1 = bits.Mul64(m, q3)
  1286  		t2, c0 = bits.Add64(t3, c1, c0)
  1287  		u4, c1 = bits.Mul64(m, q4)
  1288  		t3, c0 = bits.Add64(t4, c1, c0)
  1289  		u5, c1 = bits.Mul64(m, q5)
  1290  		t4, c0 = bits.Add64(t5, c1, c0)
  1291  		u6, c1 = bits.Mul64(m, q6)
  1292  		t5, c0 = bits.Add64(t6, c1, c0)
  1293  		u7, c1 = bits.Mul64(m, q7)
  1294  		t6, c0 = bits.Add64(t7, c1, c0)
  1295  		u8, c1 = bits.Mul64(m, q8)
  1296  		t7, c0 = bits.Add64(t8, c1, c0)
  1297  		u9, c1 = bits.Mul64(m, q9)
  1298  
  1299  		t8, c0 = bits.Add64(0, c1, c0)
  1300  		u9, _ = bits.Add64(u9, 0, c0)
  1301  		t0, c0 = bits.Add64(u0, t0, 0)
  1302  		t1, c0 = bits.Add64(u1, t1, c0)
  1303  		t2, c0 = bits.Add64(u2, t2, c0)
  1304  		t3, c0 = bits.Add64(u3, t3, c0)
  1305  		t4, c0 = bits.Add64(u4, t4, c0)
  1306  		t5, c0 = bits.Add64(u5, t5, c0)
  1307  		t6, c0 = bits.Add64(u6, t6, c0)
  1308  		t7, c0 = bits.Add64(u7, t7, c0)
  1309  		t8, c0 = bits.Add64(u8, t8, c0)
  1310  		c2, _ = bits.Add64(c2, 0, c0)
  1311  		t8, c0 = bits.Add64(t9, t8, 0)
  1312  		t9, _ = bits.Add64(u9, c2, c0)
  1313  
  1314  	}
  1315  	{
  1316  		var c0, c1, c2 uint64
  1317  		v := x[6]
  1318  		u0, c1 = bits.Mul64(v, x[0])
  1319  		t0, c0 = bits.Add64(c1, t0, 0)
  1320  		u1, c1 = bits.Mul64(v, x[1])
  1321  		t1, c0 = bits.Add64(c1, t1, c0)
  1322  		u2, c1 = bits.Mul64(v, x[2])
  1323  		t2, c0 = bits.Add64(c1, t2, c0)
  1324  		u3, c1 = bits.Mul64(v, x[3])
  1325  		t3, c0 = bits.Add64(c1, t3, c0)
  1326  		u4, c1 = bits.Mul64(v, x[4])
  1327  		t4, c0 = bits.Add64(c1, t4, c0)
  1328  		u5, c1 = bits.Mul64(v, x[5])
  1329  		t5, c0 = bits.Add64(c1, t5, c0)
  1330  		u6, c1 = bits.Mul64(v, x[6])
  1331  		t6, c0 = bits.Add64(c1, t6, c0)
  1332  		u7, c1 = bits.Mul64(v, x[7])
  1333  		t7, c0 = bits.Add64(c1, t7, c0)
  1334  		u8, c1 = bits.Mul64(v, x[8])
  1335  		t8, c0 = bits.Add64(c1, t8, c0)
  1336  		u9, c1 = bits.Mul64(v, x[9])
  1337  		t9, c0 = bits.Add64(c1, t9, c0)
  1338  
  1339  		c2, _ = bits.Add64(0, 0, c0)
  1340  		t1, c0 = bits.Add64(u0, t1, 0)
  1341  		t2, c0 = bits.Add64(u1, t2, c0)
  1342  		t3, c0 = bits.Add64(u2, t3, c0)
  1343  		t4, c0 = bits.Add64(u3, t4, c0)
  1344  		t5, c0 = bits.Add64(u4, t5, c0)
  1345  		t6, c0 = bits.Add64(u5, t6, c0)
  1346  		t7, c0 = bits.Add64(u6, t7, c0)
  1347  		t8, c0 = bits.Add64(u7, t8, c0)
  1348  		t9, c0 = bits.Add64(u8, t9, c0)
  1349  		c2, _ = bits.Add64(u9, c2, c0)
  1350  
  1351  		m := qInvNeg * t0
  1352  
  1353  		u0, c1 = bits.Mul64(m, q0)
  1354  		_, c0 = bits.Add64(t0, c1, 0)
  1355  		u1, c1 = bits.Mul64(m, q1)
  1356  		t0, c0 = bits.Add64(t1, c1, c0)
  1357  		u2, c1 = bits.Mul64(m, q2)
  1358  		t1, c0 = bits.Add64(t2, c1, c0)
  1359  		u3, c1 = bits.Mul64(m, q3)
  1360  		t2, c0 = bits.Add64(t3, c1, c0)
  1361  		u4, c1 = bits.Mul64(m, q4)
  1362  		t3, c0 = bits.Add64(t4, c1, c0)
  1363  		u5, c1 = bits.Mul64(m, q5)
  1364  		t4, c0 = bits.Add64(t5, c1, c0)
  1365  		u6, c1 = bits.Mul64(m, q6)
  1366  		t5, c0 = bits.Add64(t6, c1, c0)
  1367  		u7, c1 = bits.Mul64(m, q7)
  1368  		t6, c0 = bits.Add64(t7, c1, c0)
  1369  		u8, c1 = bits.Mul64(m, q8)
  1370  		t7, c0 = bits.Add64(t8, c1, c0)
  1371  		u9, c1 = bits.Mul64(m, q9)
  1372  
  1373  		t8, c0 = bits.Add64(0, c1, c0)
  1374  		u9, _ = bits.Add64(u9, 0, c0)
  1375  		t0, c0 = bits.Add64(u0, t0, 0)
  1376  		t1, c0 = bits.Add64(u1, t1, c0)
  1377  		t2, c0 = bits.Add64(u2, t2, c0)
  1378  		t3, c0 = bits.Add64(u3, t3, c0)
  1379  		t4, c0 = bits.Add64(u4, t4, c0)
  1380  		t5, c0 = bits.Add64(u5, t5, c0)
  1381  		t6, c0 = bits.Add64(u6, t6, c0)
  1382  		t7, c0 = bits.Add64(u7, t7, c0)
  1383  		t8, c0 = bits.Add64(u8, t8, c0)
  1384  		c2, _ = bits.Add64(c2, 0, c0)
  1385  		t8, c0 = bits.Add64(t9, t8, 0)
  1386  		t9, _ = bits.Add64(u9, c2, c0)
  1387  
  1388  	}
  1389  	{
  1390  		var c0, c1, c2 uint64
  1391  		v := x[7]
  1392  		u0, c1 = bits.Mul64(v, x[0])
  1393  		t0, c0 = bits.Add64(c1, t0, 0)
  1394  		u1, c1 = bits.Mul64(v, x[1])
  1395  		t1, c0 = bits.Add64(c1, t1, c0)
  1396  		u2, c1 = bits.Mul64(v, x[2])
  1397  		t2, c0 = bits.Add64(c1, t2, c0)
  1398  		u3, c1 = bits.Mul64(v, x[3])
  1399  		t3, c0 = bits.Add64(c1, t3, c0)
  1400  		u4, c1 = bits.Mul64(v, x[4])
  1401  		t4, c0 = bits.Add64(c1, t4, c0)
  1402  		u5, c1 = bits.Mul64(v, x[5])
  1403  		t5, c0 = bits.Add64(c1, t5, c0)
  1404  		u6, c1 = bits.Mul64(v, x[6])
  1405  		t6, c0 = bits.Add64(c1, t6, c0)
  1406  		u7, c1 = bits.Mul64(v, x[7])
  1407  		t7, c0 = bits.Add64(c1, t7, c0)
  1408  		u8, c1 = bits.Mul64(v, x[8])
  1409  		t8, c0 = bits.Add64(c1, t8, c0)
  1410  		u9, c1 = bits.Mul64(v, x[9])
  1411  		t9, c0 = bits.Add64(c1, t9, c0)
  1412  
  1413  		c2, _ = bits.Add64(0, 0, c0)
  1414  		t1, c0 = bits.Add64(u0, t1, 0)
  1415  		t2, c0 = bits.Add64(u1, t2, c0)
  1416  		t3, c0 = bits.Add64(u2, t3, c0)
  1417  		t4, c0 = bits.Add64(u3, t4, c0)
  1418  		t5, c0 = bits.Add64(u4, t5, c0)
  1419  		t6, c0 = bits.Add64(u5, t6, c0)
  1420  		t7, c0 = bits.Add64(u6, t7, c0)
  1421  		t8, c0 = bits.Add64(u7, t8, c0)
  1422  		t9, c0 = bits.Add64(u8, t9, c0)
  1423  		c2, _ = bits.Add64(u9, c2, c0)
  1424  
  1425  		m := qInvNeg * t0
  1426  
  1427  		u0, c1 = bits.Mul64(m, q0)
  1428  		_, c0 = bits.Add64(t0, c1, 0)
  1429  		u1, c1 = bits.Mul64(m, q1)
  1430  		t0, c0 = bits.Add64(t1, c1, c0)
  1431  		u2, c1 = bits.Mul64(m, q2)
  1432  		t1, c0 = bits.Add64(t2, c1, c0)
  1433  		u3, c1 = bits.Mul64(m, q3)
  1434  		t2, c0 = bits.Add64(t3, c1, c0)
  1435  		u4, c1 = bits.Mul64(m, q4)
  1436  		t3, c0 = bits.Add64(t4, c1, c0)
  1437  		u5, c1 = bits.Mul64(m, q5)
  1438  		t4, c0 = bits.Add64(t5, c1, c0)
  1439  		u6, c1 = bits.Mul64(m, q6)
  1440  		t5, c0 = bits.Add64(t6, c1, c0)
  1441  		u7, c1 = bits.Mul64(m, q7)
  1442  		t6, c0 = bits.Add64(t7, c1, c0)
  1443  		u8, c1 = bits.Mul64(m, q8)
  1444  		t7, c0 = bits.Add64(t8, c1, c0)
  1445  		u9, c1 = bits.Mul64(m, q9)
  1446  
  1447  		t8, c0 = bits.Add64(0, c1, c0)
  1448  		u9, _ = bits.Add64(u9, 0, c0)
  1449  		t0, c0 = bits.Add64(u0, t0, 0)
  1450  		t1, c0 = bits.Add64(u1, t1, c0)
  1451  		t2, c0 = bits.Add64(u2, t2, c0)
  1452  		t3, c0 = bits.Add64(u3, t3, c0)
  1453  		t4, c0 = bits.Add64(u4, t4, c0)
  1454  		t5, c0 = bits.Add64(u5, t5, c0)
  1455  		t6, c0 = bits.Add64(u6, t6, c0)
  1456  		t7, c0 = bits.Add64(u7, t7, c0)
  1457  		t8, c0 = bits.Add64(u8, t8, c0)
  1458  		c2, _ = bits.Add64(c2, 0, c0)
  1459  		t8, c0 = bits.Add64(t9, t8, 0)
  1460  		t9, _ = bits.Add64(u9, c2, c0)
  1461  
  1462  	}
  1463  	{
  1464  		var c0, c1, c2 uint64
  1465  		v := x[8]
  1466  		u0, c1 = bits.Mul64(v, x[0])
  1467  		t0, c0 = bits.Add64(c1, t0, 0)
  1468  		u1, c1 = bits.Mul64(v, x[1])
  1469  		t1, c0 = bits.Add64(c1, t1, c0)
  1470  		u2, c1 = bits.Mul64(v, x[2])
  1471  		t2, c0 = bits.Add64(c1, t2, c0)
  1472  		u3, c1 = bits.Mul64(v, x[3])
  1473  		t3, c0 = bits.Add64(c1, t3, c0)
  1474  		u4, c1 = bits.Mul64(v, x[4])
  1475  		t4, c0 = bits.Add64(c1, t4, c0)
  1476  		u5, c1 = bits.Mul64(v, x[5])
  1477  		t5, c0 = bits.Add64(c1, t5, c0)
  1478  		u6, c1 = bits.Mul64(v, x[6])
  1479  		t6, c0 = bits.Add64(c1, t6, c0)
  1480  		u7, c1 = bits.Mul64(v, x[7])
  1481  		t7, c0 = bits.Add64(c1, t7, c0)
  1482  		u8, c1 = bits.Mul64(v, x[8])
  1483  		t8, c0 = bits.Add64(c1, t8, c0)
  1484  		u9, c1 = bits.Mul64(v, x[9])
  1485  		t9, c0 = bits.Add64(c1, t9, c0)
  1486  
  1487  		c2, _ = bits.Add64(0, 0, c0)
  1488  		t1, c0 = bits.Add64(u0, t1, 0)
  1489  		t2, c0 = bits.Add64(u1, t2, c0)
  1490  		t3, c0 = bits.Add64(u2, t3, c0)
  1491  		t4, c0 = bits.Add64(u3, t4, c0)
  1492  		t5, c0 = bits.Add64(u4, t5, c0)
  1493  		t6, c0 = bits.Add64(u5, t6, c0)
  1494  		t7, c0 = bits.Add64(u6, t7, c0)
  1495  		t8, c0 = bits.Add64(u7, t8, c0)
  1496  		t9, c0 = bits.Add64(u8, t9, c0)
  1497  		c2, _ = bits.Add64(u9, c2, c0)
  1498  
  1499  		m := qInvNeg * t0
  1500  
  1501  		u0, c1 = bits.Mul64(m, q0)
  1502  		_, c0 = bits.Add64(t0, c1, 0)
  1503  		u1, c1 = bits.Mul64(m, q1)
  1504  		t0, c0 = bits.Add64(t1, c1, c0)
  1505  		u2, c1 = bits.Mul64(m, q2)
  1506  		t1, c0 = bits.Add64(t2, c1, c0)
  1507  		u3, c1 = bits.Mul64(m, q3)
  1508  		t2, c0 = bits.Add64(t3, c1, c0)
  1509  		u4, c1 = bits.Mul64(m, q4)
  1510  		t3, c0 = bits.Add64(t4, c1, c0)
  1511  		u5, c1 = bits.Mul64(m, q5)
  1512  		t4, c0 = bits.Add64(t5, c1, c0)
  1513  		u6, c1 = bits.Mul64(m, q6)
  1514  		t5, c0 = bits.Add64(t6, c1, c0)
  1515  		u7, c1 = bits.Mul64(m, q7)
  1516  		t6, c0 = bits.Add64(t7, c1, c0)
  1517  		u8, c1 = bits.Mul64(m, q8)
  1518  		t7, c0 = bits.Add64(t8, c1, c0)
  1519  		u9, c1 = bits.Mul64(m, q9)
  1520  
  1521  		t8, c0 = bits.Add64(0, c1, c0)
  1522  		u9, _ = bits.Add64(u9, 0, c0)
  1523  		t0, c0 = bits.Add64(u0, t0, 0)
  1524  		t1, c0 = bits.Add64(u1, t1, c0)
  1525  		t2, c0 = bits.Add64(u2, t2, c0)
  1526  		t3, c0 = bits.Add64(u3, t3, c0)
  1527  		t4, c0 = bits.Add64(u4, t4, c0)
  1528  		t5, c0 = bits.Add64(u5, t5, c0)
  1529  		t6, c0 = bits.Add64(u6, t6, c0)
  1530  		t7, c0 = bits.Add64(u7, t7, c0)
  1531  		t8, c0 = bits.Add64(u8, t8, c0)
  1532  		c2, _ = bits.Add64(c2, 0, c0)
  1533  		t8, c0 = bits.Add64(t9, t8, 0)
  1534  		t9, _ = bits.Add64(u9, c2, c0)
  1535  
  1536  	}
  1537  	{
  1538  		var c0, c1, c2 uint64
  1539  		v := x[9]
  1540  		u0, c1 = bits.Mul64(v, x[0])
  1541  		t0, c0 = bits.Add64(c1, t0, 0)
  1542  		u1, c1 = bits.Mul64(v, x[1])
  1543  		t1, c0 = bits.Add64(c1, t1, c0)
  1544  		u2, c1 = bits.Mul64(v, x[2])
  1545  		t2, c0 = bits.Add64(c1, t2, c0)
  1546  		u3, c1 = bits.Mul64(v, x[3])
  1547  		t3, c0 = bits.Add64(c1, t3, c0)
  1548  		u4, c1 = bits.Mul64(v, x[4])
  1549  		t4, c0 = bits.Add64(c1, t4, c0)
  1550  		u5, c1 = bits.Mul64(v, x[5])
  1551  		t5, c0 = bits.Add64(c1, t5, c0)
  1552  		u6, c1 = bits.Mul64(v, x[6])
  1553  		t6, c0 = bits.Add64(c1, t6, c0)
  1554  		u7, c1 = bits.Mul64(v, x[7])
  1555  		t7, c0 = bits.Add64(c1, t7, c0)
  1556  		u8, c1 = bits.Mul64(v, x[8])
  1557  		t8, c0 = bits.Add64(c1, t8, c0)
  1558  		u9, c1 = bits.Mul64(v, x[9])
  1559  		t9, c0 = bits.Add64(c1, t9, c0)
  1560  
  1561  		c2, _ = bits.Add64(0, 0, c0)
  1562  		t1, c0 = bits.Add64(u0, t1, 0)
  1563  		t2, c0 = bits.Add64(u1, t2, c0)
  1564  		t3, c0 = bits.Add64(u2, t3, c0)
  1565  		t4, c0 = bits.Add64(u3, t4, c0)
  1566  		t5, c0 = bits.Add64(u4, t5, c0)
  1567  		t6, c0 = bits.Add64(u5, t6, c0)
  1568  		t7, c0 = bits.Add64(u6, t7, c0)
  1569  		t8, c0 = bits.Add64(u7, t8, c0)
  1570  		t9, c0 = bits.Add64(u8, t9, c0)
  1571  		c2, _ = bits.Add64(u9, c2, c0)
  1572  
  1573  		m := qInvNeg * t0
  1574  
  1575  		u0, c1 = bits.Mul64(m, q0)
  1576  		_, c0 = bits.Add64(t0, c1, 0)
  1577  		u1, c1 = bits.Mul64(m, q1)
  1578  		t0, c0 = bits.Add64(t1, c1, c0)
  1579  		u2, c1 = bits.Mul64(m, q2)
  1580  		t1, c0 = bits.Add64(t2, c1, c0)
  1581  		u3, c1 = bits.Mul64(m, q3)
  1582  		t2, c0 = bits.Add64(t3, c1, c0)
  1583  		u4, c1 = bits.Mul64(m, q4)
  1584  		t3, c0 = bits.Add64(t4, c1, c0)
  1585  		u5, c1 = bits.Mul64(m, q5)
  1586  		t4, c0 = bits.Add64(t5, c1, c0)
  1587  		u6, c1 = bits.Mul64(m, q6)
  1588  		t5, c0 = bits.Add64(t6, c1, c0)
  1589  		u7, c1 = bits.Mul64(m, q7)
  1590  		t6, c0 = bits.Add64(t7, c1, c0)
  1591  		u8, c1 = bits.Mul64(m, q8)
  1592  		t7, c0 = bits.Add64(t8, c1, c0)
  1593  		u9, c1 = bits.Mul64(m, q9)
  1594  
  1595  		t8, c0 = bits.Add64(0, c1, c0)
  1596  		u9, _ = bits.Add64(u9, 0, c0)
  1597  		t0, c0 = bits.Add64(u0, t0, 0)
  1598  		t1, c0 = bits.Add64(u1, t1, c0)
  1599  		t2, c0 = bits.Add64(u2, t2, c0)
  1600  		t3, c0 = bits.Add64(u3, t3, c0)
  1601  		t4, c0 = bits.Add64(u4, t4, c0)
  1602  		t5, c0 = bits.Add64(u5, t5, c0)
  1603  		t6, c0 = bits.Add64(u6, t6, c0)
  1604  		t7, c0 = bits.Add64(u7, t7, c0)
  1605  		t8, c0 = bits.Add64(u8, t8, c0)
  1606  		c2, _ = bits.Add64(c2, 0, c0)
  1607  		t8, c0 = bits.Add64(t9, t8, 0)
  1608  		t9, _ = bits.Add64(u9, c2, c0)
  1609  
  1610  	}
  1611  	z[0] = t0
  1612  	z[1] = t1
  1613  	z[2] = t2
  1614  	z[3] = t3
  1615  	z[4] = t4
  1616  	z[5] = t5
  1617  	z[6] = t6
  1618  	z[7] = t7
  1619  	z[8] = t8
  1620  	z[9] = t9
  1621  
  1622  	// if z ⩾ q → z -= q
  1623  	if !z.smallerThanModulus() {
  1624  		var b uint64
  1625  		z[0], b = bits.Sub64(z[0], q0, 0)
  1626  		z[1], b = bits.Sub64(z[1], q1, b)
  1627  		z[2], b = bits.Sub64(z[2], q2, b)
  1628  		z[3], b = bits.Sub64(z[3], q3, b)
  1629  		z[4], b = bits.Sub64(z[4], q4, b)
  1630  		z[5], b = bits.Sub64(z[5], q5, b)
  1631  		z[6], b = bits.Sub64(z[6], q6, b)
  1632  		z[7], b = bits.Sub64(z[7], q7, b)
  1633  		z[8], b = bits.Sub64(z[8], q8, b)
  1634  		z[9], _ = bits.Sub64(z[9], q9, b)
  1635  	}
  1636  	return z
  1637  }