github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-761/fp/element_ops_purego.go (about)

     1  //go:build !amd64 || purego
     2  // +build !amd64 purego
     3  
     4  // Copyright 2020 ConsenSys Software Inc.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Code generated by consensys/gnark-crypto DO NOT EDIT
    19  
    20  package fp
    21  
    22  import "math/bits"
    23  
    24  // MulBy3 x *= 3 (mod q)
    25  func MulBy3(x *Element) {
    26  	_x := *x
    27  	x.Double(x).Add(x, &_x)
    28  }
    29  
    30  // MulBy5 x *= 5 (mod q)
    31  func MulBy5(x *Element) {
    32  	_x := *x
    33  	x.Double(x).Double(x).Add(x, &_x)
    34  }
    35  
    36  // MulBy13 x *= 13 (mod q)
    37  func MulBy13(x *Element) {
    38  	var y = Element{
    39  		4345973640412121648,
    40  		16340807117537158706,
    41  		14673764841507373218,
    42  		5587754667198343811,
    43  		12846753860245084942,
    44  		4041391838244625385,
    45  		8324122986343791677,
    46  		8773809490091176420,
    47  		5465994123296109449,
    48  		6649773564661156048,
    49  		9147430723089113754,
    50  		54281803719730243,
    51  	}
    52  	x.Mul(x, &y)
    53  }
    54  
// Butterfly sets
//
//	a = a + b (mod q)
//	b = a - b (mod q)
//
// This build variant forwards both operands unchanged to _butterflyGeneric.
// NOTE(review): both results are presumably computed from the input values of
// a and b (i.e. b receives the old a minus b) — confirm against
// _butterflyGeneric.
func Butterfly(a, b *Element) {
	_butterflyGeneric(a, b)
}
    62  
// fromMont forwards z to _fromMontGeneric.
//
// NOTE(review): judging by the name, this converts z out of Montgomery form
// in place — confirm against _fromMontGeneric.
func fromMont(z *Element) {
	_fromMontGeneric(z)
}
    66  
// reduce forwards z to _reduceGeneric.
//
// NOTE(review): judging by the name, this brings z back into the canonical
// range [0, q) in place — confirm against _reduceGeneric.
func reduce(z *Element) {
	_reduceGeneric(z)
}
    70  
    71  // Mul z = x * y (mod q)
    72  //
    73  // x and y must be less than q
    74  func (z *Element) Mul(x, y *Element) *Element {
    75  
    76  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
    77  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
    78  	//
    79  	// The algorithm:
    80  	//
    81  	// for i=0 to N-1
    82  	// 		C := 0
    83  	// 		for j=0 to N-1
    84  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
    85  	// 		(t[N+1],t[N]) := t[N] + C
    86  	//
    87  	// 		C := 0
    88  	// 		m := t[0]*q'[0] mod D
    89  	// 		(C,_) := t[0] + m*q[0]
    90  	// 		for j=1 to N-1
    91  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
    92  	//
    93  	// 		(C,t[N-1]) := t[N] + C
    94  	// 		t[N] := t[N+1] + C
    95  	//
    96  	// → N is the number of machine words needed to store the modulus q
    97  	// → D is the word size. For example, on a 64-bit architecture D is 2^64
    98  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
    99  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
   100  	// → t is a temporary array of size N+2
   101  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
   102  	//
   103  	// As described here https://hackmd.io/@gnark/modular_multiplication we can get rid of one carry chain and simplify:
   104  	// (also described in https://eprint.iacr.org/2022/1400.pdf annex)
   105  	//
   106  	// for i=0 to N-1
   107  	// 		(A,t[0]) := t[0] + x[0]*y[i]
   108  	// 		m := t[0]*q'[0] mod W
   109  	// 		C,_ := t[0] + m*q[0]
   110  	// 		for j=1 to N-1
   111  	// 			(A,t[j])  := t[j] + x[j]*y[i] + A
   112  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   113  	//
   114  	// 		t[N-1] = C + A
   115  	//
   116  	// This optimization saves 5N + 2 additions in the algorithm, and can be used whenever the highest bit
   117  	// of the modulus is zero (and not all of the remaining bits are set).
   118  
   119  	var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11 uint64
   120  	var u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11 uint64
   121  	{
   122  		var c0, c1, c2 uint64
   123  		v := x[0]
   124  		u0, t0 = bits.Mul64(v, y[0])
   125  		u1, t1 = bits.Mul64(v, y[1])
   126  		u2, t2 = bits.Mul64(v, y[2])
   127  		u3, t3 = bits.Mul64(v, y[3])
   128  		u4, t4 = bits.Mul64(v, y[4])
   129  		u5, t5 = bits.Mul64(v, y[5])
   130  		u6, t6 = bits.Mul64(v, y[6])
   131  		u7, t7 = bits.Mul64(v, y[7])
   132  		u8, t8 = bits.Mul64(v, y[8])
   133  		u9, t9 = bits.Mul64(v, y[9])
   134  		u10, t10 = bits.Mul64(v, y[10])
   135  		u11, t11 = bits.Mul64(v, y[11])
   136  		t1, c0 = bits.Add64(u0, t1, 0)
   137  		t2, c0 = bits.Add64(u1, t2, c0)
   138  		t3, c0 = bits.Add64(u2, t3, c0)
   139  		t4, c0 = bits.Add64(u3, t4, c0)
   140  		t5, c0 = bits.Add64(u4, t5, c0)
   141  		t6, c0 = bits.Add64(u5, t6, c0)
   142  		t7, c0 = bits.Add64(u6, t7, c0)
   143  		t8, c0 = bits.Add64(u7, t8, c0)
   144  		t9, c0 = bits.Add64(u8, t9, c0)
   145  		t10, c0 = bits.Add64(u9, t10, c0)
   146  		t11, c0 = bits.Add64(u10, t11, c0)
   147  		c2, _ = bits.Add64(u11, 0, c0)
   148  
   149  		m := qInvNeg * t0
   150  
   151  		u0, c1 = bits.Mul64(m, q0)
   152  		_, c0 = bits.Add64(t0, c1, 0)
   153  		u1, c1 = bits.Mul64(m, q1)
   154  		t0, c0 = bits.Add64(t1, c1, c0)
   155  		u2, c1 = bits.Mul64(m, q2)
   156  		t1, c0 = bits.Add64(t2, c1, c0)
   157  		u3, c1 = bits.Mul64(m, q3)
   158  		t2, c0 = bits.Add64(t3, c1, c0)
   159  		u4, c1 = bits.Mul64(m, q4)
   160  		t3, c0 = bits.Add64(t4, c1, c0)
   161  		u5, c1 = bits.Mul64(m, q5)
   162  		t4, c0 = bits.Add64(t5, c1, c0)
   163  		u6, c1 = bits.Mul64(m, q6)
   164  		t5, c0 = bits.Add64(t6, c1, c0)
   165  		u7, c1 = bits.Mul64(m, q7)
   166  		t6, c0 = bits.Add64(t7, c1, c0)
   167  		u8, c1 = bits.Mul64(m, q8)
   168  		t7, c0 = bits.Add64(t8, c1, c0)
   169  		u9, c1 = bits.Mul64(m, q9)
   170  		t8, c0 = bits.Add64(t9, c1, c0)
   171  		u10, c1 = bits.Mul64(m, q10)
   172  		t9, c0 = bits.Add64(t10, c1, c0)
   173  		u11, c1 = bits.Mul64(m, q11)
   174  
   175  		t10, c0 = bits.Add64(0, c1, c0)
   176  		u11, _ = bits.Add64(u11, 0, c0)
   177  		t0, c0 = bits.Add64(u0, t0, 0)
   178  		t1, c0 = bits.Add64(u1, t1, c0)
   179  		t2, c0 = bits.Add64(u2, t2, c0)
   180  		t3, c0 = bits.Add64(u3, t3, c0)
   181  		t4, c0 = bits.Add64(u4, t4, c0)
   182  		t5, c0 = bits.Add64(u5, t5, c0)
   183  		t6, c0 = bits.Add64(u6, t6, c0)
   184  		t7, c0 = bits.Add64(u7, t7, c0)
   185  		t8, c0 = bits.Add64(u8, t8, c0)
   186  		t9, c0 = bits.Add64(u9, t9, c0)
   187  		t10, c0 = bits.Add64(u10, t10, c0)
   188  		c2, _ = bits.Add64(c2, 0, c0)
   189  		t10, c0 = bits.Add64(t11, t10, 0)
   190  		t11, _ = bits.Add64(u11, c2, c0)
   191  
   192  	}
   193  	{
   194  		var c0, c1, c2 uint64
   195  		v := x[1]
   196  		u0, c1 = bits.Mul64(v, y[0])
   197  		t0, c0 = bits.Add64(c1, t0, 0)
   198  		u1, c1 = bits.Mul64(v, y[1])
   199  		t1, c0 = bits.Add64(c1, t1, c0)
   200  		u2, c1 = bits.Mul64(v, y[2])
   201  		t2, c0 = bits.Add64(c1, t2, c0)
   202  		u3, c1 = bits.Mul64(v, y[3])
   203  		t3, c0 = bits.Add64(c1, t3, c0)
   204  		u4, c1 = bits.Mul64(v, y[4])
   205  		t4, c0 = bits.Add64(c1, t4, c0)
   206  		u5, c1 = bits.Mul64(v, y[5])
   207  		t5, c0 = bits.Add64(c1, t5, c0)
   208  		u6, c1 = bits.Mul64(v, y[6])
   209  		t6, c0 = bits.Add64(c1, t6, c0)
   210  		u7, c1 = bits.Mul64(v, y[7])
   211  		t7, c0 = bits.Add64(c1, t7, c0)
   212  		u8, c1 = bits.Mul64(v, y[8])
   213  		t8, c0 = bits.Add64(c1, t8, c0)
   214  		u9, c1 = bits.Mul64(v, y[9])
   215  		t9, c0 = bits.Add64(c1, t9, c0)
   216  		u10, c1 = bits.Mul64(v, y[10])
   217  		t10, c0 = bits.Add64(c1, t10, c0)
   218  		u11, c1 = bits.Mul64(v, y[11])
   219  		t11, c0 = bits.Add64(c1, t11, c0)
   220  
   221  		c2, _ = bits.Add64(0, 0, c0)
   222  		t1, c0 = bits.Add64(u0, t1, 0)
   223  		t2, c0 = bits.Add64(u1, t2, c0)
   224  		t3, c0 = bits.Add64(u2, t3, c0)
   225  		t4, c0 = bits.Add64(u3, t4, c0)
   226  		t5, c0 = bits.Add64(u4, t5, c0)
   227  		t6, c0 = bits.Add64(u5, t6, c0)
   228  		t7, c0 = bits.Add64(u6, t7, c0)
   229  		t8, c0 = bits.Add64(u7, t8, c0)
   230  		t9, c0 = bits.Add64(u8, t9, c0)
   231  		t10, c0 = bits.Add64(u9, t10, c0)
   232  		t11, c0 = bits.Add64(u10, t11, c0)
   233  		c2, _ = bits.Add64(u11, c2, c0)
   234  
   235  		m := qInvNeg * t0
   236  
   237  		u0, c1 = bits.Mul64(m, q0)
   238  		_, c0 = bits.Add64(t0, c1, 0)
   239  		u1, c1 = bits.Mul64(m, q1)
   240  		t0, c0 = bits.Add64(t1, c1, c0)
   241  		u2, c1 = bits.Mul64(m, q2)
   242  		t1, c0 = bits.Add64(t2, c1, c0)
   243  		u3, c1 = bits.Mul64(m, q3)
   244  		t2, c0 = bits.Add64(t3, c1, c0)
   245  		u4, c1 = bits.Mul64(m, q4)
   246  		t3, c0 = bits.Add64(t4, c1, c0)
   247  		u5, c1 = bits.Mul64(m, q5)
   248  		t4, c0 = bits.Add64(t5, c1, c0)
   249  		u6, c1 = bits.Mul64(m, q6)
   250  		t5, c0 = bits.Add64(t6, c1, c0)
   251  		u7, c1 = bits.Mul64(m, q7)
   252  		t6, c0 = bits.Add64(t7, c1, c0)
   253  		u8, c1 = bits.Mul64(m, q8)
   254  		t7, c0 = bits.Add64(t8, c1, c0)
   255  		u9, c1 = bits.Mul64(m, q9)
   256  		t8, c0 = bits.Add64(t9, c1, c0)
   257  		u10, c1 = bits.Mul64(m, q10)
   258  		t9, c0 = bits.Add64(t10, c1, c0)
   259  		u11, c1 = bits.Mul64(m, q11)
   260  
   261  		t10, c0 = bits.Add64(0, c1, c0)
   262  		u11, _ = bits.Add64(u11, 0, c0)
   263  		t0, c0 = bits.Add64(u0, t0, 0)
   264  		t1, c0 = bits.Add64(u1, t1, c0)
   265  		t2, c0 = bits.Add64(u2, t2, c0)
   266  		t3, c0 = bits.Add64(u3, t3, c0)
   267  		t4, c0 = bits.Add64(u4, t4, c0)
   268  		t5, c0 = bits.Add64(u5, t5, c0)
   269  		t6, c0 = bits.Add64(u6, t6, c0)
   270  		t7, c0 = bits.Add64(u7, t7, c0)
   271  		t8, c0 = bits.Add64(u8, t8, c0)
   272  		t9, c0 = bits.Add64(u9, t9, c0)
   273  		t10, c0 = bits.Add64(u10, t10, c0)
   274  		c2, _ = bits.Add64(c2, 0, c0)
   275  		t10, c0 = bits.Add64(t11, t10, 0)
   276  		t11, _ = bits.Add64(u11, c2, c0)
   277  
   278  	}
   279  	{
   280  		var c0, c1, c2 uint64
   281  		v := x[2]
   282  		u0, c1 = bits.Mul64(v, y[0])
   283  		t0, c0 = bits.Add64(c1, t0, 0)
   284  		u1, c1 = bits.Mul64(v, y[1])
   285  		t1, c0 = bits.Add64(c1, t1, c0)
   286  		u2, c1 = bits.Mul64(v, y[2])
   287  		t2, c0 = bits.Add64(c1, t2, c0)
   288  		u3, c1 = bits.Mul64(v, y[3])
   289  		t3, c0 = bits.Add64(c1, t3, c0)
   290  		u4, c1 = bits.Mul64(v, y[4])
   291  		t4, c0 = bits.Add64(c1, t4, c0)
   292  		u5, c1 = bits.Mul64(v, y[5])
   293  		t5, c0 = bits.Add64(c1, t5, c0)
   294  		u6, c1 = bits.Mul64(v, y[6])
   295  		t6, c0 = bits.Add64(c1, t6, c0)
   296  		u7, c1 = bits.Mul64(v, y[7])
   297  		t7, c0 = bits.Add64(c1, t7, c0)
   298  		u8, c1 = bits.Mul64(v, y[8])
   299  		t8, c0 = bits.Add64(c1, t8, c0)
   300  		u9, c1 = bits.Mul64(v, y[9])
   301  		t9, c0 = bits.Add64(c1, t9, c0)
   302  		u10, c1 = bits.Mul64(v, y[10])
   303  		t10, c0 = bits.Add64(c1, t10, c0)
   304  		u11, c1 = bits.Mul64(v, y[11])
   305  		t11, c0 = bits.Add64(c1, t11, c0)
   306  
   307  		c2, _ = bits.Add64(0, 0, c0)
   308  		t1, c0 = bits.Add64(u0, t1, 0)
   309  		t2, c0 = bits.Add64(u1, t2, c0)
   310  		t3, c0 = bits.Add64(u2, t3, c0)
   311  		t4, c0 = bits.Add64(u3, t4, c0)
   312  		t5, c0 = bits.Add64(u4, t5, c0)
   313  		t6, c0 = bits.Add64(u5, t6, c0)
   314  		t7, c0 = bits.Add64(u6, t7, c0)
   315  		t8, c0 = bits.Add64(u7, t8, c0)
   316  		t9, c0 = bits.Add64(u8, t9, c0)
   317  		t10, c0 = bits.Add64(u9, t10, c0)
   318  		t11, c0 = bits.Add64(u10, t11, c0)
   319  		c2, _ = bits.Add64(u11, c2, c0)
   320  
   321  		m := qInvNeg * t0
   322  
   323  		u0, c1 = bits.Mul64(m, q0)
   324  		_, c0 = bits.Add64(t0, c1, 0)
   325  		u1, c1 = bits.Mul64(m, q1)
   326  		t0, c0 = bits.Add64(t1, c1, c0)
   327  		u2, c1 = bits.Mul64(m, q2)
   328  		t1, c0 = bits.Add64(t2, c1, c0)
   329  		u3, c1 = bits.Mul64(m, q3)
   330  		t2, c0 = bits.Add64(t3, c1, c0)
   331  		u4, c1 = bits.Mul64(m, q4)
   332  		t3, c0 = bits.Add64(t4, c1, c0)
   333  		u5, c1 = bits.Mul64(m, q5)
   334  		t4, c0 = bits.Add64(t5, c1, c0)
   335  		u6, c1 = bits.Mul64(m, q6)
   336  		t5, c0 = bits.Add64(t6, c1, c0)
   337  		u7, c1 = bits.Mul64(m, q7)
   338  		t6, c0 = bits.Add64(t7, c1, c0)
   339  		u8, c1 = bits.Mul64(m, q8)
   340  		t7, c0 = bits.Add64(t8, c1, c0)
   341  		u9, c1 = bits.Mul64(m, q9)
   342  		t8, c0 = bits.Add64(t9, c1, c0)
   343  		u10, c1 = bits.Mul64(m, q10)
   344  		t9, c0 = bits.Add64(t10, c1, c0)
   345  		u11, c1 = bits.Mul64(m, q11)
   346  
   347  		t10, c0 = bits.Add64(0, c1, c0)
   348  		u11, _ = bits.Add64(u11, 0, c0)
   349  		t0, c0 = bits.Add64(u0, t0, 0)
   350  		t1, c0 = bits.Add64(u1, t1, c0)
   351  		t2, c0 = bits.Add64(u2, t2, c0)
   352  		t3, c0 = bits.Add64(u3, t3, c0)
   353  		t4, c0 = bits.Add64(u4, t4, c0)
   354  		t5, c0 = bits.Add64(u5, t5, c0)
   355  		t6, c0 = bits.Add64(u6, t6, c0)
   356  		t7, c0 = bits.Add64(u7, t7, c0)
   357  		t8, c0 = bits.Add64(u8, t8, c0)
   358  		t9, c0 = bits.Add64(u9, t9, c0)
   359  		t10, c0 = bits.Add64(u10, t10, c0)
   360  		c2, _ = bits.Add64(c2, 0, c0)
   361  		t10, c0 = bits.Add64(t11, t10, 0)
   362  		t11, _ = bits.Add64(u11, c2, c0)
   363  
   364  	}
   365  	{
   366  		var c0, c1, c2 uint64
   367  		v := x[3]
   368  		u0, c1 = bits.Mul64(v, y[0])
   369  		t0, c0 = bits.Add64(c1, t0, 0)
   370  		u1, c1 = bits.Mul64(v, y[1])
   371  		t1, c0 = bits.Add64(c1, t1, c0)
   372  		u2, c1 = bits.Mul64(v, y[2])
   373  		t2, c0 = bits.Add64(c1, t2, c0)
   374  		u3, c1 = bits.Mul64(v, y[3])
   375  		t3, c0 = bits.Add64(c1, t3, c0)
   376  		u4, c1 = bits.Mul64(v, y[4])
   377  		t4, c0 = bits.Add64(c1, t4, c0)
   378  		u5, c1 = bits.Mul64(v, y[5])
   379  		t5, c0 = bits.Add64(c1, t5, c0)
   380  		u6, c1 = bits.Mul64(v, y[6])
   381  		t6, c0 = bits.Add64(c1, t6, c0)
   382  		u7, c1 = bits.Mul64(v, y[7])
   383  		t7, c0 = bits.Add64(c1, t7, c0)
   384  		u8, c1 = bits.Mul64(v, y[8])
   385  		t8, c0 = bits.Add64(c1, t8, c0)
   386  		u9, c1 = bits.Mul64(v, y[9])
   387  		t9, c0 = bits.Add64(c1, t9, c0)
   388  		u10, c1 = bits.Mul64(v, y[10])
   389  		t10, c0 = bits.Add64(c1, t10, c0)
   390  		u11, c1 = bits.Mul64(v, y[11])
   391  		t11, c0 = bits.Add64(c1, t11, c0)
   392  
   393  		c2, _ = bits.Add64(0, 0, c0)
   394  		t1, c0 = bits.Add64(u0, t1, 0)
   395  		t2, c0 = bits.Add64(u1, t2, c0)
   396  		t3, c0 = bits.Add64(u2, t3, c0)
   397  		t4, c0 = bits.Add64(u3, t4, c0)
   398  		t5, c0 = bits.Add64(u4, t5, c0)
   399  		t6, c0 = bits.Add64(u5, t6, c0)
   400  		t7, c0 = bits.Add64(u6, t7, c0)
   401  		t8, c0 = bits.Add64(u7, t8, c0)
   402  		t9, c0 = bits.Add64(u8, t9, c0)
   403  		t10, c0 = bits.Add64(u9, t10, c0)
   404  		t11, c0 = bits.Add64(u10, t11, c0)
   405  		c2, _ = bits.Add64(u11, c2, c0)
   406  
   407  		m := qInvNeg * t0
   408  
   409  		u0, c1 = bits.Mul64(m, q0)
   410  		_, c0 = bits.Add64(t0, c1, 0)
   411  		u1, c1 = bits.Mul64(m, q1)
   412  		t0, c0 = bits.Add64(t1, c1, c0)
   413  		u2, c1 = bits.Mul64(m, q2)
   414  		t1, c0 = bits.Add64(t2, c1, c0)
   415  		u3, c1 = bits.Mul64(m, q3)
   416  		t2, c0 = bits.Add64(t3, c1, c0)
   417  		u4, c1 = bits.Mul64(m, q4)
   418  		t3, c0 = bits.Add64(t4, c1, c0)
   419  		u5, c1 = bits.Mul64(m, q5)
   420  		t4, c0 = bits.Add64(t5, c1, c0)
   421  		u6, c1 = bits.Mul64(m, q6)
   422  		t5, c0 = bits.Add64(t6, c1, c0)
   423  		u7, c1 = bits.Mul64(m, q7)
   424  		t6, c0 = bits.Add64(t7, c1, c0)
   425  		u8, c1 = bits.Mul64(m, q8)
   426  		t7, c0 = bits.Add64(t8, c1, c0)
   427  		u9, c1 = bits.Mul64(m, q9)
   428  		t8, c0 = bits.Add64(t9, c1, c0)
   429  		u10, c1 = bits.Mul64(m, q10)
   430  		t9, c0 = bits.Add64(t10, c1, c0)
   431  		u11, c1 = bits.Mul64(m, q11)
   432  
   433  		t10, c0 = bits.Add64(0, c1, c0)
   434  		u11, _ = bits.Add64(u11, 0, c0)
   435  		t0, c0 = bits.Add64(u0, t0, 0)
   436  		t1, c0 = bits.Add64(u1, t1, c0)
   437  		t2, c0 = bits.Add64(u2, t2, c0)
   438  		t3, c0 = bits.Add64(u3, t3, c0)
   439  		t4, c0 = bits.Add64(u4, t4, c0)
   440  		t5, c0 = bits.Add64(u5, t5, c0)
   441  		t6, c0 = bits.Add64(u6, t6, c0)
   442  		t7, c0 = bits.Add64(u7, t7, c0)
   443  		t8, c0 = bits.Add64(u8, t8, c0)
   444  		t9, c0 = bits.Add64(u9, t9, c0)
   445  		t10, c0 = bits.Add64(u10, t10, c0)
   446  		c2, _ = bits.Add64(c2, 0, c0)
   447  		t10, c0 = bits.Add64(t11, t10, 0)
   448  		t11, _ = bits.Add64(u11, c2, c0)
   449  
   450  	}
   451  	{
   452  		var c0, c1, c2 uint64
   453  		v := x[4]
   454  		u0, c1 = bits.Mul64(v, y[0])
   455  		t0, c0 = bits.Add64(c1, t0, 0)
   456  		u1, c1 = bits.Mul64(v, y[1])
   457  		t1, c0 = bits.Add64(c1, t1, c0)
   458  		u2, c1 = bits.Mul64(v, y[2])
   459  		t2, c0 = bits.Add64(c1, t2, c0)
   460  		u3, c1 = bits.Mul64(v, y[3])
   461  		t3, c0 = bits.Add64(c1, t3, c0)
   462  		u4, c1 = bits.Mul64(v, y[4])
   463  		t4, c0 = bits.Add64(c1, t4, c0)
   464  		u5, c1 = bits.Mul64(v, y[5])
   465  		t5, c0 = bits.Add64(c1, t5, c0)
   466  		u6, c1 = bits.Mul64(v, y[6])
   467  		t6, c0 = bits.Add64(c1, t6, c0)
   468  		u7, c1 = bits.Mul64(v, y[7])
   469  		t7, c0 = bits.Add64(c1, t7, c0)
   470  		u8, c1 = bits.Mul64(v, y[8])
   471  		t8, c0 = bits.Add64(c1, t8, c0)
   472  		u9, c1 = bits.Mul64(v, y[9])
   473  		t9, c0 = bits.Add64(c1, t9, c0)
   474  		u10, c1 = bits.Mul64(v, y[10])
   475  		t10, c0 = bits.Add64(c1, t10, c0)
   476  		u11, c1 = bits.Mul64(v, y[11])
   477  		t11, c0 = bits.Add64(c1, t11, c0)
   478  
   479  		c2, _ = bits.Add64(0, 0, c0)
   480  		t1, c0 = bits.Add64(u0, t1, 0)
   481  		t2, c0 = bits.Add64(u1, t2, c0)
   482  		t3, c0 = bits.Add64(u2, t3, c0)
   483  		t4, c0 = bits.Add64(u3, t4, c0)
   484  		t5, c0 = bits.Add64(u4, t5, c0)
   485  		t6, c0 = bits.Add64(u5, t6, c0)
   486  		t7, c0 = bits.Add64(u6, t7, c0)
   487  		t8, c0 = bits.Add64(u7, t8, c0)
   488  		t9, c0 = bits.Add64(u8, t9, c0)
   489  		t10, c0 = bits.Add64(u9, t10, c0)
   490  		t11, c0 = bits.Add64(u10, t11, c0)
   491  		c2, _ = bits.Add64(u11, c2, c0)
   492  
   493  		m := qInvNeg * t0
   494  
   495  		u0, c1 = bits.Mul64(m, q0)
   496  		_, c0 = bits.Add64(t0, c1, 0)
   497  		u1, c1 = bits.Mul64(m, q1)
   498  		t0, c0 = bits.Add64(t1, c1, c0)
   499  		u2, c1 = bits.Mul64(m, q2)
   500  		t1, c0 = bits.Add64(t2, c1, c0)
   501  		u3, c1 = bits.Mul64(m, q3)
   502  		t2, c0 = bits.Add64(t3, c1, c0)
   503  		u4, c1 = bits.Mul64(m, q4)
   504  		t3, c0 = bits.Add64(t4, c1, c0)
   505  		u5, c1 = bits.Mul64(m, q5)
   506  		t4, c0 = bits.Add64(t5, c1, c0)
   507  		u6, c1 = bits.Mul64(m, q6)
   508  		t5, c0 = bits.Add64(t6, c1, c0)
   509  		u7, c1 = bits.Mul64(m, q7)
   510  		t6, c0 = bits.Add64(t7, c1, c0)
   511  		u8, c1 = bits.Mul64(m, q8)
   512  		t7, c0 = bits.Add64(t8, c1, c0)
   513  		u9, c1 = bits.Mul64(m, q9)
   514  		t8, c0 = bits.Add64(t9, c1, c0)
   515  		u10, c1 = bits.Mul64(m, q10)
   516  		t9, c0 = bits.Add64(t10, c1, c0)
   517  		u11, c1 = bits.Mul64(m, q11)
   518  
   519  		t10, c0 = bits.Add64(0, c1, c0)
   520  		u11, _ = bits.Add64(u11, 0, c0)
   521  		t0, c0 = bits.Add64(u0, t0, 0)
   522  		t1, c0 = bits.Add64(u1, t1, c0)
   523  		t2, c0 = bits.Add64(u2, t2, c0)
   524  		t3, c0 = bits.Add64(u3, t3, c0)
   525  		t4, c0 = bits.Add64(u4, t4, c0)
   526  		t5, c0 = bits.Add64(u5, t5, c0)
   527  		t6, c0 = bits.Add64(u6, t6, c0)
   528  		t7, c0 = bits.Add64(u7, t7, c0)
   529  		t8, c0 = bits.Add64(u8, t8, c0)
   530  		t9, c0 = bits.Add64(u9, t9, c0)
   531  		t10, c0 = bits.Add64(u10, t10, c0)
   532  		c2, _ = bits.Add64(c2, 0, c0)
   533  		t10, c0 = bits.Add64(t11, t10, 0)
   534  		t11, _ = bits.Add64(u11, c2, c0)
   535  
   536  	}
   537  	{
   538  		var c0, c1, c2 uint64
   539  		v := x[5]
   540  		u0, c1 = bits.Mul64(v, y[0])
   541  		t0, c0 = bits.Add64(c1, t0, 0)
   542  		u1, c1 = bits.Mul64(v, y[1])
   543  		t1, c0 = bits.Add64(c1, t1, c0)
   544  		u2, c1 = bits.Mul64(v, y[2])
   545  		t2, c0 = bits.Add64(c1, t2, c0)
   546  		u3, c1 = bits.Mul64(v, y[3])
   547  		t3, c0 = bits.Add64(c1, t3, c0)
   548  		u4, c1 = bits.Mul64(v, y[4])
   549  		t4, c0 = bits.Add64(c1, t4, c0)
   550  		u5, c1 = bits.Mul64(v, y[5])
   551  		t5, c0 = bits.Add64(c1, t5, c0)
   552  		u6, c1 = bits.Mul64(v, y[6])
   553  		t6, c0 = bits.Add64(c1, t6, c0)
   554  		u7, c1 = bits.Mul64(v, y[7])
   555  		t7, c0 = bits.Add64(c1, t7, c0)
   556  		u8, c1 = bits.Mul64(v, y[8])
   557  		t8, c0 = bits.Add64(c1, t8, c0)
   558  		u9, c1 = bits.Mul64(v, y[9])
   559  		t9, c0 = bits.Add64(c1, t9, c0)
   560  		u10, c1 = bits.Mul64(v, y[10])
   561  		t10, c0 = bits.Add64(c1, t10, c0)
   562  		u11, c1 = bits.Mul64(v, y[11])
   563  		t11, c0 = bits.Add64(c1, t11, c0)
   564  
   565  		c2, _ = bits.Add64(0, 0, c0)
   566  		t1, c0 = bits.Add64(u0, t1, 0)
   567  		t2, c0 = bits.Add64(u1, t2, c0)
   568  		t3, c0 = bits.Add64(u2, t3, c0)
   569  		t4, c0 = bits.Add64(u3, t4, c0)
   570  		t5, c0 = bits.Add64(u4, t5, c0)
   571  		t6, c0 = bits.Add64(u5, t6, c0)
   572  		t7, c0 = bits.Add64(u6, t7, c0)
   573  		t8, c0 = bits.Add64(u7, t8, c0)
   574  		t9, c0 = bits.Add64(u8, t9, c0)
   575  		t10, c0 = bits.Add64(u9, t10, c0)
   576  		t11, c0 = bits.Add64(u10, t11, c0)
   577  		c2, _ = bits.Add64(u11, c2, c0)
   578  
   579  		m := qInvNeg * t0
   580  
   581  		u0, c1 = bits.Mul64(m, q0)
   582  		_, c0 = bits.Add64(t0, c1, 0)
   583  		u1, c1 = bits.Mul64(m, q1)
   584  		t0, c0 = bits.Add64(t1, c1, c0)
   585  		u2, c1 = bits.Mul64(m, q2)
   586  		t1, c0 = bits.Add64(t2, c1, c0)
   587  		u3, c1 = bits.Mul64(m, q3)
   588  		t2, c0 = bits.Add64(t3, c1, c0)
   589  		u4, c1 = bits.Mul64(m, q4)
   590  		t3, c0 = bits.Add64(t4, c1, c0)
   591  		u5, c1 = bits.Mul64(m, q5)
   592  		t4, c0 = bits.Add64(t5, c1, c0)
   593  		u6, c1 = bits.Mul64(m, q6)
   594  		t5, c0 = bits.Add64(t6, c1, c0)
   595  		u7, c1 = bits.Mul64(m, q7)
   596  		t6, c0 = bits.Add64(t7, c1, c0)
   597  		u8, c1 = bits.Mul64(m, q8)
   598  		t7, c0 = bits.Add64(t8, c1, c0)
   599  		u9, c1 = bits.Mul64(m, q9)
   600  		t8, c0 = bits.Add64(t9, c1, c0)
   601  		u10, c1 = bits.Mul64(m, q10)
   602  		t9, c0 = bits.Add64(t10, c1, c0)
   603  		u11, c1 = bits.Mul64(m, q11)
   604  
   605  		t10, c0 = bits.Add64(0, c1, c0)
   606  		u11, _ = bits.Add64(u11, 0, c0)
   607  		t0, c0 = bits.Add64(u0, t0, 0)
   608  		t1, c0 = bits.Add64(u1, t1, c0)
   609  		t2, c0 = bits.Add64(u2, t2, c0)
   610  		t3, c0 = bits.Add64(u3, t3, c0)
   611  		t4, c0 = bits.Add64(u4, t4, c0)
   612  		t5, c0 = bits.Add64(u5, t5, c0)
   613  		t6, c0 = bits.Add64(u6, t6, c0)
   614  		t7, c0 = bits.Add64(u7, t7, c0)
   615  		t8, c0 = bits.Add64(u8, t8, c0)
   616  		t9, c0 = bits.Add64(u9, t9, c0)
   617  		t10, c0 = bits.Add64(u10, t10, c0)
   618  		c2, _ = bits.Add64(c2, 0, c0)
   619  		t10, c0 = bits.Add64(t11, t10, 0)
   620  		t11, _ = bits.Add64(u11, c2, c0)
   621  
   622  	}
   623  	{
   624  		var c0, c1, c2 uint64
   625  		v := x[6]
   626  		u0, c1 = bits.Mul64(v, y[0])
   627  		t0, c0 = bits.Add64(c1, t0, 0)
   628  		u1, c1 = bits.Mul64(v, y[1])
   629  		t1, c0 = bits.Add64(c1, t1, c0)
   630  		u2, c1 = bits.Mul64(v, y[2])
   631  		t2, c0 = bits.Add64(c1, t2, c0)
   632  		u3, c1 = bits.Mul64(v, y[3])
   633  		t3, c0 = bits.Add64(c1, t3, c0)
   634  		u4, c1 = bits.Mul64(v, y[4])
   635  		t4, c0 = bits.Add64(c1, t4, c0)
   636  		u5, c1 = bits.Mul64(v, y[5])
   637  		t5, c0 = bits.Add64(c1, t5, c0)
   638  		u6, c1 = bits.Mul64(v, y[6])
   639  		t6, c0 = bits.Add64(c1, t6, c0)
   640  		u7, c1 = bits.Mul64(v, y[7])
   641  		t7, c0 = bits.Add64(c1, t7, c0)
   642  		u8, c1 = bits.Mul64(v, y[8])
   643  		t8, c0 = bits.Add64(c1, t8, c0)
   644  		u9, c1 = bits.Mul64(v, y[9])
   645  		t9, c0 = bits.Add64(c1, t9, c0)
   646  		u10, c1 = bits.Mul64(v, y[10])
   647  		t10, c0 = bits.Add64(c1, t10, c0)
   648  		u11, c1 = bits.Mul64(v, y[11])
   649  		t11, c0 = bits.Add64(c1, t11, c0)
   650  
   651  		c2, _ = bits.Add64(0, 0, c0)
   652  		t1, c0 = bits.Add64(u0, t1, 0)
   653  		t2, c0 = bits.Add64(u1, t2, c0)
   654  		t3, c0 = bits.Add64(u2, t3, c0)
   655  		t4, c0 = bits.Add64(u3, t4, c0)
   656  		t5, c0 = bits.Add64(u4, t5, c0)
   657  		t6, c0 = bits.Add64(u5, t6, c0)
   658  		t7, c0 = bits.Add64(u6, t7, c0)
   659  		t8, c0 = bits.Add64(u7, t8, c0)
   660  		t9, c0 = bits.Add64(u8, t9, c0)
   661  		t10, c0 = bits.Add64(u9, t10, c0)
   662  		t11, c0 = bits.Add64(u10, t11, c0)
   663  		c2, _ = bits.Add64(u11, c2, c0)
   664  
   665  		m := qInvNeg * t0
   666  
   667  		u0, c1 = bits.Mul64(m, q0)
   668  		_, c0 = bits.Add64(t0, c1, 0)
   669  		u1, c1 = bits.Mul64(m, q1)
   670  		t0, c0 = bits.Add64(t1, c1, c0)
   671  		u2, c1 = bits.Mul64(m, q2)
   672  		t1, c0 = bits.Add64(t2, c1, c0)
   673  		u3, c1 = bits.Mul64(m, q3)
   674  		t2, c0 = bits.Add64(t3, c1, c0)
   675  		u4, c1 = bits.Mul64(m, q4)
   676  		t3, c0 = bits.Add64(t4, c1, c0)
   677  		u5, c1 = bits.Mul64(m, q5)
   678  		t4, c0 = bits.Add64(t5, c1, c0)
   679  		u6, c1 = bits.Mul64(m, q6)
   680  		t5, c0 = bits.Add64(t6, c1, c0)
   681  		u7, c1 = bits.Mul64(m, q7)
   682  		t6, c0 = bits.Add64(t7, c1, c0)
   683  		u8, c1 = bits.Mul64(m, q8)
   684  		t7, c0 = bits.Add64(t8, c1, c0)
   685  		u9, c1 = bits.Mul64(m, q9)
   686  		t8, c0 = bits.Add64(t9, c1, c0)
   687  		u10, c1 = bits.Mul64(m, q10)
   688  		t9, c0 = bits.Add64(t10, c1, c0)
   689  		u11, c1 = bits.Mul64(m, q11)
   690  
   691  		t10, c0 = bits.Add64(0, c1, c0)
   692  		u11, _ = bits.Add64(u11, 0, c0)
   693  		t0, c0 = bits.Add64(u0, t0, 0)
   694  		t1, c0 = bits.Add64(u1, t1, c0)
   695  		t2, c0 = bits.Add64(u2, t2, c0)
   696  		t3, c0 = bits.Add64(u3, t3, c0)
   697  		t4, c0 = bits.Add64(u4, t4, c0)
   698  		t5, c0 = bits.Add64(u5, t5, c0)
   699  		t6, c0 = bits.Add64(u6, t6, c0)
   700  		t7, c0 = bits.Add64(u7, t7, c0)
   701  		t8, c0 = bits.Add64(u8, t8, c0)
   702  		t9, c0 = bits.Add64(u9, t9, c0)
   703  		t10, c0 = bits.Add64(u10, t10, c0)
   704  		c2, _ = bits.Add64(c2, 0, c0)
   705  		t10, c0 = bits.Add64(t11, t10, 0)
   706  		t11, _ = bits.Add64(u11, c2, c0)
   707  
   708  	}
   709  	{
   710  		var c0, c1, c2 uint64
   711  		v := x[7]
   712  		u0, c1 = bits.Mul64(v, y[0])
   713  		t0, c0 = bits.Add64(c1, t0, 0)
   714  		u1, c1 = bits.Mul64(v, y[1])
   715  		t1, c0 = bits.Add64(c1, t1, c0)
   716  		u2, c1 = bits.Mul64(v, y[2])
   717  		t2, c0 = bits.Add64(c1, t2, c0)
   718  		u3, c1 = bits.Mul64(v, y[3])
   719  		t3, c0 = bits.Add64(c1, t3, c0)
   720  		u4, c1 = bits.Mul64(v, y[4])
   721  		t4, c0 = bits.Add64(c1, t4, c0)
   722  		u5, c1 = bits.Mul64(v, y[5])
   723  		t5, c0 = bits.Add64(c1, t5, c0)
   724  		u6, c1 = bits.Mul64(v, y[6])
   725  		t6, c0 = bits.Add64(c1, t6, c0)
   726  		u7, c1 = bits.Mul64(v, y[7])
   727  		t7, c0 = bits.Add64(c1, t7, c0)
   728  		u8, c1 = bits.Mul64(v, y[8])
   729  		t8, c0 = bits.Add64(c1, t8, c0)
   730  		u9, c1 = bits.Mul64(v, y[9])
   731  		t9, c0 = bits.Add64(c1, t9, c0)
   732  		u10, c1 = bits.Mul64(v, y[10])
   733  		t10, c0 = bits.Add64(c1, t10, c0)
   734  		u11, c1 = bits.Mul64(v, y[11])
   735  		t11, c0 = bits.Add64(c1, t11, c0)
   736  
   737  		c2, _ = bits.Add64(0, 0, c0)
   738  		t1, c0 = bits.Add64(u0, t1, 0)
   739  		t2, c0 = bits.Add64(u1, t2, c0)
   740  		t3, c0 = bits.Add64(u2, t3, c0)
   741  		t4, c0 = bits.Add64(u3, t4, c0)
   742  		t5, c0 = bits.Add64(u4, t5, c0)
   743  		t6, c0 = bits.Add64(u5, t6, c0)
   744  		t7, c0 = bits.Add64(u6, t7, c0)
   745  		t8, c0 = bits.Add64(u7, t8, c0)
   746  		t9, c0 = bits.Add64(u8, t9, c0)
   747  		t10, c0 = bits.Add64(u9, t10, c0)
   748  		t11, c0 = bits.Add64(u10, t11, c0)
   749  		c2, _ = bits.Add64(u11, c2, c0)
   750  
   751  		m := qInvNeg * t0
   752  
   753  		u0, c1 = bits.Mul64(m, q0)
   754  		_, c0 = bits.Add64(t0, c1, 0)
   755  		u1, c1 = bits.Mul64(m, q1)
   756  		t0, c0 = bits.Add64(t1, c1, c0)
   757  		u2, c1 = bits.Mul64(m, q2)
   758  		t1, c0 = bits.Add64(t2, c1, c0)
   759  		u3, c1 = bits.Mul64(m, q3)
   760  		t2, c0 = bits.Add64(t3, c1, c0)
   761  		u4, c1 = bits.Mul64(m, q4)
   762  		t3, c0 = bits.Add64(t4, c1, c0)
   763  		u5, c1 = bits.Mul64(m, q5)
   764  		t4, c0 = bits.Add64(t5, c1, c0)
   765  		u6, c1 = bits.Mul64(m, q6)
   766  		t5, c0 = bits.Add64(t6, c1, c0)
   767  		u7, c1 = bits.Mul64(m, q7)
   768  		t6, c0 = bits.Add64(t7, c1, c0)
   769  		u8, c1 = bits.Mul64(m, q8)
   770  		t7, c0 = bits.Add64(t8, c1, c0)
   771  		u9, c1 = bits.Mul64(m, q9)
   772  		t8, c0 = bits.Add64(t9, c1, c0)
   773  		u10, c1 = bits.Mul64(m, q10)
   774  		t9, c0 = bits.Add64(t10, c1, c0)
   775  		u11, c1 = bits.Mul64(m, q11)
   776  
   777  		t10, c0 = bits.Add64(0, c1, c0)
   778  		u11, _ = bits.Add64(u11, 0, c0)
   779  		t0, c0 = bits.Add64(u0, t0, 0)
   780  		t1, c0 = bits.Add64(u1, t1, c0)
   781  		t2, c0 = bits.Add64(u2, t2, c0)
   782  		t3, c0 = bits.Add64(u3, t3, c0)
   783  		t4, c0 = bits.Add64(u4, t4, c0)
   784  		t5, c0 = bits.Add64(u5, t5, c0)
   785  		t6, c0 = bits.Add64(u6, t6, c0)
   786  		t7, c0 = bits.Add64(u7, t7, c0)
   787  		t8, c0 = bits.Add64(u8, t8, c0)
   788  		t9, c0 = bits.Add64(u9, t9, c0)
   789  		t10, c0 = bits.Add64(u10, t10, c0)
   790  		c2, _ = bits.Add64(c2, 0, c0)
   791  		t10, c0 = bits.Add64(t11, t10, 0)
   792  		t11, _ = bits.Add64(u11, c2, c0)
   793  
   794  	}
   795  	{
   796  		var c0, c1, c2 uint64
   797  		v := x[8]
   798  		u0, c1 = bits.Mul64(v, y[0])
   799  		t0, c0 = bits.Add64(c1, t0, 0)
   800  		u1, c1 = bits.Mul64(v, y[1])
   801  		t1, c0 = bits.Add64(c1, t1, c0)
   802  		u2, c1 = bits.Mul64(v, y[2])
   803  		t2, c0 = bits.Add64(c1, t2, c0)
   804  		u3, c1 = bits.Mul64(v, y[3])
   805  		t3, c0 = bits.Add64(c1, t3, c0)
   806  		u4, c1 = bits.Mul64(v, y[4])
   807  		t4, c0 = bits.Add64(c1, t4, c0)
   808  		u5, c1 = bits.Mul64(v, y[5])
   809  		t5, c0 = bits.Add64(c1, t5, c0)
   810  		u6, c1 = bits.Mul64(v, y[6])
   811  		t6, c0 = bits.Add64(c1, t6, c0)
   812  		u7, c1 = bits.Mul64(v, y[7])
   813  		t7, c0 = bits.Add64(c1, t7, c0)
   814  		u8, c1 = bits.Mul64(v, y[8])
   815  		t8, c0 = bits.Add64(c1, t8, c0)
   816  		u9, c1 = bits.Mul64(v, y[9])
   817  		t9, c0 = bits.Add64(c1, t9, c0)
   818  		u10, c1 = bits.Mul64(v, y[10])
   819  		t10, c0 = bits.Add64(c1, t10, c0)
   820  		u11, c1 = bits.Mul64(v, y[11])
   821  		t11, c0 = bits.Add64(c1, t11, c0)
   822  
   823  		c2, _ = bits.Add64(0, 0, c0)
   824  		t1, c0 = bits.Add64(u0, t1, 0)
   825  		t2, c0 = bits.Add64(u1, t2, c0)
   826  		t3, c0 = bits.Add64(u2, t3, c0)
   827  		t4, c0 = bits.Add64(u3, t4, c0)
   828  		t5, c0 = bits.Add64(u4, t5, c0)
   829  		t6, c0 = bits.Add64(u5, t6, c0)
   830  		t7, c0 = bits.Add64(u6, t7, c0)
   831  		t8, c0 = bits.Add64(u7, t8, c0)
   832  		t9, c0 = bits.Add64(u8, t9, c0)
   833  		t10, c0 = bits.Add64(u9, t10, c0)
   834  		t11, c0 = bits.Add64(u10, t11, c0)
   835  		c2, _ = bits.Add64(u11, c2, c0)
   836  
   837  		m := qInvNeg * t0
   838  
   839  		u0, c1 = bits.Mul64(m, q0)
   840  		_, c0 = bits.Add64(t0, c1, 0)
   841  		u1, c1 = bits.Mul64(m, q1)
   842  		t0, c0 = bits.Add64(t1, c1, c0)
   843  		u2, c1 = bits.Mul64(m, q2)
   844  		t1, c0 = bits.Add64(t2, c1, c0)
   845  		u3, c1 = bits.Mul64(m, q3)
   846  		t2, c0 = bits.Add64(t3, c1, c0)
   847  		u4, c1 = bits.Mul64(m, q4)
   848  		t3, c0 = bits.Add64(t4, c1, c0)
   849  		u5, c1 = bits.Mul64(m, q5)
   850  		t4, c0 = bits.Add64(t5, c1, c0)
   851  		u6, c1 = bits.Mul64(m, q6)
   852  		t5, c0 = bits.Add64(t6, c1, c0)
   853  		u7, c1 = bits.Mul64(m, q7)
   854  		t6, c0 = bits.Add64(t7, c1, c0)
   855  		u8, c1 = bits.Mul64(m, q8)
   856  		t7, c0 = bits.Add64(t8, c1, c0)
   857  		u9, c1 = bits.Mul64(m, q9)
   858  		t8, c0 = bits.Add64(t9, c1, c0)
   859  		u10, c1 = bits.Mul64(m, q10)
   860  		t9, c0 = bits.Add64(t10, c1, c0)
   861  		u11, c1 = bits.Mul64(m, q11)
   862  
   863  		t10, c0 = bits.Add64(0, c1, c0)
   864  		u11, _ = bits.Add64(u11, 0, c0)
   865  		t0, c0 = bits.Add64(u0, t0, 0)
   866  		t1, c0 = bits.Add64(u1, t1, c0)
   867  		t2, c0 = bits.Add64(u2, t2, c0)
   868  		t3, c0 = bits.Add64(u3, t3, c0)
   869  		t4, c0 = bits.Add64(u4, t4, c0)
   870  		t5, c0 = bits.Add64(u5, t5, c0)
   871  		t6, c0 = bits.Add64(u6, t6, c0)
   872  		t7, c0 = bits.Add64(u7, t7, c0)
   873  		t8, c0 = bits.Add64(u8, t8, c0)
   874  		t9, c0 = bits.Add64(u9, t9, c0)
   875  		t10, c0 = bits.Add64(u10, t10, c0)
   876  		c2, _ = bits.Add64(c2, 0, c0)
   877  		t10, c0 = bits.Add64(t11, t10, 0)
   878  		t11, _ = bits.Add64(u11, c2, c0)
   879  
   880  	}
   881  	{
   882  		var c0, c1, c2 uint64
   883  		v := x[9]
   884  		u0, c1 = bits.Mul64(v, y[0])
   885  		t0, c0 = bits.Add64(c1, t0, 0)
   886  		u1, c1 = bits.Mul64(v, y[1])
   887  		t1, c0 = bits.Add64(c1, t1, c0)
   888  		u2, c1 = bits.Mul64(v, y[2])
   889  		t2, c0 = bits.Add64(c1, t2, c0)
   890  		u3, c1 = bits.Mul64(v, y[3])
   891  		t3, c0 = bits.Add64(c1, t3, c0)
   892  		u4, c1 = bits.Mul64(v, y[4])
   893  		t4, c0 = bits.Add64(c1, t4, c0)
   894  		u5, c1 = bits.Mul64(v, y[5])
   895  		t5, c0 = bits.Add64(c1, t5, c0)
   896  		u6, c1 = bits.Mul64(v, y[6])
   897  		t6, c0 = bits.Add64(c1, t6, c0)
   898  		u7, c1 = bits.Mul64(v, y[7])
   899  		t7, c0 = bits.Add64(c1, t7, c0)
   900  		u8, c1 = bits.Mul64(v, y[8])
   901  		t8, c0 = bits.Add64(c1, t8, c0)
   902  		u9, c1 = bits.Mul64(v, y[9])
   903  		t9, c0 = bits.Add64(c1, t9, c0)
   904  		u10, c1 = bits.Mul64(v, y[10])
   905  		t10, c0 = bits.Add64(c1, t10, c0)
   906  		u11, c1 = bits.Mul64(v, y[11])
   907  		t11, c0 = bits.Add64(c1, t11, c0)
   908  
   909  		c2, _ = bits.Add64(0, 0, c0)
   910  		t1, c0 = bits.Add64(u0, t1, 0)
   911  		t2, c0 = bits.Add64(u1, t2, c0)
   912  		t3, c0 = bits.Add64(u2, t3, c0)
   913  		t4, c0 = bits.Add64(u3, t4, c0)
   914  		t5, c0 = bits.Add64(u4, t5, c0)
   915  		t6, c0 = bits.Add64(u5, t6, c0)
   916  		t7, c0 = bits.Add64(u6, t7, c0)
   917  		t8, c0 = bits.Add64(u7, t8, c0)
   918  		t9, c0 = bits.Add64(u8, t9, c0)
   919  		t10, c0 = bits.Add64(u9, t10, c0)
   920  		t11, c0 = bits.Add64(u10, t11, c0)
   921  		c2, _ = bits.Add64(u11, c2, c0)
   922  
   923  		m := qInvNeg * t0
   924  
   925  		u0, c1 = bits.Mul64(m, q0)
   926  		_, c0 = bits.Add64(t0, c1, 0)
   927  		u1, c1 = bits.Mul64(m, q1)
   928  		t0, c0 = bits.Add64(t1, c1, c0)
   929  		u2, c1 = bits.Mul64(m, q2)
   930  		t1, c0 = bits.Add64(t2, c1, c0)
   931  		u3, c1 = bits.Mul64(m, q3)
   932  		t2, c0 = bits.Add64(t3, c1, c0)
   933  		u4, c1 = bits.Mul64(m, q4)
   934  		t3, c0 = bits.Add64(t4, c1, c0)
   935  		u5, c1 = bits.Mul64(m, q5)
   936  		t4, c0 = bits.Add64(t5, c1, c0)
   937  		u6, c1 = bits.Mul64(m, q6)
   938  		t5, c0 = bits.Add64(t6, c1, c0)
   939  		u7, c1 = bits.Mul64(m, q7)
   940  		t6, c0 = bits.Add64(t7, c1, c0)
   941  		u8, c1 = bits.Mul64(m, q8)
   942  		t7, c0 = bits.Add64(t8, c1, c0)
   943  		u9, c1 = bits.Mul64(m, q9)
   944  		t8, c0 = bits.Add64(t9, c1, c0)
   945  		u10, c1 = bits.Mul64(m, q10)
   946  		t9, c0 = bits.Add64(t10, c1, c0)
   947  		u11, c1 = bits.Mul64(m, q11)
   948  
   949  		t10, c0 = bits.Add64(0, c1, c0)
   950  		u11, _ = bits.Add64(u11, 0, c0)
   951  		t0, c0 = bits.Add64(u0, t0, 0)
   952  		t1, c0 = bits.Add64(u1, t1, c0)
   953  		t2, c0 = bits.Add64(u2, t2, c0)
   954  		t3, c0 = bits.Add64(u3, t3, c0)
   955  		t4, c0 = bits.Add64(u4, t4, c0)
   956  		t5, c0 = bits.Add64(u5, t5, c0)
   957  		t6, c0 = bits.Add64(u6, t6, c0)
   958  		t7, c0 = bits.Add64(u7, t7, c0)
   959  		t8, c0 = bits.Add64(u8, t8, c0)
   960  		t9, c0 = bits.Add64(u9, t9, c0)
   961  		t10, c0 = bits.Add64(u10, t10, c0)
   962  		c2, _ = bits.Add64(c2, 0, c0)
   963  		t10, c0 = bits.Add64(t11, t10, 0)
   964  		t11, _ = bits.Add64(u11, c2, c0)
   965  
   966  	}
   967  	{
   968  		var c0, c1, c2 uint64
   969  		v := x[10]
   970  		u0, c1 = bits.Mul64(v, y[0])
   971  		t0, c0 = bits.Add64(c1, t0, 0)
   972  		u1, c1 = bits.Mul64(v, y[1])
   973  		t1, c0 = bits.Add64(c1, t1, c0)
   974  		u2, c1 = bits.Mul64(v, y[2])
   975  		t2, c0 = bits.Add64(c1, t2, c0)
   976  		u3, c1 = bits.Mul64(v, y[3])
   977  		t3, c0 = bits.Add64(c1, t3, c0)
   978  		u4, c1 = bits.Mul64(v, y[4])
   979  		t4, c0 = bits.Add64(c1, t4, c0)
   980  		u5, c1 = bits.Mul64(v, y[5])
   981  		t5, c0 = bits.Add64(c1, t5, c0)
   982  		u6, c1 = bits.Mul64(v, y[6])
   983  		t6, c0 = bits.Add64(c1, t6, c0)
   984  		u7, c1 = bits.Mul64(v, y[7])
   985  		t7, c0 = bits.Add64(c1, t7, c0)
   986  		u8, c1 = bits.Mul64(v, y[8])
   987  		t8, c0 = bits.Add64(c1, t8, c0)
   988  		u9, c1 = bits.Mul64(v, y[9])
   989  		t9, c0 = bits.Add64(c1, t9, c0)
   990  		u10, c1 = bits.Mul64(v, y[10])
   991  		t10, c0 = bits.Add64(c1, t10, c0)
   992  		u11, c1 = bits.Mul64(v, y[11])
   993  		t11, c0 = bits.Add64(c1, t11, c0)
   994  
   995  		c2, _ = bits.Add64(0, 0, c0)
   996  		t1, c0 = bits.Add64(u0, t1, 0)
   997  		t2, c0 = bits.Add64(u1, t2, c0)
   998  		t3, c0 = bits.Add64(u2, t3, c0)
   999  		t4, c0 = bits.Add64(u3, t4, c0)
  1000  		t5, c0 = bits.Add64(u4, t5, c0)
  1001  		t6, c0 = bits.Add64(u5, t6, c0)
  1002  		t7, c0 = bits.Add64(u6, t7, c0)
  1003  		t8, c0 = bits.Add64(u7, t8, c0)
  1004  		t9, c0 = bits.Add64(u8, t9, c0)
  1005  		t10, c0 = bits.Add64(u9, t10, c0)
  1006  		t11, c0 = bits.Add64(u10, t11, c0)
  1007  		c2, _ = bits.Add64(u11, c2, c0)
  1008  
  1009  		m := qInvNeg * t0
  1010  
  1011  		u0, c1 = bits.Mul64(m, q0)
  1012  		_, c0 = bits.Add64(t0, c1, 0)
  1013  		u1, c1 = bits.Mul64(m, q1)
  1014  		t0, c0 = bits.Add64(t1, c1, c0)
  1015  		u2, c1 = bits.Mul64(m, q2)
  1016  		t1, c0 = bits.Add64(t2, c1, c0)
  1017  		u3, c1 = bits.Mul64(m, q3)
  1018  		t2, c0 = bits.Add64(t3, c1, c0)
  1019  		u4, c1 = bits.Mul64(m, q4)
  1020  		t3, c0 = bits.Add64(t4, c1, c0)
  1021  		u5, c1 = bits.Mul64(m, q5)
  1022  		t4, c0 = bits.Add64(t5, c1, c0)
  1023  		u6, c1 = bits.Mul64(m, q6)
  1024  		t5, c0 = bits.Add64(t6, c1, c0)
  1025  		u7, c1 = bits.Mul64(m, q7)
  1026  		t6, c0 = bits.Add64(t7, c1, c0)
  1027  		u8, c1 = bits.Mul64(m, q8)
  1028  		t7, c0 = bits.Add64(t8, c1, c0)
  1029  		u9, c1 = bits.Mul64(m, q9)
  1030  		t8, c0 = bits.Add64(t9, c1, c0)
  1031  		u10, c1 = bits.Mul64(m, q10)
  1032  		t9, c0 = bits.Add64(t10, c1, c0)
  1033  		u11, c1 = bits.Mul64(m, q11)
  1034  
  1035  		t10, c0 = bits.Add64(0, c1, c0)
  1036  		u11, _ = bits.Add64(u11, 0, c0)
  1037  		t0, c0 = bits.Add64(u0, t0, 0)
  1038  		t1, c0 = bits.Add64(u1, t1, c0)
  1039  		t2, c0 = bits.Add64(u2, t2, c0)
  1040  		t3, c0 = bits.Add64(u3, t3, c0)
  1041  		t4, c0 = bits.Add64(u4, t4, c0)
  1042  		t5, c0 = bits.Add64(u5, t5, c0)
  1043  		t6, c0 = bits.Add64(u6, t6, c0)
  1044  		t7, c0 = bits.Add64(u7, t7, c0)
  1045  		t8, c0 = bits.Add64(u8, t8, c0)
  1046  		t9, c0 = bits.Add64(u9, t9, c0)
  1047  		t10, c0 = bits.Add64(u10, t10, c0)
  1048  		c2, _ = bits.Add64(c2, 0, c0)
  1049  		t10, c0 = bits.Add64(t11, t10, 0)
  1050  		t11, _ = bits.Add64(u11, c2, c0)
  1051  
  1052  	}
  1053  	{
  1054  		var c0, c1, c2 uint64
  1055  		v := x[11]
  1056  		u0, c1 = bits.Mul64(v, y[0])
  1057  		t0, c0 = bits.Add64(c1, t0, 0)
  1058  		u1, c1 = bits.Mul64(v, y[1])
  1059  		t1, c0 = bits.Add64(c1, t1, c0)
  1060  		u2, c1 = bits.Mul64(v, y[2])
  1061  		t2, c0 = bits.Add64(c1, t2, c0)
  1062  		u3, c1 = bits.Mul64(v, y[3])
  1063  		t3, c0 = bits.Add64(c1, t3, c0)
  1064  		u4, c1 = bits.Mul64(v, y[4])
  1065  		t4, c0 = bits.Add64(c1, t4, c0)
  1066  		u5, c1 = bits.Mul64(v, y[5])
  1067  		t5, c0 = bits.Add64(c1, t5, c0)
  1068  		u6, c1 = bits.Mul64(v, y[6])
  1069  		t6, c0 = bits.Add64(c1, t6, c0)
  1070  		u7, c1 = bits.Mul64(v, y[7])
  1071  		t7, c0 = bits.Add64(c1, t7, c0)
  1072  		u8, c1 = bits.Mul64(v, y[8])
  1073  		t8, c0 = bits.Add64(c1, t8, c0)
  1074  		u9, c1 = bits.Mul64(v, y[9])
  1075  		t9, c0 = bits.Add64(c1, t9, c0)
  1076  		u10, c1 = bits.Mul64(v, y[10])
  1077  		t10, c0 = bits.Add64(c1, t10, c0)
  1078  		u11, c1 = bits.Mul64(v, y[11])
  1079  		t11, c0 = bits.Add64(c1, t11, c0)
  1080  
  1081  		c2, _ = bits.Add64(0, 0, c0)
  1082  		t1, c0 = bits.Add64(u0, t1, 0)
  1083  		t2, c0 = bits.Add64(u1, t2, c0)
  1084  		t3, c0 = bits.Add64(u2, t3, c0)
  1085  		t4, c0 = bits.Add64(u3, t4, c0)
  1086  		t5, c0 = bits.Add64(u4, t5, c0)
  1087  		t6, c0 = bits.Add64(u5, t6, c0)
  1088  		t7, c0 = bits.Add64(u6, t7, c0)
  1089  		t8, c0 = bits.Add64(u7, t8, c0)
  1090  		t9, c0 = bits.Add64(u8, t9, c0)
  1091  		t10, c0 = bits.Add64(u9, t10, c0)
  1092  		t11, c0 = bits.Add64(u10, t11, c0)
  1093  		c2, _ = bits.Add64(u11, c2, c0)
  1094  
  1095  		m := qInvNeg * t0
  1096  
  1097  		u0, c1 = bits.Mul64(m, q0)
  1098  		_, c0 = bits.Add64(t0, c1, 0)
  1099  		u1, c1 = bits.Mul64(m, q1)
  1100  		t0, c0 = bits.Add64(t1, c1, c0)
  1101  		u2, c1 = bits.Mul64(m, q2)
  1102  		t1, c0 = bits.Add64(t2, c1, c0)
  1103  		u3, c1 = bits.Mul64(m, q3)
  1104  		t2, c0 = bits.Add64(t3, c1, c0)
  1105  		u4, c1 = bits.Mul64(m, q4)
  1106  		t3, c0 = bits.Add64(t4, c1, c0)
  1107  		u5, c1 = bits.Mul64(m, q5)
  1108  		t4, c0 = bits.Add64(t5, c1, c0)
  1109  		u6, c1 = bits.Mul64(m, q6)
  1110  		t5, c0 = bits.Add64(t6, c1, c0)
  1111  		u7, c1 = bits.Mul64(m, q7)
  1112  		t6, c0 = bits.Add64(t7, c1, c0)
  1113  		u8, c1 = bits.Mul64(m, q8)
  1114  		t7, c0 = bits.Add64(t8, c1, c0)
  1115  		u9, c1 = bits.Mul64(m, q9)
  1116  		t8, c0 = bits.Add64(t9, c1, c0)
  1117  		u10, c1 = bits.Mul64(m, q10)
  1118  		t9, c0 = bits.Add64(t10, c1, c0)
  1119  		u11, c1 = bits.Mul64(m, q11)
  1120  
  1121  		t10, c0 = bits.Add64(0, c1, c0)
  1122  		u11, _ = bits.Add64(u11, 0, c0)
  1123  		t0, c0 = bits.Add64(u0, t0, 0)
  1124  		t1, c0 = bits.Add64(u1, t1, c0)
  1125  		t2, c0 = bits.Add64(u2, t2, c0)
  1126  		t3, c0 = bits.Add64(u3, t3, c0)
  1127  		t4, c0 = bits.Add64(u4, t4, c0)
  1128  		t5, c0 = bits.Add64(u5, t5, c0)
  1129  		t6, c0 = bits.Add64(u6, t6, c0)
  1130  		t7, c0 = bits.Add64(u7, t7, c0)
  1131  		t8, c0 = bits.Add64(u8, t8, c0)
  1132  		t9, c0 = bits.Add64(u9, t9, c0)
  1133  		t10, c0 = bits.Add64(u10, t10, c0)
  1134  		c2, _ = bits.Add64(c2, 0, c0)
  1135  		t10, c0 = bits.Add64(t11, t10, 0)
  1136  		t11, _ = bits.Add64(u11, c2, c0)
  1137  
  1138  	}
  1139  	z[0] = t0
  1140  	z[1] = t1
  1141  	z[2] = t2
  1142  	z[3] = t3
  1143  	z[4] = t4
  1144  	z[5] = t5
  1145  	z[6] = t6
  1146  	z[7] = t7
  1147  	z[8] = t8
  1148  	z[9] = t9
  1149  	z[10] = t10
  1150  	z[11] = t11
  1151  
  1152  	// if z ⩾ q → z -= q
  1153  	if !z.smallerThanModulus() {
  1154  		var b uint64
  1155  		z[0], b = bits.Sub64(z[0], q0, 0)
  1156  		z[1], b = bits.Sub64(z[1], q1, b)
  1157  		z[2], b = bits.Sub64(z[2], q2, b)
  1158  		z[3], b = bits.Sub64(z[3], q3, b)
  1159  		z[4], b = bits.Sub64(z[4], q4, b)
  1160  		z[5], b = bits.Sub64(z[5], q5, b)
  1161  		z[6], b = bits.Sub64(z[6], q6, b)
  1162  		z[7], b = bits.Sub64(z[7], q7, b)
  1163  		z[8], b = bits.Sub64(z[8], q8, b)
  1164  		z[9], b = bits.Sub64(z[9], q9, b)
  1165  		z[10], b = bits.Sub64(z[10], q10, b)
  1166  		z[11], _ = bits.Sub64(z[11], q11, b)
  1167  	}
  1168  	return z
  1169  }
  1170  
  1171  // Square z = x * x (mod q)
  1172  //
  1173  // x must be less than q
  1174  func (z *Element) Square(x *Element) *Element {
  1175  	// see Mul for algorithm documentation
  1176  
  1177  	var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11 uint64
  1178  	var u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11 uint64
  1179  	{
  1180  		var c0, c1, c2 uint64
  1181  		v := x[0]
  1182  		u0, t0 = bits.Mul64(v, x[0])
  1183  		u1, t1 = bits.Mul64(v, x[1])
  1184  		u2, t2 = bits.Mul64(v, x[2])
  1185  		u3, t3 = bits.Mul64(v, x[3])
  1186  		u4, t4 = bits.Mul64(v, x[4])
  1187  		u5, t5 = bits.Mul64(v, x[5])
  1188  		u6, t6 = bits.Mul64(v, x[6])
  1189  		u7, t7 = bits.Mul64(v, x[7])
  1190  		u8, t8 = bits.Mul64(v, x[8])
  1191  		u9, t9 = bits.Mul64(v, x[9])
  1192  		u10, t10 = bits.Mul64(v, x[10])
  1193  		u11, t11 = bits.Mul64(v, x[11])
  1194  		t1, c0 = bits.Add64(u0, t1, 0)
  1195  		t2, c0 = bits.Add64(u1, t2, c0)
  1196  		t3, c0 = bits.Add64(u2, t3, c0)
  1197  		t4, c0 = bits.Add64(u3, t4, c0)
  1198  		t5, c0 = bits.Add64(u4, t5, c0)
  1199  		t6, c0 = bits.Add64(u5, t6, c0)
  1200  		t7, c0 = bits.Add64(u6, t7, c0)
  1201  		t8, c0 = bits.Add64(u7, t8, c0)
  1202  		t9, c0 = bits.Add64(u8, t9, c0)
  1203  		t10, c0 = bits.Add64(u9, t10, c0)
  1204  		t11, c0 = bits.Add64(u10, t11, c0)
  1205  		c2, _ = bits.Add64(u11, 0, c0)
  1206  
  1207  		m := qInvNeg * t0
  1208  
  1209  		u0, c1 = bits.Mul64(m, q0)
  1210  		_, c0 = bits.Add64(t0, c1, 0)
  1211  		u1, c1 = bits.Mul64(m, q1)
  1212  		t0, c0 = bits.Add64(t1, c1, c0)
  1213  		u2, c1 = bits.Mul64(m, q2)
  1214  		t1, c0 = bits.Add64(t2, c1, c0)
  1215  		u3, c1 = bits.Mul64(m, q3)
  1216  		t2, c0 = bits.Add64(t3, c1, c0)
  1217  		u4, c1 = bits.Mul64(m, q4)
  1218  		t3, c0 = bits.Add64(t4, c1, c0)
  1219  		u5, c1 = bits.Mul64(m, q5)
  1220  		t4, c0 = bits.Add64(t5, c1, c0)
  1221  		u6, c1 = bits.Mul64(m, q6)
  1222  		t5, c0 = bits.Add64(t6, c1, c0)
  1223  		u7, c1 = bits.Mul64(m, q7)
  1224  		t6, c0 = bits.Add64(t7, c1, c0)
  1225  		u8, c1 = bits.Mul64(m, q8)
  1226  		t7, c0 = bits.Add64(t8, c1, c0)
  1227  		u9, c1 = bits.Mul64(m, q9)
  1228  		t8, c0 = bits.Add64(t9, c1, c0)
  1229  		u10, c1 = bits.Mul64(m, q10)
  1230  		t9, c0 = bits.Add64(t10, c1, c0)
  1231  		u11, c1 = bits.Mul64(m, q11)
  1232  
  1233  		t10, c0 = bits.Add64(0, c1, c0)
  1234  		u11, _ = bits.Add64(u11, 0, c0)
  1235  		t0, c0 = bits.Add64(u0, t0, 0)
  1236  		t1, c0 = bits.Add64(u1, t1, c0)
  1237  		t2, c0 = bits.Add64(u2, t2, c0)
  1238  		t3, c0 = bits.Add64(u3, t3, c0)
  1239  		t4, c0 = bits.Add64(u4, t4, c0)
  1240  		t5, c0 = bits.Add64(u5, t5, c0)
  1241  		t6, c0 = bits.Add64(u6, t6, c0)
  1242  		t7, c0 = bits.Add64(u7, t7, c0)
  1243  		t8, c0 = bits.Add64(u8, t8, c0)
  1244  		t9, c0 = bits.Add64(u9, t9, c0)
  1245  		t10, c0 = bits.Add64(u10, t10, c0)
  1246  		c2, _ = bits.Add64(c2, 0, c0)
  1247  		t10, c0 = bits.Add64(t11, t10, 0)
  1248  		t11, _ = bits.Add64(u11, c2, c0)
  1249  
  1250  	}
  1251  	{
  1252  		var c0, c1, c2 uint64
  1253  		v := x[1]
  1254  		u0, c1 = bits.Mul64(v, x[0])
  1255  		t0, c0 = bits.Add64(c1, t0, 0)
  1256  		u1, c1 = bits.Mul64(v, x[1])
  1257  		t1, c0 = bits.Add64(c1, t1, c0)
  1258  		u2, c1 = bits.Mul64(v, x[2])
  1259  		t2, c0 = bits.Add64(c1, t2, c0)
  1260  		u3, c1 = bits.Mul64(v, x[3])
  1261  		t3, c0 = bits.Add64(c1, t3, c0)
  1262  		u4, c1 = bits.Mul64(v, x[4])
  1263  		t4, c0 = bits.Add64(c1, t4, c0)
  1264  		u5, c1 = bits.Mul64(v, x[5])
  1265  		t5, c0 = bits.Add64(c1, t5, c0)
  1266  		u6, c1 = bits.Mul64(v, x[6])
  1267  		t6, c0 = bits.Add64(c1, t6, c0)
  1268  		u7, c1 = bits.Mul64(v, x[7])
  1269  		t7, c0 = bits.Add64(c1, t7, c0)
  1270  		u8, c1 = bits.Mul64(v, x[8])
  1271  		t8, c0 = bits.Add64(c1, t8, c0)
  1272  		u9, c1 = bits.Mul64(v, x[9])
  1273  		t9, c0 = bits.Add64(c1, t9, c0)
  1274  		u10, c1 = bits.Mul64(v, x[10])
  1275  		t10, c0 = bits.Add64(c1, t10, c0)
  1276  		u11, c1 = bits.Mul64(v, x[11])
  1277  		t11, c0 = bits.Add64(c1, t11, c0)
  1278  
  1279  		c2, _ = bits.Add64(0, 0, c0)
  1280  		t1, c0 = bits.Add64(u0, t1, 0)
  1281  		t2, c0 = bits.Add64(u1, t2, c0)
  1282  		t3, c0 = bits.Add64(u2, t3, c0)
  1283  		t4, c0 = bits.Add64(u3, t4, c0)
  1284  		t5, c0 = bits.Add64(u4, t5, c0)
  1285  		t6, c0 = bits.Add64(u5, t6, c0)
  1286  		t7, c0 = bits.Add64(u6, t7, c0)
  1287  		t8, c0 = bits.Add64(u7, t8, c0)
  1288  		t9, c0 = bits.Add64(u8, t9, c0)
  1289  		t10, c0 = bits.Add64(u9, t10, c0)
  1290  		t11, c0 = bits.Add64(u10, t11, c0)
  1291  		c2, _ = bits.Add64(u11, c2, c0)
  1292  
  1293  		m := qInvNeg * t0
  1294  
  1295  		u0, c1 = bits.Mul64(m, q0)
  1296  		_, c0 = bits.Add64(t0, c1, 0)
  1297  		u1, c1 = bits.Mul64(m, q1)
  1298  		t0, c0 = bits.Add64(t1, c1, c0)
  1299  		u2, c1 = bits.Mul64(m, q2)
  1300  		t1, c0 = bits.Add64(t2, c1, c0)
  1301  		u3, c1 = bits.Mul64(m, q3)
  1302  		t2, c0 = bits.Add64(t3, c1, c0)
  1303  		u4, c1 = bits.Mul64(m, q4)
  1304  		t3, c0 = bits.Add64(t4, c1, c0)
  1305  		u5, c1 = bits.Mul64(m, q5)
  1306  		t4, c0 = bits.Add64(t5, c1, c0)
  1307  		u6, c1 = bits.Mul64(m, q6)
  1308  		t5, c0 = bits.Add64(t6, c1, c0)
  1309  		u7, c1 = bits.Mul64(m, q7)
  1310  		t6, c0 = bits.Add64(t7, c1, c0)
  1311  		u8, c1 = bits.Mul64(m, q8)
  1312  		t7, c0 = bits.Add64(t8, c1, c0)
  1313  		u9, c1 = bits.Mul64(m, q9)
  1314  		t8, c0 = bits.Add64(t9, c1, c0)
  1315  		u10, c1 = bits.Mul64(m, q10)
  1316  		t9, c0 = bits.Add64(t10, c1, c0)
  1317  		u11, c1 = bits.Mul64(m, q11)
  1318  
  1319  		t10, c0 = bits.Add64(0, c1, c0)
  1320  		u11, _ = bits.Add64(u11, 0, c0)
  1321  		t0, c0 = bits.Add64(u0, t0, 0)
  1322  		t1, c0 = bits.Add64(u1, t1, c0)
  1323  		t2, c0 = bits.Add64(u2, t2, c0)
  1324  		t3, c0 = bits.Add64(u3, t3, c0)
  1325  		t4, c0 = bits.Add64(u4, t4, c0)
  1326  		t5, c0 = bits.Add64(u5, t5, c0)
  1327  		t6, c0 = bits.Add64(u6, t6, c0)
  1328  		t7, c0 = bits.Add64(u7, t7, c0)
  1329  		t8, c0 = bits.Add64(u8, t8, c0)
  1330  		t9, c0 = bits.Add64(u9, t9, c0)
  1331  		t10, c0 = bits.Add64(u10, t10, c0)
  1332  		c2, _ = bits.Add64(c2, 0, c0)
  1333  		t10, c0 = bits.Add64(t11, t10, 0)
  1334  		t11, _ = bits.Add64(u11, c2, c0)
  1335  
  1336  	}
  1337  	{
  1338  		var c0, c1, c2 uint64
  1339  		v := x[2]
  1340  		u0, c1 = bits.Mul64(v, x[0])
  1341  		t0, c0 = bits.Add64(c1, t0, 0)
  1342  		u1, c1 = bits.Mul64(v, x[1])
  1343  		t1, c0 = bits.Add64(c1, t1, c0)
  1344  		u2, c1 = bits.Mul64(v, x[2])
  1345  		t2, c0 = bits.Add64(c1, t2, c0)
  1346  		u3, c1 = bits.Mul64(v, x[3])
  1347  		t3, c0 = bits.Add64(c1, t3, c0)
  1348  		u4, c1 = bits.Mul64(v, x[4])
  1349  		t4, c0 = bits.Add64(c1, t4, c0)
  1350  		u5, c1 = bits.Mul64(v, x[5])
  1351  		t5, c0 = bits.Add64(c1, t5, c0)
  1352  		u6, c1 = bits.Mul64(v, x[6])
  1353  		t6, c0 = bits.Add64(c1, t6, c0)
  1354  		u7, c1 = bits.Mul64(v, x[7])
  1355  		t7, c0 = bits.Add64(c1, t7, c0)
  1356  		u8, c1 = bits.Mul64(v, x[8])
  1357  		t8, c0 = bits.Add64(c1, t8, c0)
  1358  		u9, c1 = bits.Mul64(v, x[9])
  1359  		t9, c0 = bits.Add64(c1, t9, c0)
  1360  		u10, c1 = bits.Mul64(v, x[10])
  1361  		t10, c0 = bits.Add64(c1, t10, c0)
  1362  		u11, c1 = bits.Mul64(v, x[11])
  1363  		t11, c0 = bits.Add64(c1, t11, c0)
  1364  
  1365  		c2, _ = bits.Add64(0, 0, c0)
  1366  		t1, c0 = bits.Add64(u0, t1, 0)
  1367  		t2, c0 = bits.Add64(u1, t2, c0)
  1368  		t3, c0 = bits.Add64(u2, t3, c0)
  1369  		t4, c0 = bits.Add64(u3, t4, c0)
  1370  		t5, c0 = bits.Add64(u4, t5, c0)
  1371  		t6, c0 = bits.Add64(u5, t6, c0)
  1372  		t7, c0 = bits.Add64(u6, t7, c0)
  1373  		t8, c0 = bits.Add64(u7, t8, c0)
  1374  		t9, c0 = bits.Add64(u8, t9, c0)
  1375  		t10, c0 = bits.Add64(u9, t10, c0)
  1376  		t11, c0 = bits.Add64(u10, t11, c0)
  1377  		c2, _ = bits.Add64(u11, c2, c0)
  1378  
  1379  		m := qInvNeg * t0
  1380  
  1381  		u0, c1 = bits.Mul64(m, q0)
  1382  		_, c0 = bits.Add64(t0, c1, 0)
  1383  		u1, c1 = bits.Mul64(m, q1)
  1384  		t0, c0 = bits.Add64(t1, c1, c0)
  1385  		u2, c1 = bits.Mul64(m, q2)
  1386  		t1, c0 = bits.Add64(t2, c1, c0)
  1387  		u3, c1 = bits.Mul64(m, q3)
  1388  		t2, c0 = bits.Add64(t3, c1, c0)
  1389  		u4, c1 = bits.Mul64(m, q4)
  1390  		t3, c0 = bits.Add64(t4, c1, c0)
  1391  		u5, c1 = bits.Mul64(m, q5)
  1392  		t4, c0 = bits.Add64(t5, c1, c0)
  1393  		u6, c1 = bits.Mul64(m, q6)
  1394  		t5, c0 = bits.Add64(t6, c1, c0)
  1395  		u7, c1 = bits.Mul64(m, q7)
  1396  		t6, c0 = bits.Add64(t7, c1, c0)
  1397  		u8, c1 = bits.Mul64(m, q8)
  1398  		t7, c0 = bits.Add64(t8, c1, c0)
  1399  		u9, c1 = bits.Mul64(m, q9)
  1400  		t8, c0 = bits.Add64(t9, c1, c0)
  1401  		u10, c1 = bits.Mul64(m, q10)
  1402  		t9, c0 = bits.Add64(t10, c1, c0)
  1403  		u11, c1 = bits.Mul64(m, q11)
  1404  
  1405  		t10, c0 = bits.Add64(0, c1, c0)
  1406  		u11, _ = bits.Add64(u11, 0, c0)
  1407  		t0, c0 = bits.Add64(u0, t0, 0)
  1408  		t1, c0 = bits.Add64(u1, t1, c0)
  1409  		t2, c0 = bits.Add64(u2, t2, c0)
  1410  		t3, c0 = bits.Add64(u3, t3, c0)
  1411  		t4, c0 = bits.Add64(u4, t4, c0)
  1412  		t5, c0 = bits.Add64(u5, t5, c0)
  1413  		t6, c0 = bits.Add64(u6, t6, c0)
  1414  		t7, c0 = bits.Add64(u7, t7, c0)
  1415  		t8, c0 = bits.Add64(u8, t8, c0)
  1416  		t9, c0 = bits.Add64(u9, t9, c0)
  1417  		t10, c0 = bits.Add64(u10, t10, c0)
  1418  		c2, _ = bits.Add64(c2, 0, c0)
  1419  		t10, c0 = bits.Add64(t11, t10, 0)
  1420  		t11, _ = bits.Add64(u11, c2, c0)
  1421  
  1422  	}
  1423  	{
  1424  		var c0, c1, c2 uint64
  1425  		v := x[3]
  1426  		u0, c1 = bits.Mul64(v, x[0])
  1427  		t0, c0 = bits.Add64(c1, t0, 0)
  1428  		u1, c1 = bits.Mul64(v, x[1])
  1429  		t1, c0 = bits.Add64(c1, t1, c0)
  1430  		u2, c1 = bits.Mul64(v, x[2])
  1431  		t2, c0 = bits.Add64(c1, t2, c0)
  1432  		u3, c1 = bits.Mul64(v, x[3])
  1433  		t3, c0 = bits.Add64(c1, t3, c0)
  1434  		u4, c1 = bits.Mul64(v, x[4])
  1435  		t4, c0 = bits.Add64(c1, t4, c0)
  1436  		u5, c1 = bits.Mul64(v, x[5])
  1437  		t5, c0 = bits.Add64(c1, t5, c0)
  1438  		u6, c1 = bits.Mul64(v, x[6])
  1439  		t6, c0 = bits.Add64(c1, t6, c0)
  1440  		u7, c1 = bits.Mul64(v, x[7])
  1441  		t7, c0 = bits.Add64(c1, t7, c0)
  1442  		u8, c1 = bits.Mul64(v, x[8])
  1443  		t8, c0 = bits.Add64(c1, t8, c0)
  1444  		u9, c1 = bits.Mul64(v, x[9])
  1445  		t9, c0 = bits.Add64(c1, t9, c0)
  1446  		u10, c1 = bits.Mul64(v, x[10])
  1447  		t10, c0 = bits.Add64(c1, t10, c0)
  1448  		u11, c1 = bits.Mul64(v, x[11])
  1449  		t11, c0 = bits.Add64(c1, t11, c0)
  1450  
  1451  		c2, _ = bits.Add64(0, 0, c0)
  1452  		t1, c0 = bits.Add64(u0, t1, 0)
  1453  		t2, c0 = bits.Add64(u1, t2, c0)
  1454  		t3, c0 = bits.Add64(u2, t3, c0)
  1455  		t4, c0 = bits.Add64(u3, t4, c0)
  1456  		t5, c0 = bits.Add64(u4, t5, c0)
  1457  		t6, c0 = bits.Add64(u5, t6, c0)
  1458  		t7, c0 = bits.Add64(u6, t7, c0)
  1459  		t8, c0 = bits.Add64(u7, t8, c0)
  1460  		t9, c0 = bits.Add64(u8, t9, c0)
  1461  		t10, c0 = bits.Add64(u9, t10, c0)
  1462  		t11, c0 = bits.Add64(u10, t11, c0)
  1463  		c2, _ = bits.Add64(u11, c2, c0)
  1464  
  1465  		m := qInvNeg * t0
  1466  
  1467  		u0, c1 = bits.Mul64(m, q0)
  1468  		_, c0 = bits.Add64(t0, c1, 0)
  1469  		u1, c1 = bits.Mul64(m, q1)
  1470  		t0, c0 = bits.Add64(t1, c1, c0)
  1471  		u2, c1 = bits.Mul64(m, q2)
  1472  		t1, c0 = bits.Add64(t2, c1, c0)
  1473  		u3, c1 = bits.Mul64(m, q3)
  1474  		t2, c0 = bits.Add64(t3, c1, c0)
  1475  		u4, c1 = bits.Mul64(m, q4)
  1476  		t3, c0 = bits.Add64(t4, c1, c0)
  1477  		u5, c1 = bits.Mul64(m, q5)
  1478  		t4, c0 = bits.Add64(t5, c1, c0)
  1479  		u6, c1 = bits.Mul64(m, q6)
  1480  		t5, c0 = bits.Add64(t6, c1, c0)
  1481  		u7, c1 = bits.Mul64(m, q7)
  1482  		t6, c0 = bits.Add64(t7, c1, c0)
  1483  		u8, c1 = bits.Mul64(m, q8)
  1484  		t7, c0 = bits.Add64(t8, c1, c0)
  1485  		u9, c1 = bits.Mul64(m, q9)
  1486  		t8, c0 = bits.Add64(t9, c1, c0)
  1487  		u10, c1 = bits.Mul64(m, q10)
  1488  		t9, c0 = bits.Add64(t10, c1, c0)
  1489  		u11, c1 = bits.Mul64(m, q11)
  1490  
  1491  		t10, c0 = bits.Add64(0, c1, c0)
  1492  		u11, _ = bits.Add64(u11, 0, c0)
  1493  		t0, c0 = bits.Add64(u0, t0, 0)
  1494  		t1, c0 = bits.Add64(u1, t1, c0)
  1495  		t2, c0 = bits.Add64(u2, t2, c0)
  1496  		t3, c0 = bits.Add64(u3, t3, c0)
  1497  		t4, c0 = bits.Add64(u4, t4, c0)
  1498  		t5, c0 = bits.Add64(u5, t5, c0)
  1499  		t6, c0 = bits.Add64(u6, t6, c0)
  1500  		t7, c0 = bits.Add64(u7, t7, c0)
  1501  		t8, c0 = bits.Add64(u8, t8, c0)
  1502  		t9, c0 = bits.Add64(u9, t9, c0)
  1503  		t10, c0 = bits.Add64(u10, t10, c0)
  1504  		c2, _ = bits.Add64(c2, 0, c0)
  1505  		t10, c0 = bits.Add64(t11, t10, 0)
  1506  		t11, _ = bits.Add64(u11, c2, c0)
  1507  
  1508  	}
  1509  	{
  1510  		var c0, c1, c2 uint64
  1511  		v := x[4]
  1512  		u0, c1 = bits.Mul64(v, x[0])
  1513  		t0, c0 = bits.Add64(c1, t0, 0)
  1514  		u1, c1 = bits.Mul64(v, x[1])
  1515  		t1, c0 = bits.Add64(c1, t1, c0)
  1516  		u2, c1 = bits.Mul64(v, x[2])
  1517  		t2, c0 = bits.Add64(c1, t2, c0)
  1518  		u3, c1 = bits.Mul64(v, x[3])
  1519  		t3, c0 = bits.Add64(c1, t3, c0)
  1520  		u4, c1 = bits.Mul64(v, x[4])
  1521  		t4, c0 = bits.Add64(c1, t4, c0)
  1522  		u5, c1 = bits.Mul64(v, x[5])
  1523  		t5, c0 = bits.Add64(c1, t5, c0)
  1524  		u6, c1 = bits.Mul64(v, x[6])
  1525  		t6, c0 = bits.Add64(c1, t6, c0)
  1526  		u7, c1 = bits.Mul64(v, x[7])
  1527  		t7, c0 = bits.Add64(c1, t7, c0)
  1528  		u8, c1 = bits.Mul64(v, x[8])
  1529  		t8, c0 = bits.Add64(c1, t8, c0)
  1530  		u9, c1 = bits.Mul64(v, x[9])
  1531  		t9, c0 = bits.Add64(c1, t9, c0)
  1532  		u10, c1 = bits.Mul64(v, x[10])
  1533  		t10, c0 = bits.Add64(c1, t10, c0)
  1534  		u11, c1 = bits.Mul64(v, x[11])
  1535  		t11, c0 = bits.Add64(c1, t11, c0)
  1536  
  1537  		c2, _ = bits.Add64(0, 0, c0)
  1538  		t1, c0 = bits.Add64(u0, t1, 0)
  1539  		t2, c0 = bits.Add64(u1, t2, c0)
  1540  		t3, c0 = bits.Add64(u2, t3, c0)
  1541  		t4, c0 = bits.Add64(u3, t4, c0)
  1542  		t5, c0 = bits.Add64(u4, t5, c0)
  1543  		t6, c0 = bits.Add64(u5, t6, c0)
  1544  		t7, c0 = bits.Add64(u6, t7, c0)
  1545  		t8, c0 = bits.Add64(u7, t8, c0)
  1546  		t9, c0 = bits.Add64(u8, t9, c0)
  1547  		t10, c0 = bits.Add64(u9, t10, c0)
  1548  		t11, c0 = bits.Add64(u10, t11, c0)
  1549  		c2, _ = bits.Add64(u11, c2, c0)
  1550  
  1551  		m := qInvNeg * t0
  1552  
  1553  		u0, c1 = bits.Mul64(m, q0)
  1554  		_, c0 = bits.Add64(t0, c1, 0)
  1555  		u1, c1 = bits.Mul64(m, q1)
  1556  		t0, c0 = bits.Add64(t1, c1, c0)
  1557  		u2, c1 = bits.Mul64(m, q2)
  1558  		t1, c0 = bits.Add64(t2, c1, c0)
  1559  		u3, c1 = bits.Mul64(m, q3)
  1560  		t2, c0 = bits.Add64(t3, c1, c0)
  1561  		u4, c1 = bits.Mul64(m, q4)
  1562  		t3, c0 = bits.Add64(t4, c1, c0)
  1563  		u5, c1 = bits.Mul64(m, q5)
  1564  		t4, c0 = bits.Add64(t5, c1, c0)
  1565  		u6, c1 = bits.Mul64(m, q6)
  1566  		t5, c0 = bits.Add64(t6, c1, c0)
  1567  		u7, c1 = bits.Mul64(m, q7)
  1568  		t6, c0 = bits.Add64(t7, c1, c0)
  1569  		u8, c1 = bits.Mul64(m, q8)
  1570  		t7, c0 = bits.Add64(t8, c1, c0)
  1571  		u9, c1 = bits.Mul64(m, q9)
  1572  		t8, c0 = bits.Add64(t9, c1, c0)
  1573  		u10, c1 = bits.Mul64(m, q10)
  1574  		t9, c0 = bits.Add64(t10, c1, c0)
  1575  		u11, c1 = bits.Mul64(m, q11)
  1576  
  1577  		t10, c0 = bits.Add64(0, c1, c0)
  1578  		u11, _ = bits.Add64(u11, 0, c0)
  1579  		t0, c0 = bits.Add64(u0, t0, 0)
  1580  		t1, c0 = bits.Add64(u1, t1, c0)
  1581  		t2, c0 = bits.Add64(u2, t2, c0)
  1582  		t3, c0 = bits.Add64(u3, t3, c0)
  1583  		t4, c0 = bits.Add64(u4, t4, c0)
  1584  		t5, c0 = bits.Add64(u5, t5, c0)
  1585  		t6, c0 = bits.Add64(u6, t6, c0)
  1586  		t7, c0 = bits.Add64(u7, t7, c0)
  1587  		t8, c0 = bits.Add64(u8, t8, c0)
  1588  		t9, c0 = bits.Add64(u9, t9, c0)
  1589  		t10, c0 = bits.Add64(u10, t10, c0)
  1590  		c2, _ = bits.Add64(c2, 0, c0)
  1591  		t10, c0 = bits.Add64(t11, t10, 0)
  1592  		t11, _ = bits.Add64(u11, c2, c0)
  1593  
  1594  	}
  1595  	{
  1596  		var c0, c1, c2 uint64
  1597  		v := x[5]
  1598  		u0, c1 = bits.Mul64(v, x[0])
  1599  		t0, c0 = bits.Add64(c1, t0, 0)
  1600  		u1, c1 = bits.Mul64(v, x[1])
  1601  		t1, c0 = bits.Add64(c1, t1, c0)
  1602  		u2, c1 = bits.Mul64(v, x[2])
  1603  		t2, c0 = bits.Add64(c1, t2, c0)
  1604  		u3, c1 = bits.Mul64(v, x[3])
  1605  		t3, c0 = bits.Add64(c1, t3, c0)
  1606  		u4, c1 = bits.Mul64(v, x[4])
  1607  		t4, c0 = bits.Add64(c1, t4, c0)
  1608  		u5, c1 = bits.Mul64(v, x[5])
  1609  		t5, c0 = bits.Add64(c1, t5, c0)
  1610  		u6, c1 = bits.Mul64(v, x[6])
  1611  		t6, c0 = bits.Add64(c1, t6, c0)
  1612  		u7, c1 = bits.Mul64(v, x[7])
  1613  		t7, c0 = bits.Add64(c1, t7, c0)
  1614  		u8, c1 = bits.Mul64(v, x[8])
  1615  		t8, c0 = bits.Add64(c1, t8, c0)
  1616  		u9, c1 = bits.Mul64(v, x[9])
  1617  		t9, c0 = bits.Add64(c1, t9, c0)
  1618  		u10, c1 = bits.Mul64(v, x[10])
  1619  		t10, c0 = bits.Add64(c1, t10, c0)
  1620  		u11, c1 = bits.Mul64(v, x[11])
  1621  		t11, c0 = bits.Add64(c1, t11, c0)
  1622  
  1623  		c2, _ = bits.Add64(0, 0, c0)
  1624  		t1, c0 = bits.Add64(u0, t1, 0)
  1625  		t2, c0 = bits.Add64(u1, t2, c0)
  1626  		t3, c0 = bits.Add64(u2, t3, c0)
  1627  		t4, c0 = bits.Add64(u3, t4, c0)
  1628  		t5, c0 = bits.Add64(u4, t5, c0)
  1629  		t6, c0 = bits.Add64(u5, t6, c0)
  1630  		t7, c0 = bits.Add64(u6, t7, c0)
  1631  		t8, c0 = bits.Add64(u7, t8, c0)
  1632  		t9, c0 = bits.Add64(u8, t9, c0)
  1633  		t10, c0 = bits.Add64(u9, t10, c0)
  1634  		t11, c0 = bits.Add64(u10, t11, c0)
  1635  		c2, _ = bits.Add64(u11, c2, c0)
  1636  
  1637  		m := qInvNeg * t0
  1638  
  1639  		u0, c1 = bits.Mul64(m, q0)
  1640  		_, c0 = bits.Add64(t0, c1, 0)
  1641  		u1, c1 = bits.Mul64(m, q1)
  1642  		t0, c0 = bits.Add64(t1, c1, c0)
  1643  		u2, c1 = bits.Mul64(m, q2)
  1644  		t1, c0 = bits.Add64(t2, c1, c0)
  1645  		u3, c1 = bits.Mul64(m, q3)
  1646  		t2, c0 = bits.Add64(t3, c1, c0)
  1647  		u4, c1 = bits.Mul64(m, q4)
  1648  		t3, c0 = bits.Add64(t4, c1, c0)
  1649  		u5, c1 = bits.Mul64(m, q5)
  1650  		t4, c0 = bits.Add64(t5, c1, c0)
  1651  		u6, c1 = bits.Mul64(m, q6)
  1652  		t5, c0 = bits.Add64(t6, c1, c0)
  1653  		u7, c1 = bits.Mul64(m, q7)
  1654  		t6, c0 = bits.Add64(t7, c1, c0)
  1655  		u8, c1 = bits.Mul64(m, q8)
  1656  		t7, c0 = bits.Add64(t8, c1, c0)
  1657  		u9, c1 = bits.Mul64(m, q9)
  1658  		t8, c0 = bits.Add64(t9, c1, c0)
  1659  		u10, c1 = bits.Mul64(m, q10)
  1660  		t9, c0 = bits.Add64(t10, c1, c0)
  1661  		u11, c1 = bits.Mul64(m, q11)
  1662  
  1663  		t10, c0 = bits.Add64(0, c1, c0)
  1664  		u11, _ = bits.Add64(u11, 0, c0)
  1665  		t0, c0 = bits.Add64(u0, t0, 0)
  1666  		t1, c0 = bits.Add64(u1, t1, c0)
  1667  		t2, c0 = bits.Add64(u2, t2, c0)
  1668  		t3, c0 = bits.Add64(u3, t3, c0)
  1669  		t4, c0 = bits.Add64(u4, t4, c0)
  1670  		t5, c0 = bits.Add64(u5, t5, c0)
  1671  		t6, c0 = bits.Add64(u6, t6, c0)
  1672  		t7, c0 = bits.Add64(u7, t7, c0)
  1673  		t8, c0 = bits.Add64(u8, t8, c0)
  1674  		t9, c0 = bits.Add64(u9, t9, c0)
  1675  		t10, c0 = bits.Add64(u10, t10, c0)
  1676  		c2, _ = bits.Add64(c2, 0, c0)
  1677  		t10, c0 = bits.Add64(t11, t10, 0)
  1678  		t11, _ = bits.Add64(u11, c2, c0)
  1679  
  1680  	}
  1681  	{
  1682  		var c0, c1, c2 uint64
  1683  		v := x[6]
  1684  		u0, c1 = bits.Mul64(v, x[0])
  1685  		t0, c0 = bits.Add64(c1, t0, 0)
  1686  		u1, c1 = bits.Mul64(v, x[1])
  1687  		t1, c0 = bits.Add64(c1, t1, c0)
  1688  		u2, c1 = bits.Mul64(v, x[2])
  1689  		t2, c0 = bits.Add64(c1, t2, c0)
  1690  		u3, c1 = bits.Mul64(v, x[3])
  1691  		t3, c0 = bits.Add64(c1, t3, c0)
  1692  		u4, c1 = bits.Mul64(v, x[4])
  1693  		t4, c0 = bits.Add64(c1, t4, c0)
  1694  		u5, c1 = bits.Mul64(v, x[5])
  1695  		t5, c0 = bits.Add64(c1, t5, c0)
  1696  		u6, c1 = bits.Mul64(v, x[6])
  1697  		t6, c0 = bits.Add64(c1, t6, c0)
  1698  		u7, c1 = bits.Mul64(v, x[7])
  1699  		t7, c0 = bits.Add64(c1, t7, c0)
  1700  		u8, c1 = bits.Mul64(v, x[8])
  1701  		t8, c0 = bits.Add64(c1, t8, c0)
  1702  		u9, c1 = bits.Mul64(v, x[9])
  1703  		t9, c0 = bits.Add64(c1, t9, c0)
  1704  		u10, c1 = bits.Mul64(v, x[10])
  1705  		t10, c0 = bits.Add64(c1, t10, c0)
  1706  		u11, c1 = bits.Mul64(v, x[11])
  1707  		t11, c0 = bits.Add64(c1, t11, c0)
  1708  
  1709  		c2, _ = bits.Add64(0, 0, c0)
  1710  		t1, c0 = bits.Add64(u0, t1, 0)
  1711  		t2, c0 = bits.Add64(u1, t2, c0)
  1712  		t3, c0 = bits.Add64(u2, t3, c0)
  1713  		t4, c0 = bits.Add64(u3, t4, c0)
  1714  		t5, c0 = bits.Add64(u4, t5, c0)
  1715  		t6, c0 = bits.Add64(u5, t6, c0)
  1716  		t7, c0 = bits.Add64(u6, t7, c0)
  1717  		t8, c0 = bits.Add64(u7, t8, c0)
  1718  		t9, c0 = bits.Add64(u8, t9, c0)
  1719  		t10, c0 = bits.Add64(u9, t10, c0)
  1720  		t11, c0 = bits.Add64(u10, t11, c0)
  1721  		c2, _ = bits.Add64(u11, c2, c0)
  1722  
  1723  		m := qInvNeg * t0
  1724  
  1725  		u0, c1 = bits.Mul64(m, q0)
  1726  		_, c0 = bits.Add64(t0, c1, 0)
  1727  		u1, c1 = bits.Mul64(m, q1)
  1728  		t0, c0 = bits.Add64(t1, c1, c0)
  1729  		u2, c1 = bits.Mul64(m, q2)
  1730  		t1, c0 = bits.Add64(t2, c1, c0)
  1731  		u3, c1 = bits.Mul64(m, q3)
  1732  		t2, c0 = bits.Add64(t3, c1, c0)
  1733  		u4, c1 = bits.Mul64(m, q4)
  1734  		t3, c0 = bits.Add64(t4, c1, c0)
  1735  		u5, c1 = bits.Mul64(m, q5)
  1736  		t4, c0 = bits.Add64(t5, c1, c0)
  1737  		u6, c1 = bits.Mul64(m, q6)
  1738  		t5, c0 = bits.Add64(t6, c1, c0)
  1739  		u7, c1 = bits.Mul64(m, q7)
  1740  		t6, c0 = bits.Add64(t7, c1, c0)
  1741  		u8, c1 = bits.Mul64(m, q8)
  1742  		t7, c0 = bits.Add64(t8, c1, c0)
  1743  		u9, c1 = bits.Mul64(m, q9)
  1744  		t8, c0 = bits.Add64(t9, c1, c0)
  1745  		u10, c1 = bits.Mul64(m, q10)
  1746  		t9, c0 = bits.Add64(t10, c1, c0)
  1747  		u11, c1 = bits.Mul64(m, q11)
  1748  
  1749  		t10, c0 = bits.Add64(0, c1, c0)
  1750  		u11, _ = bits.Add64(u11, 0, c0)
  1751  		t0, c0 = bits.Add64(u0, t0, 0)
  1752  		t1, c0 = bits.Add64(u1, t1, c0)
  1753  		t2, c0 = bits.Add64(u2, t2, c0)
  1754  		t3, c0 = bits.Add64(u3, t3, c0)
  1755  		t4, c0 = bits.Add64(u4, t4, c0)
  1756  		t5, c0 = bits.Add64(u5, t5, c0)
  1757  		t6, c0 = bits.Add64(u6, t6, c0)
  1758  		t7, c0 = bits.Add64(u7, t7, c0)
  1759  		t8, c0 = bits.Add64(u8, t8, c0)
  1760  		t9, c0 = bits.Add64(u9, t9, c0)
  1761  		t10, c0 = bits.Add64(u10, t10, c0)
  1762  		c2, _ = bits.Add64(c2, 0, c0)
  1763  		t10, c0 = bits.Add64(t11, t10, 0)
  1764  		t11, _ = bits.Add64(u11, c2, c0)
  1765  
  1766  	}
  1767  	{
  1768  		var c0, c1, c2 uint64
  1769  		v := x[7]
  1770  		u0, c1 = bits.Mul64(v, x[0])
  1771  		t0, c0 = bits.Add64(c1, t0, 0)
  1772  		u1, c1 = bits.Mul64(v, x[1])
  1773  		t1, c0 = bits.Add64(c1, t1, c0)
  1774  		u2, c1 = bits.Mul64(v, x[2])
  1775  		t2, c0 = bits.Add64(c1, t2, c0)
  1776  		u3, c1 = bits.Mul64(v, x[3])
  1777  		t3, c0 = bits.Add64(c1, t3, c0)
  1778  		u4, c1 = bits.Mul64(v, x[4])
  1779  		t4, c0 = bits.Add64(c1, t4, c0)
  1780  		u5, c1 = bits.Mul64(v, x[5])
  1781  		t5, c0 = bits.Add64(c1, t5, c0)
  1782  		u6, c1 = bits.Mul64(v, x[6])
  1783  		t6, c0 = bits.Add64(c1, t6, c0)
  1784  		u7, c1 = bits.Mul64(v, x[7])
  1785  		t7, c0 = bits.Add64(c1, t7, c0)
  1786  		u8, c1 = bits.Mul64(v, x[8])
  1787  		t8, c0 = bits.Add64(c1, t8, c0)
  1788  		u9, c1 = bits.Mul64(v, x[9])
  1789  		t9, c0 = bits.Add64(c1, t9, c0)
  1790  		u10, c1 = bits.Mul64(v, x[10])
  1791  		t10, c0 = bits.Add64(c1, t10, c0)
  1792  		u11, c1 = bits.Mul64(v, x[11])
  1793  		t11, c0 = bits.Add64(c1, t11, c0)
  1794  
  1795  		c2, _ = bits.Add64(0, 0, c0)
  1796  		t1, c0 = bits.Add64(u0, t1, 0)
  1797  		t2, c0 = bits.Add64(u1, t2, c0)
  1798  		t3, c0 = bits.Add64(u2, t3, c0)
  1799  		t4, c0 = bits.Add64(u3, t4, c0)
  1800  		t5, c0 = bits.Add64(u4, t5, c0)
  1801  		t6, c0 = bits.Add64(u5, t6, c0)
  1802  		t7, c0 = bits.Add64(u6, t7, c0)
  1803  		t8, c0 = bits.Add64(u7, t8, c0)
  1804  		t9, c0 = bits.Add64(u8, t9, c0)
  1805  		t10, c0 = bits.Add64(u9, t10, c0)
  1806  		t11, c0 = bits.Add64(u10, t11, c0)
  1807  		c2, _ = bits.Add64(u11, c2, c0)
  1808  
  1809  		m := qInvNeg * t0
  1810  
  1811  		u0, c1 = bits.Mul64(m, q0)
  1812  		_, c0 = bits.Add64(t0, c1, 0)
  1813  		u1, c1 = bits.Mul64(m, q1)
  1814  		t0, c0 = bits.Add64(t1, c1, c0)
  1815  		u2, c1 = bits.Mul64(m, q2)
  1816  		t1, c0 = bits.Add64(t2, c1, c0)
  1817  		u3, c1 = bits.Mul64(m, q3)
  1818  		t2, c0 = bits.Add64(t3, c1, c0)
  1819  		u4, c1 = bits.Mul64(m, q4)
  1820  		t3, c0 = bits.Add64(t4, c1, c0)
  1821  		u5, c1 = bits.Mul64(m, q5)
  1822  		t4, c0 = bits.Add64(t5, c1, c0)
  1823  		u6, c1 = bits.Mul64(m, q6)
  1824  		t5, c0 = bits.Add64(t6, c1, c0)
  1825  		u7, c1 = bits.Mul64(m, q7)
  1826  		t6, c0 = bits.Add64(t7, c1, c0)
  1827  		u8, c1 = bits.Mul64(m, q8)
  1828  		t7, c0 = bits.Add64(t8, c1, c0)
  1829  		u9, c1 = bits.Mul64(m, q9)
  1830  		t8, c0 = bits.Add64(t9, c1, c0)
  1831  		u10, c1 = bits.Mul64(m, q10)
  1832  		t9, c0 = bits.Add64(t10, c1, c0)
  1833  		u11, c1 = bits.Mul64(m, q11)
  1834  
  1835  		t10, c0 = bits.Add64(0, c1, c0)
  1836  		u11, _ = bits.Add64(u11, 0, c0)
  1837  		t0, c0 = bits.Add64(u0, t0, 0)
  1838  		t1, c0 = bits.Add64(u1, t1, c0)
  1839  		t2, c0 = bits.Add64(u2, t2, c0)
  1840  		t3, c0 = bits.Add64(u3, t3, c0)
  1841  		t4, c0 = bits.Add64(u4, t4, c0)
  1842  		t5, c0 = bits.Add64(u5, t5, c0)
  1843  		t6, c0 = bits.Add64(u6, t6, c0)
  1844  		t7, c0 = bits.Add64(u7, t7, c0)
  1845  		t8, c0 = bits.Add64(u8, t8, c0)
  1846  		t9, c0 = bits.Add64(u9, t9, c0)
  1847  		t10, c0 = bits.Add64(u10, t10, c0)
  1848  		c2, _ = bits.Add64(c2, 0, c0)
  1849  		t10, c0 = bits.Add64(t11, t10, 0)
  1850  		t11, _ = bits.Add64(u11, c2, c0)
  1851  
  1852  	}
  1853  	{
  1854  		var c0, c1, c2 uint64
  1855  		v := x[8]
  1856  		u0, c1 = bits.Mul64(v, x[0])
  1857  		t0, c0 = bits.Add64(c1, t0, 0)
  1858  		u1, c1 = bits.Mul64(v, x[1])
  1859  		t1, c0 = bits.Add64(c1, t1, c0)
  1860  		u2, c1 = bits.Mul64(v, x[2])
  1861  		t2, c0 = bits.Add64(c1, t2, c0)
  1862  		u3, c1 = bits.Mul64(v, x[3])
  1863  		t3, c0 = bits.Add64(c1, t3, c0)
  1864  		u4, c1 = bits.Mul64(v, x[4])
  1865  		t4, c0 = bits.Add64(c1, t4, c0)
  1866  		u5, c1 = bits.Mul64(v, x[5])
  1867  		t5, c0 = bits.Add64(c1, t5, c0)
  1868  		u6, c1 = bits.Mul64(v, x[6])
  1869  		t6, c0 = bits.Add64(c1, t6, c0)
  1870  		u7, c1 = bits.Mul64(v, x[7])
  1871  		t7, c0 = bits.Add64(c1, t7, c0)
  1872  		u8, c1 = bits.Mul64(v, x[8])
  1873  		t8, c0 = bits.Add64(c1, t8, c0)
  1874  		u9, c1 = bits.Mul64(v, x[9])
  1875  		t9, c0 = bits.Add64(c1, t9, c0)
  1876  		u10, c1 = bits.Mul64(v, x[10])
  1877  		t10, c0 = bits.Add64(c1, t10, c0)
  1878  		u11, c1 = bits.Mul64(v, x[11])
  1879  		t11, c0 = bits.Add64(c1, t11, c0)
  1880  
  1881  		c2, _ = bits.Add64(0, 0, c0)
  1882  		t1, c0 = bits.Add64(u0, t1, 0)
  1883  		t2, c0 = bits.Add64(u1, t2, c0)
  1884  		t3, c0 = bits.Add64(u2, t3, c0)
  1885  		t4, c0 = bits.Add64(u3, t4, c0)
  1886  		t5, c0 = bits.Add64(u4, t5, c0)
  1887  		t6, c0 = bits.Add64(u5, t6, c0)
  1888  		t7, c0 = bits.Add64(u6, t7, c0)
  1889  		t8, c0 = bits.Add64(u7, t8, c0)
  1890  		t9, c0 = bits.Add64(u8, t9, c0)
  1891  		t10, c0 = bits.Add64(u9, t10, c0)
  1892  		t11, c0 = bits.Add64(u10, t11, c0)
  1893  		c2, _ = bits.Add64(u11, c2, c0)
  1894  
  1895  		m := qInvNeg * t0
  1896  
  1897  		u0, c1 = bits.Mul64(m, q0)
  1898  		_, c0 = bits.Add64(t0, c1, 0)
  1899  		u1, c1 = bits.Mul64(m, q1)
  1900  		t0, c0 = bits.Add64(t1, c1, c0)
  1901  		u2, c1 = bits.Mul64(m, q2)
  1902  		t1, c0 = bits.Add64(t2, c1, c0)
  1903  		u3, c1 = bits.Mul64(m, q3)
  1904  		t2, c0 = bits.Add64(t3, c1, c0)
  1905  		u4, c1 = bits.Mul64(m, q4)
  1906  		t3, c0 = bits.Add64(t4, c1, c0)
  1907  		u5, c1 = bits.Mul64(m, q5)
  1908  		t4, c0 = bits.Add64(t5, c1, c0)
  1909  		u6, c1 = bits.Mul64(m, q6)
  1910  		t5, c0 = bits.Add64(t6, c1, c0)
  1911  		u7, c1 = bits.Mul64(m, q7)
  1912  		t6, c0 = bits.Add64(t7, c1, c0)
  1913  		u8, c1 = bits.Mul64(m, q8)
  1914  		t7, c0 = bits.Add64(t8, c1, c0)
  1915  		u9, c1 = bits.Mul64(m, q9)
  1916  		t8, c0 = bits.Add64(t9, c1, c0)
  1917  		u10, c1 = bits.Mul64(m, q10)
  1918  		t9, c0 = bits.Add64(t10, c1, c0)
  1919  		u11, c1 = bits.Mul64(m, q11)
  1920  
  1921  		t10, c0 = bits.Add64(0, c1, c0)
  1922  		u11, _ = bits.Add64(u11, 0, c0)
  1923  		t0, c0 = bits.Add64(u0, t0, 0)
  1924  		t1, c0 = bits.Add64(u1, t1, c0)
  1925  		t2, c0 = bits.Add64(u2, t2, c0)
  1926  		t3, c0 = bits.Add64(u3, t3, c0)
  1927  		t4, c0 = bits.Add64(u4, t4, c0)
  1928  		t5, c0 = bits.Add64(u5, t5, c0)
  1929  		t6, c0 = bits.Add64(u6, t6, c0)
  1930  		t7, c0 = bits.Add64(u7, t7, c0)
  1931  		t8, c0 = bits.Add64(u8, t8, c0)
  1932  		t9, c0 = bits.Add64(u9, t9, c0)
  1933  		t10, c0 = bits.Add64(u10, t10, c0)
  1934  		c2, _ = bits.Add64(c2, 0, c0)
  1935  		t10, c0 = bits.Add64(t11, t10, 0)
  1936  		t11, _ = bits.Add64(u11, c2, c0)
  1937  
  1938  	}
  1939  	{
  1940  		var c0, c1, c2 uint64
  1941  		v := x[9]
  1942  		u0, c1 = bits.Mul64(v, x[0])
  1943  		t0, c0 = bits.Add64(c1, t0, 0)
  1944  		u1, c1 = bits.Mul64(v, x[1])
  1945  		t1, c0 = bits.Add64(c1, t1, c0)
  1946  		u2, c1 = bits.Mul64(v, x[2])
  1947  		t2, c0 = bits.Add64(c1, t2, c0)
  1948  		u3, c1 = bits.Mul64(v, x[3])
  1949  		t3, c0 = bits.Add64(c1, t3, c0)
  1950  		u4, c1 = bits.Mul64(v, x[4])
  1951  		t4, c0 = bits.Add64(c1, t4, c0)
  1952  		u5, c1 = bits.Mul64(v, x[5])
  1953  		t5, c0 = bits.Add64(c1, t5, c0)
  1954  		u6, c1 = bits.Mul64(v, x[6])
  1955  		t6, c0 = bits.Add64(c1, t6, c0)
  1956  		u7, c1 = bits.Mul64(v, x[7])
  1957  		t7, c0 = bits.Add64(c1, t7, c0)
  1958  		u8, c1 = bits.Mul64(v, x[8])
  1959  		t8, c0 = bits.Add64(c1, t8, c0)
  1960  		u9, c1 = bits.Mul64(v, x[9])
  1961  		t9, c0 = bits.Add64(c1, t9, c0)
  1962  		u10, c1 = bits.Mul64(v, x[10])
  1963  		t10, c0 = bits.Add64(c1, t10, c0)
  1964  		u11, c1 = bits.Mul64(v, x[11])
  1965  		t11, c0 = bits.Add64(c1, t11, c0)
  1966  
  1967  		c2, _ = bits.Add64(0, 0, c0)
  1968  		t1, c0 = bits.Add64(u0, t1, 0)
  1969  		t2, c0 = bits.Add64(u1, t2, c0)
  1970  		t3, c0 = bits.Add64(u2, t3, c0)
  1971  		t4, c0 = bits.Add64(u3, t4, c0)
  1972  		t5, c0 = bits.Add64(u4, t5, c0)
  1973  		t6, c0 = bits.Add64(u5, t6, c0)
  1974  		t7, c0 = bits.Add64(u6, t7, c0)
  1975  		t8, c0 = bits.Add64(u7, t8, c0)
  1976  		t9, c0 = bits.Add64(u8, t9, c0)
  1977  		t10, c0 = bits.Add64(u9, t10, c0)
  1978  		t11, c0 = bits.Add64(u10, t11, c0)
  1979  		c2, _ = bits.Add64(u11, c2, c0)
  1980  
  1981  		m := qInvNeg * t0
  1982  
  1983  		u0, c1 = bits.Mul64(m, q0)
  1984  		_, c0 = bits.Add64(t0, c1, 0)
  1985  		u1, c1 = bits.Mul64(m, q1)
  1986  		t0, c0 = bits.Add64(t1, c1, c0)
  1987  		u2, c1 = bits.Mul64(m, q2)
  1988  		t1, c0 = bits.Add64(t2, c1, c0)
  1989  		u3, c1 = bits.Mul64(m, q3)
  1990  		t2, c0 = bits.Add64(t3, c1, c0)
  1991  		u4, c1 = bits.Mul64(m, q4)
  1992  		t3, c0 = bits.Add64(t4, c1, c0)
  1993  		u5, c1 = bits.Mul64(m, q5)
  1994  		t4, c0 = bits.Add64(t5, c1, c0)
  1995  		u6, c1 = bits.Mul64(m, q6)
  1996  		t5, c0 = bits.Add64(t6, c1, c0)
  1997  		u7, c1 = bits.Mul64(m, q7)
  1998  		t6, c0 = bits.Add64(t7, c1, c0)
  1999  		u8, c1 = bits.Mul64(m, q8)
  2000  		t7, c0 = bits.Add64(t8, c1, c0)
  2001  		u9, c1 = bits.Mul64(m, q9)
  2002  		t8, c0 = bits.Add64(t9, c1, c0)
  2003  		u10, c1 = bits.Mul64(m, q10)
  2004  		t9, c0 = bits.Add64(t10, c1, c0)
  2005  		u11, c1 = bits.Mul64(m, q11)
  2006  
  2007  		t10, c0 = bits.Add64(0, c1, c0)
  2008  		u11, _ = bits.Add64(u11, 0, c0)
  2009  		t0, c0 = bits.Add64(u0, t0, 0)
  2010  		t1, c0 = bits.Add64(u1, t1, c0)
  2011  		t2, c0 = bits.Add64(u2, t2, c0)
  2012  		t3, c0 = bits.Add64(u3, t3, c0)
  2013  		t4, c0 = bits.Add64(u4, t4, c0)
  2014  		t5, c0 = bits.Add64(u5, t5, c0)
  2015  		t6, c0 = bits.Add64(u6, t6, c0)
  2016  		t7, c0 = bits.Add64(u7, t7, c0)
  2017  		t8, c0 = bits.Add64(u8, t8, c0)
  2018  		t9, c0 = bits.Add64(u9, t9, c0)
  2019  		t10, c0 = bits.Add64(u10, t10, c0)
  2020  		c2, _ = bits.Add64(c2, 0, c0)
  2021  		t10, c0 = bits.Add64(t11, t10, 0)
  2022  		t11, _ = bits.Add64(u11, c2, c0)
  2023  
  2024  	}
  2025  	{
  2026  		var c0, c1, c2 uint64
  2027  		v := x[10]
  2028  		u0, c1 = bits.Mul64(v, x[0])
  2029  		t0, c0 = bits.Add64(c1, t0, 0)
  2030  		u1, c1 = bits.Mul64(v, x[1])
  2031  		t1, c0 = bits.Add64(c1, t1, c0)
  2032  		u2, c1 = bits.Mul64(v, x[2])
  2033  		t2, c0 = bits.Add64(c1, t2, c0)
  2034  		u3, c1 = bits.Mul64(v, x[3])
  2035  		t3, c0 = bits.Add64(c1, t3, c0)
  2036  		u4, c1 = bits.Mul64(v, x[4])
  2037  		t4, c0 = bits.Add64(c1, t4, c0)
  2038  		u5, c1 = bits.Mul64(v, x[5])
  2039  		t5, c0 = bits.Add64(c1, t5, c0)
  2040  		u6, c1 = bits.Mul64(v, x[6])
  2041  		t6, c0 = bits.Add64(c1, t6, c0)
  2042  		u7, c1 = bits.Mul64(v, x[7])
  2043  		t7, c0 = bits.Add64(c1, t7, c0)
  2044  		u8, c1 = bits.Mul64(v, x[8])
  2045  		t8, c0 = bits.Add64(c1, t8, c0)
  2046  		u9, c1 = bits.Mul64(v, x[9])
  2047  		t9, c0 = bits.Add64(c1, t9, c0)
  2048  		u10, c1 = bits.Mul64(v, x[10])
  2049  		t10, c0 = bits.Add64(c1, t10, c0)
  2050  		u11, c1 = bits.Mul64(v, x[11])
  2051  		t11, c0 = bits.Add64(c1, t11, c0)
  2052  
  2053  		c2, _ = bits.Add64(0, 0, c0)
  2054  		t1, c0 = bits.Add64(u0, t1, 0)
  2055  		t2, c0 = bits.Add64(u1, t2, c0)
  2056  		t3, c0 = bits.Add64(u2, t3, c0)
  2057  		t4, c0 = bits.Add64(u3, t4, c0)
  2058  		t5, c0 = bits.Add64(u4, t5, c0)
  2059  		t6, c0 = bits.Add64(u5, t6, c0)
  2060  		t7, c0 = bits.Add64(u6, t7, c0)
  2061  		t8, c0 = bits.Add64(u7, t8, c0)
  2062  		t9, c0 = bits.Add64(u8, t9, c0)
  2063  		t10, c0 = bits.Add64(u9, t10, c0)
  2064  		t11, c0 = bits.Add64(u10, t11, c0)
  2065  		c2, _ = bits.Add64(u11, c2, c0)
  2066  
  2067  		m := qInvNeg * t0
  2068  
  2069  		u0, c1 = bits.Mul64(m, q0)
  2070  		_, c0 = bits.Add64(t0, c1, 0)
  2071  		u1, c1 = bits.Mul64(m, q1)
  2072  		t0, c0 = bits.Add64(t1, c1, c0)
  2073  		u2, c1 = bits.Mul64(m, q2)
  2074  		t1, c0 = bits.Add64(t2, c1, c0)
  2075  		u3, c1 = bits.Mul64(m, q3)
  2076  		t2, c0 = bits.Add64(t3, c1, c0)
  2077  		u4, c1 = bits.Mul64(m, q4)
  2078  		t3, c0 = bits.Add64(t4, c1, c0)
  2079  		u5, c1 = bits.Mul64(m, q5)
  2080  		t4, c0 = bits.Add64(t5, c1, c0)
  2081  		u6, c1 = bits.Mul64(m, q6)
  2082  		t5, c0 = bits.Add64(t6, c1, c0)
  2083  		u7, c1 = bits.Mul64(m, q7)
  2084  		t6, c0 = bits.Add64(t7, c1, c0)
  2085  		u8, c1 = bits.Mul64(m, q8)
  2086  		t7, c0 = bits.Add64(t8, c1, c0)
  2087  		u9, c1 = bits.Mul64(m, q9)
  2088  		t8, c0 = bits.Add64(t9, c1, c0)
  2089  		u10, c1 = bits.Mul64(m, q10)
  2090  		t9, c0 = bits.Add64(t10, c1, c0)
  2091  		u11, c1 = bits.Mul64(m, q11)
  2092  
  2093  		t10, c0 = bits.Add64(0, c1, c0)
  2094  		u11, _ = bits.Add64(u11, 0, c0)
  2095  		t0, c0 = bits.Add64(u0, t0, 0)
  2096  		t1, c0 = bits.Add64(u1, t1, c0)
  2097  		t2, c0 = bits.Add64(u2, t2, c0)
  2098  		t3, c0 = bits.Add64(u3, t3, c0)
  2099  		t4, c0 = bits.Add64(u4, t4, c0)
  2100  		t5, c0 = bits.Add64(u5, t5, c0)
  2101  		t6, c0 = bits.Add64(u6, t6, c0)
  2102  		t7, c0 = bits.Add64(u7, t7, c0)
  2103  		t8, c0 = bits.Add64(u8, t8, c0)
  2104  		t9, c0 = bits.Add64(u9, t9, c0)
  2105  		t10, c0 = bits.Add64(u10, t10, c0)
  2106  		c2, _ = bits.Add64(c2, 0, c0)
  2107  		t10, c0 = bits.Add64(t11, t10, 0)
  2108  		t11, _ = bits.Add64(u11, c2, c0)
  2109  
  2110  	}
  2111  	{
  2112  		var c0, c1, c2 uint64
  2113  		v := x[11]
  2114  		u0, c1 = bits.Mul64(v, x[0])
  2115  		t0, c0 = bits.Add64(c1, t0, 0)
  2116  		u1, c1 = bits.Mul64(v, x[1])
  2117  		t1, c0 = bits.Add64(c1, t1, c0)
  2118  		u2, c1 = bits.Mul64(v, x[2])
  2119  		t2, c0 = bits.Add64(c1, t2, c0)
  2120  		u3, c1 = bits.Mul64(v, x[3])
  2121  		t3, c0 = bits.Add64(c1, t3, c0)
  2122  		u4, c1 = bits.Mul64(v, x[4])
  2123  		t4, c0 = bits.Add64(c1, t4, c0)
  2124  		u5, c1 = bits.Mul64(v, x[5])
  2125  		t5, c0 = bits.Add64(c1, t5, c0)
  2126  		u6, c1 = bits.Mul64(v, x[6])
  2127  		t6, c0 = bits.Add64(c1, t6, c0)
  2128  		u7, c1 = bits.Mul64(v, x[7])
  2129  		t7, c0 = bits.Add64(c1, t7, c0)
  2130  		u8, c1 = bits.Mul64(v, x[8])
  2131  		t8, c0 = bits.Add64(c1, t8, c0)
  2132  		u9, c1 = bits.Mul64(v, x[9])
  2133  		t9, c0 = bits.Add64(c1, t9, c0)
  2134  		u10, c1 = bits.Mul64(v, x[10])
  2135  		t10, c0 = bits.Add64(c1, t10, c0)
  2136  		u11, c1 = bits.Mul64(v, x[11])
  2137  		t11, c0 = bits.Add64(c1, t11, c0)
  2138  
  2139  		c2, _ = bits.Add64(0, 0, c0)
  2140  		t1, c0 = bits.Add64(u0, t1, 0)
  2141  		t2, c0 = bits.Add64(u1, t2, c0)
  2142  		t3, c0 = bits.Add64(u2, t3, c0)
  2143  		t4, c0 = bits.Add64(u3, t4, c0)
  2144  		t5, c0 = bits.Add64(u4, t5, c0)
  2145  		t6, c0 = bits.Add64(u5, t6, c0)
  2146  		t7, c0 = bits.Add64(u6, t7, c0)
  2147  		t8, c0 = bits.Add64(u7, t8, c0)
  2148  		t9, c0 = bits.Add64(u8, t9, c0)
  2149  		t10, c0 = bits.Add64(u9, t10, c0)
  2150  		t11, c0 = bits.Add64(u10, t11, c0)
  2151  		c2, _ = bits.Add64(u11, c2, c0)
  2152  
  2153  		m := qInvNeg * t0
  2154  
  2155  		u0, c1 = bits.Mul64(m, q0)
  2156  		_, c0 = bits.Add64(t0, c1, 0)
  2157  		u1, c1 = bits.Mul64(m, q1)
  2158  		t0, c0 = bits.Add64(t1, c1, c0)
  2159  		u2, c1 = bits.Mul64(m, q2)
  2160  		t1, c0 = bits.Add64(t2, c1, c0)
  2161  		u3, c1 = bits.Mul64(m, q3)
  2162  		t2, c0 = bits.Add64(t3, c1, c0)
  2163  		u4, c1 = bits.Mul64(m, q4)
  2164  		t3, c0 = bits.Add64(t4, c1, c0)
  2165  		u5, c1 = bits.Mul64(m, q5)
  2166  		t4, c0 = bits.Add64(t5, c1, c0)
  2167  		u6, c1 = bits.Mul64(m, q6)
  2168  		t5, c0 = bits.Add64(t6, c1, c0)
  2169  		u7, c1 = bits.Mul64(m, q7)
  2170  		t6, c0 = bits.Add64(t7, c1, c0)
  2171  		u8, c1 = bits.Mul64(m, q8)
  2172  		t7, c0 = bits.Add64(t8, c1, c0)
  2173  		u9, c1 = bits.Mul64(m, q9)
  2174  		t8, c0 = bits.Add64(t9, c1, c0)
  2175  		u10, c1 = bits.Mul64(m, q10)
  2176  		t9, c0 = bits.Add64(t10, c1, c0)
  2177  		u11, c1 = bits.Mul64(m, q11)
  2178  
  2179  		t10, c0 = bits.Add64(0, c1, c0)
  2180  		u11, _ = bits.Add64(u11, 0, c0)
  2181  		t0, c0 = bits.Add64(u0, t0, 0)
  2182  		t1, c0 = bits.Add64(u1, t1, c0)
  2183  		t2, c0 = bits.Add64(u2, t2, c0)
  2184  		t3, c0 = bits.Add64(u3, t3, c0)
  2185  		t4, c0 = bits.Add64(u4, t4, c0)
  2186  		t5, c0 = bits.Add64(u5, t5, c0)
  2187  		t6, c0 = bits.Add64(u6, t6, c0)
  2188  		t7, c0 = bits.Add64(u7, t7, c0)
  2189  		t8, c0 = bits.Add64(u8, t8, c0)
  2190  		t9, c0 = bits.Add64(u9, t9, c0)
  2191  		t10, c0 = bits.Add64(u10, t10, c0)
  2192  		c2, _ = bits.Add64(c2, 0, c0)
  2193  		t10, c0 = bits.Add64(t11, t10, 0)
  2194  		t11, _ = bits.Add64(u11, c2, c0)
  2195  
  2196  	}
  2197  	z[0] = t0
  2198  	z[1] = t1
  2199  	z[2] = t2
  2200  	z[3] = t3
  2201  	z[4] = t4
  2202  	z[5] = t5
  2203  	z[6] = t6
  2204  	z[7] = t7
  2205  	z[8] = t8
  2206  	z[9] = t9
  2207  	z[10] = t10
  2208  	z[11] = t11
  2209  
  2210  	// if z ⩾ q → z -= q
  2211  	if !z.smallerThanModulus() {
  2212  		var b uint64
  2213  		z[0], b = bits.Sub64(z[0], q0, 0)
  2214  		z[1], b = bits.Sub64(z[1], q1, b)
  2215  		z[2], b = bits.Sub64(z[2], q2, b)
  2216  		z[3], b = bits.Sub64(z[3], q3, b)
  2217  		z[4], b = bits.Sub64(z[4], q4, b)
  2218  		z[5], b = bits.Sub64(z[5], q5, b)
  2219  		z[6], b = bits.Sub64(z[6], q6, b)
  2220  		z[7], b = bits.Sub64(z[7], q7, b)
  2221  		z[8], b = bits.Sub64(z[8], q8, b)
  2222  		z[9], b = bits.Sub64(z[9], q9, b)
  2223  		z[10], b = bits.Sub64(z[10], q10, b)
  2224  		z[11], _ = bits.Sub64(z[11], q11, b)
  2225  	}
  2226  	return z
  2227  }