github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fr/element_ops_purego.go (about)

     1  //go:build !amd64 || purego
     2  // +build !amd64 purego
     3  
     4  // Copyright 2020 ConsenSys Software Inc.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Code generated by consensys/gnark-crypto DO NOT EDIT
    19  
    20  package fr
    21  
    22  import "math/bits"
    23  
    24  // MulBy3 replaces x with 3*x (mod q): x is doubled in place, then the saved original is added.
    25  func MulBy3(x *Element) {
    26  	orig := *x
    27  	x.Double(x).Add(x, &orig)
    28  }
    29  
    30  // MulBy5 replaces x with 5*x (mod q): two in-place doublings give 4*x, then the saved original is added.
    31  func MulBy5(x *Element) {
    32  	orig := *x
    33  	x.Double(x).Double(x).Add(x, &orig)
    34  }
    35  
    36  // MulBy13 x *= 13 (mod q)
    37  func MulBy13(x *Element) {
    38  	var y = Element{
    39  		8178485296672800069,
    40  		8476448362227282520,
    41  		14180928431697993131,
    42  		4308307642551989706,
    43  		120359802761433421,
    44  	}
    45  	x.Mul(x, &y)
    46  }
    47  
    48  // Butterfly updates a and b through their pointers so that
    49  //
    50  //	a = a + b (mod q)
    51  //	b = a - b (mod q)
    52  func Butterfly(a, b *Element) {
    53  	_butterflyGeneric(a, b) // this build (!amd64 || purego) forwards to the generic routine; NOTE(review): both results presumably use the input values of a and b, confirm in _butterflyGeneric
    54  }
    55  
    56  func fromMont(z *Element) {
    57  	_fromMontGeneric(z) // this build (!amd64 || purego) forwards to the generic routine; presumably converts z out of Montgomery form, confirm in _fromMontGeneric
    58  }
    59  
    60  func reduce(z *Element) {
    61  	_reduceGeneric(z) // this build (!amd64 || purego) forwards to the generic routine; presumably brings z below q, confirm in _reduceGeneric
    62  }
    63  
    64  // Mul sets z = x * y (mod q) and returns z.
    65  //
    66  // x and y must be less than q.
    67  func (z *Element) Mul(x, y *Element) *Element {
    68  
    69  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
    70  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
    71  	//
    72  	// The algorithm:
    73  	//
    74  	// for i=0 to N-1
    75  	// 		C := 0
    76  	// 		for j=0 to N-1
    77  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
    78  	// 		(t[N+1],t[N]) := t[N] + C
    79  	//
    80  	// 		C := 0
    81  	// 		m := t[0]*q'[0] mod D
    82  	// 		(C,_) := t[0] + m*q[0]
    83  	// 		for j=1 to N-1
    84  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
    85  	//
    86  	// 		(C,t[N-1]) := t[N] + C
    87  	// 		t[N] := t[N+1] + C
    88  	//
    89  	// → N is the number of machine words needed to store the modulus q
    90  	// → D is the word size (written W in the simplified version below). For example, on a 64-bit architecture D is 2^64
    91  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
    92  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
    93  	// → t is a temporary array of size N+2
    94  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
    95  	//
    96  	// As described here https://hackmd.io/@gnark/modular_multiplication we can get rid of one carry chain and simplify:
    97  	// (also described in https://eprint.iacr.org/2022/1400.pdf annex)
    98  	//
    99  	// for i=0 to N-1
   100  	// 		(A,t[0]) := t[0] + x[0]*y[i]
   101  	// 		m := t[0]*q'[0] mod W
   102  	// 		C,_ := t[0] + m*q[0]
   103  	// 		for j=1 to N-1
   104  	// 			(A,t[j])  := t[j] + x[j]*y[i] + A
   105  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   106  	//
   107  	// 		t[N-1] = C + A
   108  	//
   109  	// This optimization saves 5N + 2 additions in the algorithm, and can be used whenever the highest bit
   110  	// of the modulus is zero (and not all of the remaining bits are set).
   111  
   112  	var t0, t1, t2, t3, t4 uint64 // running accumulator limbs (t in the pseudocode above), N = 5
   113  	var u0, u1, u2, u3, u4 uint64 // high words of the 64x64-bit products (bits.Mul64 returns hi, lo)
   114  	{
   115  		var c0, c1, c2 uint64
   116  		v := x[0] // first round: t starts at zero, so the products are stored rather than accumulated
   117  		u0, t0 = bits.Mul64(v, y[0])
   118  		u1, t1 = bits.Mul64(v, y[1])
   119  		u2, t2 = bits.Mul64(v, y[2])
   120  		u3, t3 = bits.Mul64(v, y[3])
   121  		u4, t4 = bits.Mul64(v, y[4])
   122  		t1, c0 = bits.Add64(u0, t1, 0) // fold each product's high word into the next limb
   123  		t2, c0 = bits.Add64(u1, t2, c0)
   124  		t3, c0 = bits.Add64(u2, t3, c0)
   125  		t4, c0 = bits.Add64(u3, t4, c0)
   126  		c2, _ = bits.Add64(u4, 0, c0) // c2 holds the word above t4
   127  
   128  		m := qInvNeg * t0 // wrapping uint64 multiply, i.e. the "m := t[0]*q'[0] mod W" step; qInvNeg is presumably q'[0]
   129  
   130  		u0, c1 = bits.Mul64(m, q0)
   131  		_, c0 = bits.Add64(t0, c1, 0) // low word is discarded, only the carry is kept: the "C,_ := t[0] + m*q[0]" step
   132  		u1, c1 = bits.Mul64(m, q1)
   133  		t0, c0 = bits.Add64(t1, c1, c0) // results are written one limb lower (t[j-1] in the pseudocode)
   134  		u2, c1 = bits.Mul64(m, q2)
   135  		t1, c0 = bits.Add64(t2, c1, c0)
   136  		u3, c1 = bits.Mul64(m, q3)
   137  		t2, c0 = bits.Add64(t3, c1, c0)
   138  		u4, c1 = bits.Mul64(m, q4)
   139  
   140  		t3, c0 = bits.Add64(0, c1, c0) // old t4 is not added here; it is added two lines before the end of this round
   141  		u4, _ = bits.Add64(u4, 0, c0)
   142  		t0, c0 = bits.Add64(u0, t0, 0) // second pass: add the high words of m*q[j]
   143  		t1, c0 = bits.Add64(u1, t1, c0)
   144  		t2, c0 = bits.Add64(u2, t2, c0)
   145  		t3, c0 = bits.Add64(u3, t3, c0)
   146  		c2, _ = bits.Add64(c2, 0, c0)
   147  		t3, c0 = bits.Add64(t4, t3, 0) // add the previous top limb into its shifted-down position
   148  		t4, _ = bits.Add64(u4, c2, c0) // new top limb; the final carry is dropped
   149  
   150  	}
   151  	{
   152  		var c0, c1, c2 uint64
   153  		v := x[1] // rounds for x[1]..x[4] share this shape: accumulate v*y into t, then run the same reduction as above
   154  		u0, c1 = bits.Mul64(v, y[0])
   155  		t0, c0 = bits.Add64(c1, t0, 0) // add the low word of each product into t
   156  		u1, c1 = bits.Mul64(v, y[1])
   157  		t1, c0 = bits.Add64(c1, t1, c0)
   158  		u2, c1 = bits.Mul64(v, y[2])
   159  		t2, c0 = bits.Add64(c1, t2, c0)
   160  		u3, c1 = bits.Mul64(v, y[3])
   161  		t3, c0 = bits.Add64(c1, t3, c0)
   162  		u4, c1 = bits.Mul64(v, y[4])
   163  		t4, c0 = bits.Add64(c1, t4, c0)
   164  
   165  		c2, _ = bits.Add64(0, 0, c0) // capture the carry out of the low-word chain
   166  		t1, c0 = bits.Add64(u0, t1, 0) // add the high words one limb up
   167  		t2, c0 = bits.Add64(u1, t2, c0)
   168  		t3, c0 = bits.Add64(u2, t3, c0)
   169  		t4, c0 = bits.Add64(u3, t4, c0)
   170  		c2, _ = bits.Add64(u4, c2, c0)
   171  
   172  		m := qInvNeg * t0
   173  
   174  		u0, c1 = bits.Mul64(m, q0)
   175  		_, c0 = bits.Add64(t0, c1, 0)
   176  		u1, c1 = bits.Mul64(m, q1)
   177  		t0, c0 = bits.Add64(t1, c1, c0)
   178  		u2, c1 = bits.Mul64(m, q2)
   179  		t1, c0 = bits.Add64(t2, c1, c0)
   180  		u3, c1 = bits.Mul64(m, q3)
   181  		t2, c0 = bits.Add64(t3, c1, c0)
   182  		u4, c1 = bits.Mul64(m, q4)
   183  
   184  		t3, c0 = bits.Add64(0, c1, c0)
   185  		u4, _ = bits.Add64(u4, 0, c0)
   186  		t0, c0 = bits.Add64(u0, t0, 0)
   187  		t1, c0 = bits.Add64(u1, t1, c0)
   188  		t2, c0 = bits.Add64(u2, t2, c0)
   189  		t3, c0 = bits.Add64(u3, t3, c0)
   190  		c2, _ = bits.Add64(c2, 0, c0)
   191  		t3, c0 = bits.Add64(t4, t3, 0)
   192  		t4, _ = bits.Add64(u4, c2, c0)
   193  
   194  	}
   195  	{
   196  		var c0, c1, c2 uint64
   197  		v := x[2] // same round as for x[1]
   198  		u0, c1 = bits.Mul64(v, y[0])
   199  		t0, c0 = bits.Add64(c1, t0, 0)
   200  		u1, c1 = bits.Mul64(v, y[1])
   201  		t1, c0 = bits.Add64(c1, t1, c0)
   202  		u2, c1 = bits.Mul64(v, y[2])
   203  		t2, c0 = bits.Add64(c1, t2, c0)
   204  		u3, c1 = bits.Mul64(v, y[3])
   205  		t3, c0 = bits.Add64(c1, t3, c0)
   206  		u4, c1 = bits.Mul64(v, y[4])
   207  		t4, c0 = bits.Add64(c1, t4, c0)
   208  
   209  		c2, _ = bits.Add64(0, 0, c0)
   210  		t1, c0 = bits.Add64(u0, t1, 0)
   211  		t2, c0 = bits.Add64(u1, t2, c0)
   212  		t3, c0 = bits.Add64(u2, t3, c0)
   213  		t4, c0 = bits.Add64(u3, t4, c0)
   214  		c2, _ = bits.Add64(u4, c2, c0)
   215  
   216  		m := qInvNeg * t0
   217  
   218  		u0, c1 = bits.Mul64(m, q0)
   219  		_, c0 = bits.Add64(t0, c1, 0)
   220  		u1, c1 = bits.Mul64(m, q1)
   221  		t0, c0 = bits.Add64(t1, c1, c0)
   222  		u2, c1 = bits.Mul64(m, q2)
   223  		t1, c0 = bits.Add64(t2, c1, c0)
   224  		u3, c1 = bits.Mul64(m, q3)
   225  		t2, c0 = bits.Add64(t3, c1, c0)
   226  		u4, c1 = bits.Mul64(m, q4)
   227  
   228  		t3, c0 = bits.Add64(0, c1, c0)
   229  		u4, _ = bits.Add64(u4, 0, c0)
   230  		t0, c0 = bits.Add64(u0, t0, 0)
   231  		t1, c0 = bits.Add64(u1, t1, c0)
   232  		t2, c0 = bits.Add64(u2, t2, c0)
   233  		t3, c0 = bits.Add64(u3, t3, c0)
   234  		c2, _ = bits.Add64(c2, 0, c0)
   235  		t3, c0 = bits.Add64(t4, t3, 0)
   236  		t4, _ = bits.Add64(u4, c2, c0)
   237  
   238  	}
   239  	{
   240  		var c0, c1, c2 uint64
   241  		v := x[3] // same round as for x[1]
   242  		u0, c1 = bits.Mul64(v, y[0])
   243  		t0, c0 = bits.Add64(c1, t0, 0)
   244  		u1, c1 = bits.Mul64(v, y[1])
   245  		t1, c0 = bits.Add64(c1, t1, c0)
   246  		u2, c1 = bits.Mul64(v, y[2])
   247  		t2, c0 = bits.Add64(c1, t2, c0)
   248  		u3, c1 = bits.Mul64(v, y[3])
   249  		t3, c0 = bits.Add64(c1, t3, c0)
   250  		u4, c1 = bits.Mul64(v, y[4])
   251  		t4, c0 = bits.Add64(c1, t4, c0)
   252  
   253  		c2, _ = bits.Add64(0, 0, c0)
   254  		t1, c0 = bits.Add64(u0, t1, 0)
   255  		t2, c0 = bits.Add64(u1, t2, c0)
   256  		t3, c0 = bits.Add64(u2, t3, c0)
   257  		t4, c0 = bits.Add64(u3, t4, c0)
   258  		c2, _ = bits.Add64(u4, c2, c0)
   259  
   260  		m := qInvNeg * t0
   261  
   262  		u0, c1 = bits.Mul64(m, q0)
   263  		_, c0 = bits.Add64(t0, c1, 0)
   264  		u1, c1 = bits.Mul64(m, q1)
   265  		t0, c0 = bits.Add64(t1, c1, c0)
   266  		u2, c1 = bits.Mul64(m, q2)
   267  		t1, c0 = bits.Add64(t2, c1, c0)
   268  		u3, c1 = bits.Mul64(m, q3)
   269  		t2, c0 = bits.Add64(t3, c1, c0)
   270  		u4, c1 = bits.Mul64(m, q4)
   271  
   272  		t3, c0 = bits.Add64(0, c1, c0)
   273  		u4, _ = bits.Add64(u4, 0, c0)
   274  		t0, c0 = bits.Add64(u0, t0, 0)
   275  		t1, c0 = bits.Add64(u1, t1, c0)
   276  		t2, c0 = bits.Add64(u2, t2, c0)
   277  		t3, c0 = bits.Add64(u3, t3, c0)
   278  		c2, _ = bits.Add64(c2, 0, c0)
   279  		t3, c0 = bits.Add64(t4, t3, 0)
   280  		t4, _ = bits.Add64(u4, c2, c0)
   281  
   282  	}
   283  	{
   284  		var c0, c1, c2 uint64
   285  		v := x[4] // last round, same shape as for x[1]
   286  		u0, c1 = bits.Mul64(v, y[0])
   287  		t0, c0 = bits.Add64(c1, t0, 0)
   288  		u1, c1 = bits.Mul64(v, y[1])
   289  		t1, c0 = bits.Add64(c1, t1, c0)
   290  		u2, c1 = bits.Mul64(v, y[2])
   291  		t2, c0 = bits.Add64(c1, t2, c0)
   292  		u3, c1 = bits.Mul64(v, y[3])
   293  		t3, c0 = bits.Add64(c1, t3, c0)
   294  		u4, c1 = bits.Mul64(v, y[4])
   295  		t4, c0 = bits.Add64(c1, t4, c0)
   296  
   297  		c2, _ = bits.Add64(0, 0, c0)
   298  		t1, c0 = bits.Add64(u0, t1, 0)
   299  		t2, c0 = bits.Add64(u1, t2, c0)
   300  		t3, c0 = bits.Add64(u2, t3, c0)
   301  		t4, c0 = bits.Add64(u3, t4, c0)
   302  		c2, _ = bits.Add64(u4, c2, c0)
   303  
   304  		m := qInvNeg * t0
   305  
   306  		u0, c1 = bits.Mul64(m, q0)
   307  		_, c0 = bits.Add64(t0, c1, 0)
   308  		u1, c1 = bits.Mul64(m, q1)
   309  		t0, c0 = bits.Add64(t1, c1, c0)
   310  		u2, c1 = bits.Mul64(m, q2)
   311  		t1, c0 = bits.Add64(t2, c1, c0)
   312  		u3, c1 = bits.Mul64(m, q3)
   313  		t2, c0 = bits.Add64(t3, c1, c0)
   314  		u4, c1 = bits.Mul64(m, q4)
   315  
   316  		t3, c0 = bits.Add64(0, c1, c0)
   317  		u4, _ = bits.Add64(u4, 0, c0)
   318  		t0, c0 = bits.Add64(u0, t0, 0)
   319  		t1, c0 = bits.Add64(u1, t1, c0)
   320  		t2, c0 = bits.Add64(u2, t2, c0)
   321  		t3, c0 = bits.Add64(u3, t3, c0)
   322  		c2, _ = bits.Add64(c2, 0, c0)
   323  		t3, c0 = bits.Add64(t4, t3, 0)
   324  		t4, _ = bits.Add64(u4, c2, c0)
   325  
   326  	}
   327  	z[0] = t0 // z is written only after every limb of x and y has been read
   328  	z[1] = t1
   329  	z[2] = t2
   330  	z[3] = t3
   331  	z[4] = t4
   332  
   333  	// if z ⩾ q → z -= q (a single conditional subtraction of the limbs q0..q4)
   334  	if !z.smallerThanModulus() {
   335  		var b uint64
   336  		z[0], b = bits.Sub64(z[0], q0, 0)
   337  		z[1], b = bits.Sub64(z[1], q1, b)
   338  		z[2], b = bits.Sub64(z[2], q2, b)
   339  		z[3], b = bits.Sub64(z[3], q3, b)
   340  		z[4], _ = bits.Sub64(z[4], q4, b)
   341  	}
   342  	return z
   343  }
   344  
   345  // Square sets z = x * x (mod q) and returns z.
   346  //
   347  // x must be less than q.
   348  func (z *Element) Square(x *Element) *Element {
   349  	// see Mul for algorithm documentation; the rounds below multiply each limb of x by every limb of x
   350  
   351  	var t0, t1, t2, t3, t4 uint64 // running accumulator limbs
   352  	var u0, u1, u2, u3, u4 uint64 // high words of the 64x64-bit products (bits.Mul64 returns hi, lo)
   353  	{
   354  		var c0, c1, c2 uint64
   355  		v := x[0] // first round: the accumulator starts at zero, so the products are stored rather than accumulated
   356  		u0, t0 = bits.Mul64(v, x[0])
   357  		u1, t1 = bits.Mul64(v, x[1])
   358  		u2, t2 = bits.Mul64(v, x[2])
   359  		u3, t3 = bits.Mul64(v, x[3])
   360  		u4, t4 = bits.Mul64(v, x[4])
   361  		t1, c0 = bits.Add64(u0, t1, 0) // fold each product's high word into the next limb
   362  		t2, c0 = bits.Add64(u1, t2, c0)
   363  		t3, c0 = bits.Add64(u2, t3, c0)
   364  		t4, c0 = bits.Add64(u3, t4, c0)
   365  		c2, _ = bits.Add64(u4, 0, c0) // c2 holds the word above t4
   366  
   367  		m := qInvNeg * t0 // wrapping uint64 multiply; presumably the per-round Montgomery factor described in Mul
   368  
   369  		u0, c1 = bits.Mul64(m, q0)
   370  		_, c0 = bits.Add64(t0, c1, 0) // low word is discarded, only the carry is kept
   371  		u1, c1 = bits.Mul64(m, q1)
   372  		t0, c0 = bits.Add64(t1, c1, c0) // results are written one limb lower
   373  		u2, c1 = bits.Mul64(m, q2)
   374  		t1, c0 = bits.Add64(t2, c1, c0)
   375  		u3, c1 = bits.Mul64(m, q3)
   376  		t2, c0 = bits.Add64(t3, c1, c0)
   377  		u4, c1 = bits.Mul64(m, q4)
   378  
   379  		t3, c0 = bits.Add64(0, c1, c0) // old t4 is not added here; it is added two lines before the end of this round
   380  		u4, _ = bits.Add64(u4, 0, c0)
   381  		t0, c0 = bits.Add64(u0, t0, 0) // second pass: add the high words of m*q[j]
   382  		t1, c0 = bits.Add64(u1, t1, c0)
   383  		t2, c0 = bits.Add64(u2, t2, c0)
   384  		t3, c0 = bits.Add64(u3, t3, c0)
   385  		c2, _ = bits.Add64(c2, 0, c0)
   386  		t3, c0 = bits.Add64(t4, t3, 0) // add the previous top limb into its shifted-down position
   387  		t4, _ = bits.Add64(u4, c2, c0) // new top limb; the final carry is dropped
   388  
   389  	}
   390  	{
   391  		var c0, c1, c2 uint64
   392  		v := x[1] // rounds for x[1]..x[4] share this shape: accumulate v*x into t, then run the same reduction as above
   393  		u0, c1 = bits.Mul64(v, x[0])
   394  		t0, c0 = bits.Add64(c1, t0, 0) // add the low word of each product into t
   395  		u1, c1 = bits.Mul64(v, x[1])
   396  		t1, c0 = bits.Add64(c1, t1, c0)
   397  		u2, c1 = bits.Mul64(v, x[2])
   398  		t2, c0 = bits.Add64(c1, t2, c0)
   399  		u3, c1 = bits.Mul64(v, x[3])
   400  		t3, c0 = bits.Add64(c1, t3, c0)
   401  		u4, c1 = bits.Mul64(v, x[4])
   402  		t4, c0 = bits.Add64(c1, t4, c0)
   403  
   404  		c2, _ = bits.Add64(0, 0, c0) // capture the carry out of the low-word chain
   405  		t1, c0 = bits.Add64(u0, t1, 0) // add the high words one limb up
   406  		t2, c0 = bits.Add64(u1, t2, c0)
   407  		t3, c0 = bits.Add64(u2, t3, c0)
   408  		t4, c0 = bits.Add64(u3, t4, c0)
   409  		c2, _ = bits.Add64(u4, c2, c0)
   410  
   411  		m := qInvNeg * t0
   412  
   413  		u0, c1 = bits.Mul64(m, q0)
   414  		_, c0 = bits.Add64(t0, c1, 0)
   415  		u1, c1 = bits.Mul64(m, q1)
   416  		t0, c0 = bits.Add64(t1, c1, c0)
   417  		u2, c1 = bits.Mul64(m, q2)
   418  		t1, c0 = bits.Add64(t2, c1, c0)
   419  		u3, c1 = bits.Mul64(m, q3)
   420  		t2, c0 = bits.Add64(t3, c1, c0)
   421  		u4, c1 = bits.Mul64(m, q4)
   422  
   423  		t3, c0 = bits.Add64(0, c1, c0)
   424  		u4, _ = bits.Add64(u4, 0, c0)
   425  		t0, c0 = bits.Add64(u0, t0, 0)
   426  		t1, c0 = bits.Add64(u1, t1, c0)
   427  		t2, c0 = bits.Add64(u2, t2, c0)
   428  		t3, c0 = bits.Add64(u3, t3, c0)
   429  		c2, _ = bits.Add64(c2, 0, c0)
   430  		t3, c0 = bits.Add64(t4, t3, 0)
   431  		t4, _ = bits.Add64(u4, c2, c0)
   432  
   433  	}
   434  	{
   435  		var c0, c1, c2 uint64
   436  		v := x[2] // same round as for x[1]
   437  		u0, c1 = bits.Mul64(v, x[0])
   438  		t0, c0 = bits.Add64(c1, t0, 0)
   439  		u1, c1 = bits.Mul64(v, x[1])
   440  		t1, c0 = bits.Add64(c1, t1, c0)
   441  		u2, c1 = bits.Mul64(v, x[2])
   442  		t2, c0 = bits.Add64(c1, t2, c0)
   443  		u3, c1 = bits.Mul64(v, x[3])
   444  		t3, c0 = bits.Add64(c1, t3, c0)
   445  		u4, c1 = bits.Mul64(v, x[4])
   446  		t4, c0 = bits.Add64(c1, t4, c0)
   447  
   448  		c2, _ = bits.Add64(0, 0, c0)
   449  		t1, c0 = bits.Add64(u0, t1, 0)
   450  		t2, c0 = bits.Add64(u1, t2, c0)
   451  		t3, c0 = bits.Add64(u2, t3, c0)
   452  		t4, c0 = bits.Add64(u3, t4, c0)
   453  		c2, _ = bits.Add64(u4, c2, c0)
   454  
   455  		m := qInvNeg * t0
   456  
   457  		u0, c1 = bits.Mul64(m, q0)
   458  		_, c0 = bits.Add64(t0, c1, 0)
   459  		u1, c1 = bits.Mul64(m, q1)
   460  		t0, c0 = bits.Add64(t1, c1, c0)
   461  		u2, c1 = bits.Mul64(m, q2)
   462  		t1, c0 = bits.Add64(t2, c1, c0)
   463  		u3, c1 = bits.Mul64(m, q3)
   464  		t2, c0 = bits.Add64(t3, c1, c0)
   465  		u4, c1 = bits.Mul64(m, q4)
   466  
   467  		t3, c0 = bits.Add64(0, c1, c0)
   468  		u4, _ = bits.Add64(u4, 0, c0)
   469  		t0, c0 = bits.Add64(u0, t0, 0)
   470  		t1, c0 = bits.Add64(u1, t1, c0)
   471  		t2, c0 = bits.Add64(u2, t2, c0)
   472  		t3, c0 = bits.Add64(u3, t3, c0)
   473  		c2, _ = bits.Add64(c2, 0, c0)
   474  		t3, c0 = bits.Add64(t4, t3, 0)
   475  		t4, _ = bits.Add64(u4, c2, c0)
   476  
   477  	}
   478  	{
   479  		var c0, c1, c2 uint64
   480  		v := x[3] // same round as for x[1]
   481  		u0, c1 = bits.Mul64(v, x[0])
   482  		t0, c0 = bits.Add64(c1, t0, 0)
   483  		u1, c1 = bits.Mul64(v, x[1])
   484  		t1, c0 = bits.Add64(c1, t1, c0)
   485  		u2, c1 = bits.Mul64(v, x[2])
   486  		t2, c0 = bits.Add64(c1, t2, c0)
   487  		u3, c1 = bits.Mul64(v, x[3])
   488  		t3, c0 = bits.Add64(c1, t3, c0)
   489  		u4, c1 = bits.Mul64(v, x[4])
   490  		t4, c0 = bits.Add64(c1, t4, c0)
   491  
   492  		c2, _ = bits.Add64(0, 0, c0)
   493  		t1, c0 = bits.Add64(u0, t1, 0)
   494  		t2, c0 = bits.Add64(u1, t2, c0)
   495  		t3, c0 = bits.Add64(u2, t3, c0)
   496  		t4, c0 = bits.Add64(u3, t4, c0)
   497  		c2, _ = bits.Add64(u4, c2, c0)
   498  
   499  		m := qInvNeg * t0
   500  
   501  		u0, c1 = bits.Mul64(m, q0)
   502  		_, c0 = bits.Add64(t0, c1, 0)
   503  		u1, c1 = bits.Mul64(m, q1)
   504  		t0, c0 = bits.Add64(t1, c1, c0)
   505  		u2, c1 = bits.Mul64(m, q2)
   506  		t1, c0 = bits.Add64(t2, c1, c0)
   507  		u3, c1 = bits.Mul64(m, q3)
   508  		t2, c0 = bits.Add64(t3, c1, c0)
   509  		u4, c1 = bits.Mul64(m, q4)
   510  
   511  		t3, c0 = bits.Add64(0, c1, c0)
   512  		u4, _ = bits.Add64(u4, 0, c0)
   513  		t0, c0 = bits.Add64(u0, t0, 0)
   514  		t1, c0 = bits.Add64(u1, t1, c0)
   515  		t2, c0 = bits.Add64(u2, t2, c0)
   516  		t3, c0 = bits.Add64(u3, t3, c0)
   517  		c2, _ = bits.Add64(c2, 0, c0)
   518  		t3, c0 = bits.Add64(t4, t3, 0)
   519  		t4, _ = bits.Add64(u4, c2, c0)
   520  
   521  	}
   522  	{
   523  		var c0, c1, c2 uint64
   524  		v := x[4] // last round, same shape as for x[1]
   525  		u0, c1 = bits.Mul64(v, x[0])
   526  		t0, c0 = bits.Add64(c1, t0, 0)
   527  		u1, c1 = bits.Mul64(v, x[1])
   528  		t1, c0 = bits.Add64(c1, t1, c0)
   529  		u2, c1 = bits.Mul64(v, x[2])
   530  		t2, c0 = bits.Add64(c1, t2, c0)
   531  		u3, c1 = bits.Mul64(v, x[3])
   532  		t3, c0 = bits.Add64(c1, t3, c0)
   533  		u4, c1 = bits.Mul64(v, x[4])
   534  		t4, c0 = bits.Add64(c1, t4, c0)
   535  
   536  		c2, _ = bits.Add64(0, 0, c0)
   537  		t1, c0 = bits.Add64(u0, t1, 0)
   538  		t2, c0 = bits.Add64(u1, t2, c0)
   539  		t3, c0 = bits.Add64(u2, t3, c0)
   540  		t4, c0 = bits.Add64(u3, t4, c0)
   541  		c2, _ = bits.Add64(u4, c2, c0)
   542  
   543  		m := qInvNeg * t0
   544  
   545  		u0, c1 = bits.Mul64(m, q0)
   546  		_, c0 = bits.Add64(t0, c1, 0)
   547  		u1, c1 = bits.Mul64(m, q1)
   548  		t0, c0 = bits.Add64(t1, c1, c0)
   549  		u2, c1 = bits.Mul64(m, q2)
   550  		t1, c0 = bits.Add64(t2, c1, c0)
   551  		u3, c1 = bits.Mul64(m, q3)
   552  		t2, c0 = bits.Add64(t3, c1, c0)
   553  		u4, c1 = bits.Mul64(m, q4)
   554  
   555  		t3, c0 = bits.Add64(0, c1, c0)
   556  		u4, _ = bits.Add64(u4, 0, c0)
   557  		t0, c0 = bits.Add64(u0, t0, 0)
   558  		t1, c0 = bits.Add64(u1, t1, c0)
   559  		t2, c0 = bits.Add64(u2, t2, c0)
   560  		t3, c0 = bits.Add64(u3, t3, c0)
   561  		c2, _ = bits.Add64(c2, 0, c0)
   562  		t3, c0 = bits.Add64(t4, t3, 0)
   563  		t4, _ = bits.Add64(u4, c2, c0)
   564  
   565  	}
   566  	z[0] = t0 // z is written only after every limb of x has been read
   567  	z[1] = t1
   568  	z[2] = t2
   569  	z[3] = t3
   570  	z[4] = t4
   571  
   572  	// if z ⩾ q → z -= q (a single conditional subtraction of the limbs q0..q4)
   573  	if !z.smallerThanModulus() {
   574  		var b uint64
   575  		z[0], b = bits.Sub64(z[0], q0, 0)
   576  		z[1], b = bits.Sub64(z[1], q1, b)
   577  		z[2], b = bits.Sub64(z[2], q2, b)
   578  		z[3], b = bits.Sub64(z[3], q3, b)
   579  		z[4], _ = bits.Sub64(z[4], q4, b)
   580  	}
   581  	return z
   582  }