github.com/consensys/gnark-crypto@v0.14.0/ecc/bls24-315/fp/element.go

github.com/consensys/gnark-crypto@v0.14.0/ecc/bls24-315/fp/element.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Code generated by consensys/gnark-crypto DO NOT EDIT
    16  
    17  package fp
    18  
    19  import (
    20  	"crypto/rand"
    21  	"encoding/binary"
    22  	"errors"
    23  	"io"
    24  	"math/big"
    25  	"math/bits"
    26  	"reflect"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"github.com/bits-and-blooms/bitset"
    31  	"github.com/consensys/gnark-crypto/field/hash"
    32  	"github.com/consensys/gnark-crypto/field/pool"
    33  )
    34  
    35  // Element represents a field element stored on 5 words (uint64)
    36  //
    37  // Element are assumed to be in Montgomery form in all methods.
    38  //
    39  // Modulus q =
    40  //
    41  //	q[base10] = 39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569
    42  //	q[base16] = 0x4c23a02b586d650d3f7498be97c5eafdec1d01aa27a1ae0421ee5da52bde5026fe802ff40300001
    43  //
    44  // # Warning
    45  //
    46  // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance.
    47  type Element [5]uint64
    48  
    49  const (
    50  	Limbs = 5   // number of 64 bits words needed to represent a Element
    51  	Bits  = 315 // number of bits needed to represent a Element
    52  	Bytes = 40  // number of bytes needed to represent a Element
    53  )
    54  
    55  // Field modulus q
    56  const (
    57  	q0 uint64 = 8063698428123676673
    58  	q1 uint64 = 4764498181658371330
    59  	q2 uint64 = 16051339359738796768
    60  	q3 uint64 = 15273757526516850351
    61  	q4 uint64 = 342900304943437392
    62  )
    63  
    64  var qElement = Element{
    65  	q0,
    66  	q1,
    67  	q2,
    68  	q3,
    69  	q4,
    70  }
    71  
    72  var _modulus big.Int // q stored as big.Int
    73  
    74  // Modulus returns q as a big.Int
    75  //
    76  //	q[base10] = 39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569
    77  //	q[base16] = 0x4c23a02b586d650d3f7498be97c5eafdec1d01aa27a1ae0421ee5da52bde5026fe802ff40300001
    78  func Modulus() *big.Int {
    79  	return new(big.Int).Set(&_modulus)
    80  }
    81  
    82  // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r
    83  // used for Montgomery reduction
    84  const qInvNeg uint64 = 8083954730842193919
    85  
    86  func init() {
    87  	_modulus.SetString("4c23a02b586d650d3f7498be97c5eafdec1d01aa27a1ae0421ee5da52bde5026fe802ff40300001", 16)
    88  }
    89  
    90  // NewElement returns a new Element from a uint64 value
    91  //
    92  // it is equivalent to
    93  //
    94  //	var v Element
    95  //	v.SetUint64(...)
    96  func NewElement(v uint64) Element {
    97  	z := Element{v}
    98  	z.Mul(&z, &rSquare)
    99  	return z
   100  }
   101  
   102  // SetUint64 sets z to v and returns z
   103  func (z *Element) SetUint64(v uint64) *Element {
   104  	//  sets z LSB to v (non-Montgomery form) and convert z to Montgomery form
   105  	*z = Element{v}
   106  	return z.Mul(z, &rSquare) // z.toMont()
   107  }
   108  
   109  // SetInt64 sets z to v and returns z
   110  func (z *Element) SetInt64(v int64) *Element {
   111  
   112  	// absolute value of v
   113  	m := v >> 63
   114  	z.SetUint64(uint64((v ^ m) - m))
   115  
   116  	if m != 0 {
   117  		// v is negative
   118  		z.Neg(z)
   119  	}
   120  
   121  	return z
   122  }
   123  
   124  // Set z = x and returns z
   125  func (z *Element) Set(x *Element) *Element {
   126  	z[0] = x[0]
   127  	z[1] = x[1]
   128  	z[2] = x[2]
   129  	z[3] = x[3]
   130  	z[4] = x[4]
   131  	return z
   132  }
   133  
   134  // SetInterface converts provided interface into Element
   135  // returns an error if provided type is not supported
   136  // supported types:
   137  //
   138  //	Element
   139  //	*Element
   140  //	uint64
   141  //	int
   142  //	string (see SetString for valid formats)
   143  //	*big.Int
   144  //	big.Int
   145  //	[]byte
   146  func (z *Element) SetInterface(i1 interface{}) (*Element, error) {
   147  	if i1 == nil {
   148  		return nil, errors.New("can't set fp.Element with <nil>")
   149  	}
   150  
   151  	switch c1 := i1.(type) {
   152  	case Element:
   153  		return z.Set(&c1), nil
   154  	case *Element:
   155  		if c1 == nil {
   156  			return nil, errors.New("can't set fp.Element with <nil>")
   157  		}
   158  		return z.Set(c1), nil
   159  	case uint8:
   160  		return z.SetUint64(uint64(c1)), nil
   161  	case uint16:
   162  		return z.SetUint64(uint64(c1)), nil
   163  	case uint32:
   164  		return z.SetUint64(uint64(c1)), nil
   165  	case uint:
   166  		return z.SetUint64(uint64(c1)), nil
   167  	case uint64:
   168  		return z.SetUint64(c1), nil
   169  	case int8:
   170  		return z.SetInt64(int64(c1)), nil
   171  	case int16:
   172  		return z.SetInt64(int64(c1)), nil
   173  	case int32:
   174  		return z.SetInt64(int64(c1)), nil
   175  	case int64:
   176  		return z.SetInt64(c1), nil
   177  	case int:
   178  		return z.SetInt64(int64(c1)), nil
   179  	case string:
   180  		return z.SetString(c1)
   181  	case *big.Int:
   182  		if c1 == nil {
   183  			return nil, errors.New("can't set fp.Element with <nil>")
   184  		}
   185  		return z.SetBigInt(c1), nil
   186  	case big.Int:
   187  		return z.SetBigInt(&c1), nil
   188  	case []byte:
   189  		return z.SetBytes(c1), nil
   190  	default:
   191  		return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String())
   192  	}
   193  }
   194  
   195  // SetZero z = 0
   196  func (z *Element) SetZero() *Element {
   197  	z[0] = 0
   198  	z[1] = 0
   199  	z[2] = 0
   200  	z[3] = 0
   201  	z[4] = 0
   202  	return z
   203  }
   204  
   205  // SetOne z = 1 (in Montgomery form)
   206  func (z *Element) SetOne() *Element {
   207  	z[0] = 15345841078474375115
   208  	z[1] = 5736013404040042110
   209  	z[2] = 16275985398192697234
   210  	z[3] = 2147590337827202454
   211  	z[4] = 273027911707369796
   212  	return z
   213  }
   214  
   215  // Div z = x*y⁻¹ (mod q)
   216  func (z *Element) Div(x, y *Element) *Element {
   217  	var yInv Element
   218  	yInv.Inverse(y)
   219  	z.Mul(x, &yInv)
   220  	return z
   221  }
   222  
   223  // Equal returns z == x; constant-time
   224  func (z *Element) Equal(x *Element) bool {
   225  	return z.NotEqual(x) == 0
   226  }
   227  
   228  // NotEqual returns 0 if and only if z == x; constant-time
   229  func (z *Element) NotEqual(x *Element) uint64 {
   230  	return (z[4] ^ x[4]) | (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0])
   231  }
   232  
   233  // IsZero returns z == 0
   234  func (z *Element) IsZero() bool {
   235  	return (z[4] | z[3] | z[2] | z[1] | z[0]) == 0
   236  }
   237  
   238  // IsOne returns z == 1
   239  func (z *Element) IsOne() bool {
   240  	return ((z[4] ^ 273027911707369796) | (z[3] ^ 2147590337827202454) | (z[2] ^ 16275985398192697234) | (z[1] ^ 5736013404040042110) | (z[0] ^ 15345841078474375115)) == 0
   241  }
   242  
   243  // IsUint64 reports whether z can be represented as an uint64.
   244  func (z *Element) IsUint64() bool {
   245  	zz := *z
   246  	zz.fromMont()
   247  	return zz.FitsOnOneWord()
   248  }
   249  
   250  // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined.
   251  func (z *Element) Uint64() uint64 {
   252  	return z.Bits()[0]
   253  }
   254  
   255  // FitsOnOneWord reports whether z words (except the least significant word) are 0
   256  //
   257  // It is the responsibility of the caller to convert from Montgomery to Regular form if needed.
   258  func (z *Element) FitsOnOneWord() bool {
   259  	return (z[4] | z[3] | z[2] | z[1]) == 0
   260  }
   261  
   262  // Cmp compares (lexicographic order) z and x and returns:
   263  //
   264  //	-1 if z <  x
   265  //	 0 if z == x
   266  //	+1 if z >  x
   267  func (z *Element) Cmp(x *Element) int {
   268  	_z := z.Bits()
   269  	_x := x.Bits()
   270  	if _z[4] > _x[4] {
   271  		return 1
   272  	} else if _z[4] < _x[4] {
   273  		return -1
   274  	}
   275  	if _z[3] > _x[3] {
   276  		return 1
   277  	} else if _z[3] < _x[3] {
   278  		return -1
   279  	}
   280  	if _z[2] > _x[2] {
   281  		return 1
   282  	} else if _z[2] < _x[2] {
   283  		return -1
   284  	}
   285  	if _z[1] > _x[1] {
   286  		return 1
   287  	} else if _z[1] < _x[1] {
   288  		return -1
   289  	}
   290  	if _z[0] > _x[0] {
   291  		return 1
   292  	} else if _z[0] < _x[0] {
   293  		return -1
   294  	}
   295  	return 0
   296  }
   297  
   298  // LexicographicallyLargest returns true if this element is strictly lexicographically
   299  // larger than its negation, false otherwise
   300  func (z *Element) LexicographicallyLargest() bool {
   301  	// adapted from github.com/zkcrypto/bls12_381
   302  	// we check if the element is larger than (q-1) / 2
   303  	// if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2
   304  
   305  	_z := z.Bits()
   306  
   307  	var b uint64
   308  	_, b = bits.Sub64(_z[0], 4031849214061838337, 0)
   309  	_, b = bits.Sub64(_z[1], 2382249090829185665, b)
   310  	_, b = bits.Sub64(_z[2], 17249041716724174192, b)
   311  	_, b = bits.Sub64(_z[3], 7636878763258425175, b)
   312  	_, b = bits.Sub64(_z[4], 171450152471718696, b)
   313  
   314  	return b == 0
   315  }
   316  
   317  // SetRandom sets z to a uniform random value in [0, q).
   318  //
   319  // This might error only if reading from crypto/rand.Reader errors,
   320  // in which case, value of z is undefined.
   321  func (z *Element) SetRandom() (*Element, error) {
   322  	// this code is generated for all modulus
   323  	// and derived from go/src/crypto/rand/util.go
   324  
   325  	// l is number of limbs * 8; the number of bytes needed to reconstruct 5 uint64
   326  	const l = 40
   327  
   328  	// bitLen is the maximum bit length needed to encode a value < q.
   329  	const bitLen = 315
   330  
   331  	// k is the maximum byte length needed to encode a value < q.
   332  	const k = (bitLen + 7) / 8
   333  
   334  	// b is the number of bits in the most significant byte of q-1.
   335  	b := uint(bitLen % 8)
   336  	if b == 0 {
   337  		b = 8
   338  	}
   339  
   340  	var bytes [l]byte
   341  
   342  	for {
   343  		// note that bytes[k:l] is always 0
   344  		if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil {
   345  			return nil, err
   346  		}
   347  
   348  		// Clear unused bits in in the most significant byte to increase probability
   349  		// that the candidate is < q.
   350  		bytes[k-1] &= uint8(int(1<<b) - 1)
   351  		z[0] = binary.LittleEndian.Uint64(bytes[0:8])
   352  		z[1] = binary.LittleEndian.Uint64(bytes[8:16])
   353  		z[2] = binary.LittleEndian.Uint64(bytes[16:24])
   354  		z[3] = binary.LittleEndian.Uint64(bytes[24:32])
   355  		z[4] = binary.LittleEndian.Uint64(bytes[32:40])
   356  
   357  		if !z.smallerThanModulus() {
   358  			continue // ignore the candidate and re-sample
   359  		}
   360  
   361  		return z, nil
   362  	}
   363  }
   364  
   365  // smallerThanModulus returns true if z < q
   366  // This is not constant time
   367  func (z *Element) smallerThanModulus() bool {
   368  	return (z[4] < q4 || (z[4] == q4 && (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0)))))))))
   369  }
   370  
   371  // One returns 1
   372  func One() Element {
   373  	var one Element
   374  	one.SetOne()
   375  	return one
   376  }
   377  
   378  // Halve sets z to z / 2 (mod q)
   379  func (z *Element) Halve() {
   380  	var carry uint64
   381  
   382  	if z[0]&1 == 1 {
   383  		// z = z + q
   384  		z[0], carry = bits.Add64(z[0], q0, 0)
   385  		z[1], carry = bits.Add64(z[1], q1, carry)
   386  		z[2], carry = bits.Add64(z[2], q2, carry)
   387  		z[3], carry = bits.Add64(z[3], q3, carry)
   388  		z[4], _ = bits.Add64(z[4], q4, carry)
   389  
   390  	}
   391  	// z = z >> 1
   392  	z[0] = z[0]>>1 | z[1]<<63
   393  	z[1] = z[1]>>1 | z[2]<<63
   394  	z[2] = z[2]>>1 | z[3]<<63
   395  	z[3] = z[3]>>1 | z[4]<<63
   396  	z[4] >>= 1
   397  
   398  }
   399  
   400  // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation
   401  // sets and returns z = z * 1
   402  func (z *Element) fromMont() *Element {
   403  	fromMont(z)
   404  	return z
   405  }
   406  
   407  // Add z = x + y (mod q)
   408  func (z *Element) Add(x, y *Element) *Element {
   409  
   410  	var carry uint64
   411  	z[0], carry = bits.Add64(x[0], y[0], 0)
   412  	z[1], carry = bits.Add64(x[1], y[1], carry)
   413  	z[2], carry = bits.Add64(x[2], y[2], carry)
   414  	z[3], carry = bits.Add64(x[3], y[3], carry)
   415  	z[4], _ = bits.Add64(x[4], y[4], carry)
   416  
   417  	// if z ⩾ q → z -= q
   418  	if !z.smallerThanModulus() {
   419  		var b uint64
   420  		z[0], b = bits.Sub64(z[0], q0, 0)
   421  		z[1], b = bits.Sub64(z[1], q1, b)
   422  		z[2], b = bits.Sub64(z[2], q2, b)
   423  		z[3], b = bits.Sub64(z[3], q3, b)
   424  		z[4], _ = bits.Sub64(z[4], q4, b)
   425  	}
   426  	return z
   427  }
   428  
   429  // Double z = x + x (mod q), aka Lsh 1
   430  func (z *Element) Double(x *Element) *Element {
   431  
   432  	var carry uint64
   433  	z[0], carry = bits.Add64(x[0], x[0], 0)
   434  	z[1], carry = bits.Add64(x[1], x[1], carry)
   435  	z[2], carry = bits.Add64(x[2], x[2], carry)
   436  	z[3], carry = bits.Add64(x[3], x[3], carry)
   437  	z[4], _ = bits.Add64(x[4], x[4], carry)
   438  
   439  	// if z ⩾ q → z -= q
   440  	if !z.smallerThanModulus() {
   441  		var b uint64
   442  		z[0], b = bits.Sub64(z[0], q0, 0)
   443  		z[1], b = bits.Sub64(z[1], q1, b)
   444  		z[2], b = bits.Sub64(z[2], q2, b)
   445  		z[3], b = bits.Sub64(z[3], q3, b)
   446  		z[4], _ = bits.Sub64(z[4], q4, b)
   447  	}
   448  	return z
   449  }
   450  
   451  // Sub z = x - y (mod q)
   452  func (z *Element) Sub(x, y *Element) *Element {
   453  	var b uint64
   454  	z[0], b = bits.Sub64(x[0], y[0], 0)
   455  	z[1], b = bits.Sub64(x[1], y[1], b)
   456  	z[2], b = bits.Sub64(x[2], y[2], b)
   457  	z[3], b = bits.Sub64(x[3], y[3], b)
   458  	z[4], b = bits.Sub64(x[4], y[4], b)
   459  	if b != 0 {
   460  		var c uint64
   461  		z[0], c = bits.Add64(z[0], q0, 0)
   462  		z[1], c = bits.Add64(z[1], q1, c)
   463  		z[2], c = bits.Add64(z[2], q2, c)
   464  		z[3], c = bits.Add64(z[3], q3, c)
   465  		z[4], _ = bits.Add64(z[4], q4, c)
   466  	}
   467  	return z
   468  }
   469  
   470  // Neg z = q - x
   471  func (z *Element) Neg(x *Element) *Element {
   472  	if x.IsZero() {
   473  		z.SetZero()
   474  		return z
   475  	}
   476  	var borrow uint64
   477  	z[0], borrow = bits.Sub64(q0, x[0], 0)
   478  	z[1], borrow = bits.Sub64(q1, x[1], borrow)
   479  	z[2], borrow = bits.Sub64(q2, x[2], borrow)
   480  	z[3], borrow = bits.Sub64(q3, x[3], borrow)
   481  	z[4], _ = bits.Sub64(q4, x[4], borrow)
   482  	return z
   483  }
   484  
   485  // Select is a constant-time conditional move.
   486  // If c=0, z = x0. Else z = x1
   487  func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element {
   488  	cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise
   489  	z[0] = x0[0] ^ cC&(x0[0]^x1[0])
   490  	z[1] = x0[1] ^ cC&(x0[1]^x1[1])
   491  	z[2] = x0[2] ^ cC&(x0[2]^x1[2])
   492  	z[3] = x0[3] ^ cC&(x0[3]^x1[3])
   493  	z[4] = x0[4] ^ cC&(x0[4]^x1[4])
   494  	return z
   495  }
   496  
   497  // _mulGeneric is unoptimized textbook CIOS
   498  // it is a fallback solution on x86 when ADX instruction set is not available
   499  // and is used for testing purposes.
   500  func _mulGeneric(z, x, y *Element) {
   501  
   502  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
   503  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   504  	//
   505  	// The algorithm:
   506  	//
   507  	// for i=0 to N-1
   508  	// 		C := 0
   509  	// 		for j=0 to N-1
   510  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
   511  	// 		(t[N+1],t[N]) := t[N] + C
   512  	//
   513  	// 		C := 0
   514  	// 		m := t[0]*q'[0] mod D
   515  	// 		(C,_) := t[0] + m*q[0]
   516  	// 		for j=1 to N-1
   517  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   518  	//
   519  	// 		(C,t[N-1]) := t[N] + C
   520  	// 		t[N] := t[N+1] + C
   521  	//
   522  	// → N is the number of machine words needed to store the modulus q
   523  	// → D is the word size. For example, on a 64-bit architecture D is 2	64
   524  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
   525  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
   526  	// → t is a temporary array of size N+2
   527  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
   528  
   529  	var t [6]uint64
   530  	var D uint64
   531  	var m, C uint64
   532  	// -----------------------------------
   533  	// First loop
   534  
   535  	C, t[0] = bits.Mul64(y[0], x[0])
   536  	C, t[1] = madd1(y[0], x[1], C)
   537  	C, t[2] = madd1(y[0], x[2], C)
   538  	C, t[3] = madd1(y[0], x[3], C)
   539  	C, t[4] = madd1(y[0], x[4], C)
   540  
   541  	t[5], D = bits.Add64(t[5], C, 0)
   542  
   543  	// m = t[0]n'[0] mod W
   544  	m = t[0] * qInvNeg
   545  
   546  	// -----------------------------------
   547  	// Second loop
   548  	C = madd0(m, q0, t[0])
   549  	C, t[0] = madd2(m, q1, t[1], C)
   550  	C, t[1] = madd2(m, q2, t[2], C)
   551  	C, t[2] = madd2(m, q3, t[3], C)
   552  	C, t[3] = madd2(m, q4, t[4], C)
   553  
   554  	t[4], C = bits.Add64(t[5], C, 0)
   555  	t[5], _ = bits.Add64(0, D, C)
   556  	// -----------------------------------
   557  	// First loop
   558  
   559  	C, t[0] = madd1(y[1], x[0], t[0])
   560  	C, t[1] = madd2(y[1], x[1], t[1], C)
   561  	C, t[2] = madd2(y[1], x[2], t[2], C)
   562  	C, t[3] = madd2(y[1], x[3], t[3], C)
   563  	C, t[4] = madd2(y[1], x[4], t[4], C)
   564  
   565  	t[5], D = bits.Add64(t[5], C, 0)
   566  
   567  	// m = t[0]n'[0] mod W
   568  	m = t[0] * qInvNeg
   569  
   570  	// -----------------------------------
   571  	// Second loop
   572  	C = madd0(m, q0, t[0])
   573  	C, t[0] = madd2(m, q1, t[1], C)
   574  	C, t[1] = madd2(m, q2, t[2], C)
   575  	C, t[2] = madd2(m, q3, t[3], C)
   576  	C, t[3] = madd2(m, q4, t[4], C)
   577  
   578  	t[4], C = bits.Add64(t[5], C, 0)
   579  	t[5], _ = bits.Add64(0, D, C)
   580  	// -----------------------------------
   581  	// First loop
   582  
   583  	C, t[0] = madd1(y[2], x[0], t[0])
   584  	C, t[1] = madd2(y[2], x[1], t[1], C)
   585  	C, t[2] = madd2(y[2], x[2], t[2], C)
   586  	C, t[3] = madd2(y[2], x[3], t[3], C)
   587  	C, t[4] = madd2(y[2], x[4], t[4], C)
   588  
   589  	t[5], D = bits.Add64(t[5], C, 0)
   590  
   591  	// m = t[0]n'[0] mod W
   592  	m = t[0] * qInvNeg
   593  
   594  	// -----------------------------------
   595  	// Second loop
   596  	C = madd0(m, q0, t[0])
   597  	C, t[0] = madd2(m, q1, t[1], C)
   598  	C, t[1] = madd2(m, q2, t[2], C)
   599  	C, t[2] = madd2(m, q3, t[3], C)
   600  	C, t[3] = madd2(m, q4, t[4], C)
   601  
   602  	t[4], C = bits.Add64(t[5], C, 0)
   603  	t[5], _ = bits.Add64(0, D, C)
   604  	// -----------------------------------
   605  	// First loop
   606  
   607  	C, t[0] = madd1(y[3], x[0], t[0])
   608  	C, t[1] = madd2(y[3], x[1], t[1], C)
   609  	C, t[2] = madd2(y[3], x[2], t[2], C)
   610  	C, t[3] = madd2(y[3], x[3], t[3], C)
   611  	C, t[4] = madd2(y[3], x[4], t[4], C)
   612  
   613  	t[5], D = bits.Add64(t[5], C, 0)
   614  
   615  	// m = t[0]n'[0] mod W
   616  	m = t[0] * qInvNeg
   617  
   618  	// -----------------------------------
   619  	// Second loop
   620  	C = madd0(m, q0, t[0])
   621  	C, t[0] = madd2(m, q1, t[1], C)
   622  	C, t[1] = madd2(m, q2, t[2], C)
   623  	C, t[2] = madd2(m, q3, t[3], C)
   624  	C, t[3] = madd2(m, q4, t[4], C)
   625  
   626  	t[4], C = bits.Add64(t[5], C, 0)
   627  	t[5], _ = bits.Add64(0, D, C)
   628  	// -----------------------------------
   629  	// First loop
   630  
   631  	C, t[0] = madd1(y[4], x[0], t[0])
   632  	C, t[1] = madd2(y[4], x[1], t[1], C)
   633  	C, t[2] = madd2(y[4], x[2], t[2], C)
   634  	C, t[3] = madd2(y[4], x[3], t[3], C)
   635  	C, t[4] = madd2(y[4], x[4], t[4], C)
   636  
   637  	t[5], D = bits.Add64(t[5], C, 0)
   638  
   639  	// m = t[0]n'[0] mod W
   640  	m = t[0] * qInvNeg
   641  
   642  	// -----------------------------------
   643  	// Second loop
   644  	C = madd0(m, q0, t[0])
   645  	C, t[0] = madd2(m, q1, t[1], C)
   646  	C, t[1] = madd2(m, q2, t[2], C)
   647  	C, t[2] = madd2(m, q3, t[3], C)
   648  	C, t[3] = madd2(m, q4, t[4], C)
   649  
   650  	t[4], C = bits.Add64(t[5], C, 0)
   651  	t[5], _ = bits.Add64(0, D, C)
   652  
   653  	if t[5] != 0 {
   654  		// we need to reduce, we have a result on 6 words
   655  		var b uint64
   656  		z[0], b = bits.Sub64(t[0], q0, 0)
   657  		z[1], b = bits.Sub64(t[1], q1, b)
   658  		z[2], b = bits.Sub64(t[2], q2, b)
   659  		z[3], b = bits.Sub64(t[3], q3, b)
   660  		z[4], _ = bits.Sub64(t[4], q4, b)
   661  		return
   662  	}
   663  
   664  	// copy t into z
   665  	z[0] = t[0]
   666  	z[1] = t[1]
   667  	z[2] = t[2]
   668  	z[3] = t[3]
   669  	z[4] = t[4]
   670  
   671  	// if z ⩾ q → z -= q
   672  	if !z.smallerThanModulus() {
   673  		var b uint64
   674  		z[0], b = bits.Sub64(z[0], q0, 0)
   675  		z[1], b = bits.Sub64(z[1], q1, b)
   676  		z[2], b = bits.Sub64(z[2], q2, b)
   677  		z[3], b = bits.Sub64(z[3], q3, b)
   678  		z[4], _ = bits.Sub64(z[4], q4, b)
   679  	}
   680  }
   681  
   682  func _fromMontGeneric(z *Element) {
   683  	// the following lines implement z = z * 1
   684  	// with a modified CIOS montgomery multiplication
   685  	// see Mul for algorithm documentation
   686  	{
   687  		// m = z[0]n'[0] mod W
   688  		m := z[0] * qInvNeg
   689  		C := madd0(m, q0, z[0])
   690  		C, z[0] = madd2(m, q1, z[1], C)
   691  		C, z[1] = madd2(m, q2, z[2], C)
   692  		C, z[2] = madd2(m, q3, z[3], C)
   693  		C, z[3] = madd2(m, q4, z[4], C)
   694  		z[4] = C
   695  	}
   696  	{
   697  		// m = z[0]n'[0] mod W
   698  		m := z[0] * qInvNeg
   699  		C := madd0(m, q0, z[0])
   700  		C, z[0] = madd2(m, q1, z[1], C)
   701  		C, z[1] = madd2(m, q2, z[2], C)
   702  		C, z[2] = madd2(m, q3, z[3], C)
   703  		C, z[3] = madd2(m, q4, z[4], C)
   704  		z[4] = C
   705  	}
   706  	{
   707  		// m = z[0]n'[0] mod W
   708  		m := z[0] * qInvNeg
   709  		C := madd0(m, q0, z[0])
   710  		C, z[0] = madd2(m, q1, z[1], C)
   711  		C, z[1] = madd2(m, q2, z[2], C)
   712  		C, z[2] = madd2(m, q3, z[3], C)
   713  		C, z[3] = madd2(m, q4, z[4], C)
   714  		z[4] = C
   715  	}
   716  	{
   717  		// m = z[0]n'[0] mod W
   718  		m := z[0] * qInvNeg
   719  		C := madd0(m, q0, z[0])
   720  		C, z[0] = madd2(m, q1, z[1], C)
   721  		C, z[1] = madd2(m, q2, z[2], C)
   722  		C, z[2] = madd2(m, q3, z[3], C)
   723  		C, z[3] = madd2(m, q4, z[4], C)
   724  		z[4] = C
   725  	}
   726  	{
   727  		// m = z[0]n'[0] mod W
   728  		m := z[0] * qInvNeg
   729  		C := madd0(m, q0, z[0])
   730  		C, z[0] = madd2(m, q1, z[1], C)
   731  		C, z[1] = madd2(m, q2, z[2], C)
   732  		C, z[2] = madd2(m, q3, z[3], C)
   733  		C, z[3] = madd2(m, q4, z[4], C)
   734  		z[4] = C
   735  	}
   736  
   737  	// if z ⩾ q → z -= q
   738  	if !z.smallerThanModulus() {
   739  		var b uint64
   740  		z[0], b = bits.Sub64(z[0], q0, 0)
   741  		z[1], b = bits.Sub64(z[1], q1, b)
   742  		z[2], b = bits.Sub64(z[2], q2, b)
   743  		z[3], b = bits.Sub64(z[3], q3, b)
   744  		z[4], _ = bits.Sub64(z[4], q4, b)
   745  	}
   746  }
   747  
   748  func _reduceGeneric(z *Element) {
   749  
   750  	// if z ⩾ q → z -= q
   751  	if !z.smallerThanModulus() {
   752  		var b uint64
   753  		z[0], b = bits.Sub64(z[0], q0, 0)
   754  		z[1], b = bits.Sub64(z[1], q1, b)
   755  		z[2], b = bits.Sub64(z[2], q2, b)
   756  		z[3], b = bits.Sub64(z[3], q3, b)
   757  		z[4], _ = bits.Sub64(z[4], q4, b)
   758  	}
   759  }
   760  
   761  // BatchInvert returns a new slice with every element inverted.
   762  // Uses Montgomery batch inversion trick
   763  func BatchInvert(a []Element) []Element {
   764  	res := make([]Element, len(a))
   765  	if len(a) == 0 {
   766  		return res
   767  	}
   768  
   769  	zeroes := bitset.New(uint(len(a)))
   770  	accumulator := One()
   771  
   772  	for i := 0; i < len(a); i++ {
   773  		if a[i].IsZero() {
   774  			zeroes.Set(uint(i))
   775  			continue
   776  		}
   777  		res[i] = accumulator
   778  		accumulator.Mul(&accumulator, &a[i])
   779  	}
   780  
   781  	accumulator.Inverse(&accumulator)
   782  
   783  	for i := len(a) - 1; i >= 0; i-- {
   784  		if zeroes.Test(uint(i)) {
   785  			continue
   786  		}
   787  		res[i].Mul(&res[i], &accumulator)
   788  		accumulator.Mul(&accumulator, &a[i])
   789  	}
   790  
   791  	return res
   792  }
   793  
   794  func _butterflyGeneric(a, b *Element) {
   795  	t := *a
   796  	a.Add(a, b)
   797  	b.Sub(&t, b)
   798  }
   799  
   800  // BitLen returns the minimum number of bits needed to represent z
   801  // returns 0 if z == 0
   802  func (z *Element) BitLen() int {
   803  	if z[4] != 0 {
   804  		return 256 + bits.Len64(z[4])
   805  	}
   806  	if z[3] != 0 {
   807  		return 192 + bits.Len64(z[3])
   808  	}
   809  	if z[2] != 0 {
   810  		return 128 + bits.Len64(z[2])
   811  	}
   812  	if z[1] != 0 {
   813  		return 64 + bits.Len64(z[1])
   814  	}
   815  	return bits.Len64(z[0])
   816  }
   817  
   818  // Hash msg to count prime field elements.
   819  // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2
   820  func Hash(msg, dst []byte, count int) ([]Element, error) {
   821  	// 128 bits of security
   822  	// L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128
   823  	const Bytes = 1 + (Bits-1)/8
   824  	const L = 16 + Bytes
   825  
   826  	lenInBytes := count * L
   827  	pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes)
   828  	if err != nil {
   829  		return nil, err
   830  	}
   831  
   832  	// get temporary big int from the pool
   833  	vv := pool.BigInt.Get()
   834  
   835  	res := make([]Element, count)
   836  	for i := 0; i < count; i++ {
   837  		vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L])
   838  		res[i].SetBigInt(vv)
   839  	}
   840  
   841  	// release object into pool
   842  	pool.BigInt.Put(vv)
   843  
   844  	return res, nil
   845  }
   846  
   847  // Exp z = xᵏ (mod q)
   848  func (z *Element) Exp(x Element, k *big.Int) *Element {
   849  	if k.IsUint64() && k.Uint64() == 0 {
   850  		return z.SetOne()
   851  	}
   852  
   853  	e := k
   854  	if k.Sign() == -1 {
   855  		// negative k, we invert
   856  		// if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q)
   857  		x.Inverse(&x)
   858  
   859  		// we negate k in a temp big.Int since
   860  		// Int.Bit(_) of k and -k is different
   861  		e = pool.BigInt.Get()
   862  		defer pool.BigInt.Put(e)
   863  		e.Neg(k)
   864  	}
   865  
   866  	z.Set(&x)
   867  
   868  	for i := e.BitLen() - 2; i >= 0; i-- {
   869  		z.Square(z)
   870  		if e.Bit(i) == 1 {
   871  			z.Mul(z, &x)
   872  		}
   873  	}
   874  
   875  	return z
   876  }
   877  
   878  // rSquare where r is the Montgommery constant
   879  // see section 2.3.2 of Tolga Acar's thesis
   880  // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   881  var rSquare = Element{
   882  	7746605402484284438,
   883  	6457291528853138485,
   884  	14067144135019420374,
   885  	14705958577488011058,
   886  	150264569250089173,
   887  }
   888  
   889  // toMont converts z to Montgomery form
   890  // sets and returns z = z * r²
   891  func (z *Element) toMont() *Element {
   892  	return z.Mul(z, &rSquare)
   893  }
   894  
   895  // String returns the decimal representation of z as generated by
   896  // z.Text(10).
   897  func (z *Element) String() string {
   898  	return z.Text(10)
   899  }
   900  
   901  // toBigInt returns z as a big.Int in Montgomery form
   902  func (z *Element) toBigInt(res *big.Int) *big.Int {
   903  	var b [Bytes]byte
   904  	binary.BigEndian.PutUint64(b[32:40], z[0])
   905  	binary.BigEndian.PutUint64(b[24:32], z[1])
   906  	binary.BigEndian.PutUint64(b[16:24], z[2])
   907  	binary.BigEndian.PutUint64(b[8:16], z[3])
   908  	binary.BigEndian.PutUint64(b[0:8], z[4])
   909  
   910  	return res.SetBytes(b[:])
   911  }
   912  
   913  // Text returns the string representation of z in the given base.
   914  // Base must be between 2 and 36, inclusive. The result uses the
   915  // lower-case letters 'a' to 'z' for digit values 10 to 35.
   916  // No prefix (such as "0x") is added to the string. If z is a nil
   917  // pointer it returns "<nil>".
   918  // If base == 10 and -z fits in a uint16 prefix "-" is added to the string.
   919  func (z *Element) Text(base int) string {
   920  	if base < 2 || base > 36 {
   921  		panic("invalid base")
   922  	}
   923  	if z == nil {
   924  		return "<nil>"
   925  	}
   926  
   927  	const maxUint16 = 65535
   928  	if base == 10 {
   929  		var zzNeg Element
   930  		zzNeg.Neg(z)
   931  		zzNeg.fromMont()
   932  		if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 {
   933  			return "-" + strconv.FormatUint(zzNeg[0], base)
   934  		}
   935  	}
   936  	zz := *z
   937  	zz.fromMont()
   938  	if zz.FitsOnOneWord() {
   939  		return strconv.FormatUint(zz[0], base)
   940  	}
   941  	vv := pool.BigInt.Get()
   942  	r := zz.toBigInt(vv).Text(base)
   943  	pool.BigInt.Put(vv)
   944  	return r
   945  }
   946  
   947  // BigInt sets and return z as a *big.Int
   948  func (z *Element) BigInt(res *big.Int) *big.Int {
   949  	_z := *z
   950  	_z.fromMont()
   951  	return _z.toBigInt(res)
   952  }
   953  
   954  // ToBigIntRegular returns z as a big.Int in regular form
   955  //
   956  // Deprecated: use BigInt(*big.Int) instead
   957  func (z Element) ToBigIntRegular(res *big.Int) *big.Int {
   958  	z.fromMont()
   959  	return z.toBigInt(res)
   960  }
   961  
   962  // Bits provides access to z by returning its value as a little-endian [5]uint64 array.
   963  // Bits is intended to support implementation of missing low-level Element
   964  // functionality outside this package; it should be avoided otherwise.
   965  func (z *Element) Bits() [5]uint64 {
   966  	_z := *z
   967  	fromMont(&_z)
   968  	return _z
   969  }
   970  
   971  // Bytes returns the value of z as a big-endian byte array
   972  func (z *Element) Bytes() (res [Bytes]byte) {
   973  	BigEndian.PutElement(&res, *z)
   974  	return
   975  }
   976  
   977  // Marshal returns the value of z as a big-endian byte slice
   978  func (z *Element) Marshal() []byte {
   979  	b := z.Bytes()
   980  	return b[:]
   981  }
   982  
   983  // Unmarshal is an alias for SetBytes, it sets z to the value of e.
   984  func (z *Element) Unmarshal(e []byte) {
   985  	z.SetBytes(e)
   986  }
   987  
   988  // SetBytes interprets e as the bytes of a big-endian unsigned integer,
   989  // sets z to that value, and returns z.
   990  func (z *Element) SetBytes(e []byte) *Element {
   991  	if len(e) == Bytes {
   992  		// fast path
   993  		v, err := BigEndian.Element((*[Bytes]byte)(e))
   994  		if err == nil {
   995  			*z = v
   996  			return z
   997  		}
   998  	}
   999  
  1000  	// slow path.
  1001  	// get a big int from our pool
  1002  	vv := pool.BigInt.Get()
  1003  	vv.SetBytes(e)
  1004  
  1005  	// set big int
  1006  	z.SetBigInt(vv)
  1007  
  1008  	// put temporary object back in pool
  1009  	pool.BigInt.Put(vv)
  1010  
  1011  	return z
  1012  }
  1013  
  1014  // SetBytesCanonical interprets e as the bytes of a big-endian 40-byte integer.
  1015  // If e is not a 40-byte slice or encodes a value higher than q,
  1016  // SetBytesCanonical returns an error.
  1017  func (z *Element) SetBytesCanonical(e []byte) error {
  1018  	if len(e) != Bytes {
  1019  		return errors.New("invalid fp.Element encoding")
  1020  	}
  1021  	v, err := BigEndian.Element((*[Bytes]byte)(e))
  1022  	if err != nil {
  1023  		return err
  1024  	}
  1025  	*z = v
  1026  	return nil
  1027  }
  1028  
  1029  // SetBigInt sets z to v and returns z
  1030  func (z *Element) SetBigInt(v *big.Int) *Element {
  1031  	z.SetZero()
  1032  
  1033  	var zero big.Int
  1034  
  1035  	// fast path
  1036  	c := v.Cmp(&_modulus)
  1037  	if c == 0 {
  1038  		// v == 0
  1039  		return z
  1040  	} else if c != 1 && v.Cmp(&zero) != -1 {
  1041  		// 0 < v < q
  1042  		return z.setBigInt(v)
  1043  	}
  1044  
  1045  	// get temporary big int from the pool
  1046  	vv := pool.BigInt.Get()
  1047  
  1048  	// copy input + modular reduction
  1049  	vv.Mod(v, &_modulus)
  1050  
  1051  	// set big int byte value
  1052  	z.setBigInt(vv)
  1053  
  1054  	// release object into pool
  1055  	pool.BigInt.Put(vv)
  1056  	return z
  1057  }
  1058  
  1059  // setBigInt assumes 0 ⩽ v < q
  1060  func (z *Element) setBigInt(v *big.Int) *Element {
  1061  	vBits := v.Bits()
  1062  
  1063  	if bits.UintSize == 64 {
  1064  		for i := 0; i < len(vBits); i++ {
  1065  			z[i] = uint64(vBits[i])
  1066  		}
  1067  	} else {
  1068  		for i := 0; i < len(vBits); i++ {
  1069  			if i%2 == 0 {
  1070  				z[i/2] = uint64(vBits[i])
  1071  			} else {
  1072  				z[i/2] |= uint64(vBits[i]) << 32
  1073  			}
  1074  		}
  1075  	}
  1076  
  1077  	return z.toMont()
  1078  }
  1079  
  1080  // SetString creates a big.Int with number and calls SetBigInt on z
  1081  //
  1082  // The number prefix determines the actual base: A prefix of
  1083  // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8,
  1084  // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10
  1085  // and no prefix is accepted.
  1086  //
  1087  // For base 16, lower and upper case letters are considered the same:
  1088  // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15.
  1089  //
  1090  // An underscore character ”_” may appear between a base
  1091  // prefix and an adjacent digit, and between successive digits; such
  1092  // underscores do not change the value of the number.
  1093  // Incorrect placement of underscores is reported as a panic if there
  1094  // are no other errors.
  1095  //
  1096  // If the number is invalid this method leaves z unchanged and returns nil, error.
  1097  func (z *Element) SetString(number string) (*Element, error) {
  1098  	// get temporary big int from the pool
  1099  	vv := pool.BigInt.Get()
  1100  
  1101  	if _, ok := vv.SetString(number, 0); !ok {
  1102  		return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number)
  1103  	}
  1104  
  1105  	z.SetBigInt(vv)
  1106  
  1107  	// release object into pool
  1108  	pool.BigInt.Put(vv)
  1109  
  1110  	return z, nil
  1111  }
  1112  
  1113  // MarshalJSON returns json encoding of z (z.Text(10))
  1114  // If z == nil, returns null
  1115  func (z *Element) MarshalJSON() ([]byte, error) {
  1116  	if z == nil {
  1117  		return []byte("null"), nil
  1118  	}
  1119  	const maxSafeBound = 15 // we encode it as number if it's small
  1120  	s := z.Text(10)
  1121  	if len(s) <= maxSafeBound {
  1122  		return []byte(s), nil
  1123  	}
  1124  	var sbb strings.Builder
  1125  	sbb.WriteByte('"')
  1126  	sbb.WriteString(s)
  1127  	sbb.WriteByte('"')
  1128  	return []byte(sbb.String()), nil
  1129  }
  1130  
  1131  // UnmarshalJSON accepts numbers and strings as input
  1132  // See Element.SetString for valid prefixes (0x, 0b, ...)
  1133  func (z *Element) UnmarshalJSON(data []byte) error {
  1134  	s := string(data)
  1135  	if len(s) > Bits*3 {
  1136  		return errors.New("value too large (max = Element.Bits * 3)")
  1137  	}
  1138  
  1139  	// we accept numbers and strings, remove leading and trailing quotes if any
  1140  	if len(s) > 0 && s[0] == '"' {
  1141  		s = s[1:]
  1142  	}
  1143  	if len(s) > 0 && s[len(s)-1] == '"' {
  1144  		s = s[:len(s)-1]
  1145  	}
  1146  
  1147  	// get temporary big int from the pool
  1148  	vv := pool.BigInt.Get()
  1149  
  1150  	if _, ok := vv.SetString(s, 0); !ok {
  1151  		return errors.New("can't parse into a big.Int: " + s)
  1152  	}
  1153  
  1154  	z.SetBigInt(vv)
  1155  
  1156  	// release object into pool
  1157  	pool.BigInt.Put(vv)
  1158  	return nil
  1159  }
  1160  
  1161  // A ByteOrder specifies how to convert byte slices into a Element
  1162  type ByteOrder interface {
  1163  	Element(*[Bytes]byte) (Element, error)
  1164  	PutElement(*[Bytes]byte, Element)
  1165  	String() string
  1166  }
  1167  
  1168  // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder.
  1169  var BigEndian bigEndian
  1170  
  1171  type bigEndian struct{}
  1172  
  1173  // Element interpret b is a big-endian 40-byte slice.
  1174  // If b encodes a value higher than q, Element returns error.
  1175  func (bigEndian) Element(b *[Bytes]byte) (Element, error) {
  1176  	var z Element
  1177  	z[0] = binary.BigEndian.Uint64((*b)[32:40])
  1178  	z[1] = binary.BigEndian.Uint64((*b)[24:32])
  1179  	z[2] = binary.BigEndian.Uint64((*b)[16:24])
  1180  	z[3] = binary.BigEndian.Uint64((*b)[8:16])
  1181  	z[4] = binary.BigEndian.Uint64((*b)[0:8])
  1182  
  1183  	if !z.smallerThanModulus() {
  1184  		return Element{}, errors.New("invalid fp.Element encoding")
  1185  	}
  1186  
  1187  	z.toMont()
  1188  	return z, nil
  1189  }
  1190  
  1191  func (bigEndian) PutElement(b *[Bytes]byte, e Element) {
  1192  	e.fromMont()
  1193  	binary.BigEndian.PutUint64((*b)[32:40], e[0])
  1194  	binary.BigEndian.PutUint64((*b)[24:32], e[1])
  1195  	binary.BigEndian.PutUint64((*b)[16:24], e[2])
  1196  	binary.BigEndian.PutUint64((*b)[8:16], e[3])
  1197  	binary.BigEndian.PutUint64((*b)[0:8], e[4])
  1198  }
  1199  
  1200  func (bigEndian) String() string { return "BigEndian" }
  1201  
  1202  // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder.
  1203  var LittleEndian littleEndian
  1204  
  1205  type littleEndian struct{}
  1206  
  1207  func (littleEndian) Element(b *[Bytes]byte) (Element, error) {
  1208  	var z Element
  1209  	z[0] = binary.LittleEndian.Uint64((*b)[0:8])
  1210  	z[1] = binary.LittleEndian.Uint64((*b)[8:16])
  1211  	z[2] = binary.LittleEndian.Uint64((*b)[16:24])
  1212  	z[3] = binary.LittleEndian.Uint64((*b)[24:32])
  1213  	z[4] = binary.LittleEndian.Uint64((*b)[32:40])
  1214  
  1215  	if !z.smallerThanModulus() {
  1216  		return Element{}, errors.New("invalid fp.Element encoding")
  1217  	}
  1218  
  1219  	z.toMont()
  1220  	return z, nil
  1221  }
  1222  
  1223  func (littleEndian) PutElement(b *[Bytes]byte, e Element) {
  1224  	e.fromMont()
  1225  	binary.LittleEndian.PutUint64((*b)[0:8], e[0])
  1226  	binary.LittleEndian.PutUint64((*b)[8:16], e[1])
  1227  	binary.LittleEndian.PutUint64((*b)[16:24], e[2])
  1228  	binary.LittleEndian.PutUint64((*b)[24:32], e[3])
  1229  	binary.LittleEndian.PutUint64((*b)[32:40], e[4])
  1230  }
  1231  
  1232  func (littleEndian) String() string { return "LittleEndian" }
  1233  
  1234  // Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
  1235  func (z *Element) Legendre() int {
  1236  	var l Element
  1237  	// z^((q-1)/2)
  1238  	l.expByLegendreExp(*z)
  1239  
  1240  	if l.IsZero() {
  1241  		return 0
  1242  	}
  1243  
  1244  	// if l == 1
  1245  	if l.IsOne() {
  1246  		return 1
  1247  	}
  1248  	return -1
  1249  }
  1250  
  1251  // Sqrt z = √x (mod q)
  1252  // if the square root doesn't exist (x is not a square mod q)
  1253  // Sqrt leaves z unchanged and returns nil
  1254  func (z *Element) Sqrt(x *Element) *Element {
  1255  	// q ≡ 1 (mod 4)
  1256  	// see modSqrtTonelliShanks in math/big/int.go
  1257  	// using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf
  1258  
  1259  	var y, b, t, w Element
  1260  	// w = x^((s-1)/2))
  1261  	w.expBySqrtExp(*x)
  1262  
  1263  	// y = x^((s+1)/2)) = w * x
  1264  	y.Mul(x, &w)
  1265  
  1266  	// b = xˢ = w * w * x = y * x
  1267  	b.Mul(&w, &y)
  1268  
  1269  	// g = nonResidue ^ s
  1270  	var g = Element{
  1271  		11195128742969911322,
  1272  		1359304652430195240,
  1273  		15267589139354181340,
  1274  		10518360976114966361,
  1275  		300769513466036652,
  1276  	}
  1277  	r := uint64(20)
  1278  
  1279  	// compute legendre symbol
  1280  	// t = x^((q-1)/2) = r-1 squaring of xˢ
  1281  	t = b
  1282  	for i := uint64(0); i < r-1; i++ {
  1283  		t.Square(&t)
  1284  	}
  1285  	if t.IsZero() {
  1286  		return z.SetZero()
  1287  	}
  1288  	if !t.IsOne() {
  1289  		// t != 1, we don't have a square root
  1290  		return nil
  1291  	}
  1292  	for {
  1293  		var m uint64
  1294  		t = b
  1295  
  1296  		// for t != 1
  1297  		for !t.IsOne() {
  1298  			t.Square(&t)
  1299  			m++
  1300  		}
  1301  
  1302  		if m == 0 {
  1303  			return z.Set(&y)
  1304  		}
  1305  		// t = g^(2^(r-m-1)) (mod q)
  1306  		ge := int(r - m - 1)
  1307  		t = g
  1308  		for ge > 0 {
  1309  			t.Square(&t)
  1310  			ge--
  1311  		}
  1312  
  1313  		g.Square(&t)
  1314  		y.Mul(&y, &t)
  1315  		b.Mul(&b, &g)
  1316  		r = m
  1317  	}
  1318  }
  1319  
  1320  const (
  1321  	k               = 32 // word size / 2
  1322  	signBitSelector = uint64(1) << 63
  1323  	approxLowBitsN  = k - 1
  1324  	approxHighBitsN = k + 1
  1325  )
  1326  
  1327  const (
  1328  	inversionCorrectionFactorWord0 = 13359241550610159594
  1329  	inversionCorrectionFactorWord1 = 7624632887220174691
  1330  	inversionCorrectionFactorWord2 = 6412344873752403825
  1331  	inversionCorrectionFactorWord3 = 11214014560053792263
  1332  	inversionCorrectionFactorWord4 = 75428258669939399
  1333  	invIterationsN                 = 22
  1334  )
  1335  
  1336  // Inverse z = x⁻¹ (mod q)
  1337  //
  1338  // if x == 0, sets and returns z = x
  1339  func (z *Element) Inverse(x *Element) *Element {
  1340  	// Implements "Optimized Binary GCD for Modular Inversion"
  1341  	// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
  1342  
  1343  	a := *x
  1344  	b := Element{
  1345  		q0,
  1346  		q1,
  1347  		q2,
  1348  		q3,
  1349  		q4,
  1350  	} // b := q
  1351  
  1352  	u := Element{1}
  1353  
  1354  	// Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v]
  1355  	// cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1)
  1356  	var c0, c1 int64
  1357  
  1358  	// Saved update factors to reduce the number of field multiplications
  1359  	var pf0, pf1, pg0, pg1 int64
  1360  
  1361  	var i uint
  1362  
  1363  	var v, s Element
  1364  
  1365  	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
  1366  	// This also lets us get away with half as many updates to u,v
  1367  	// To make this constant-time-ish, replace the condition with i < invIterationsN
  1368  	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
  1369  		n := max(a.BitLen(), b.BitLen())
  1370  		aApprox, bApprox := approximate(&a, n), approximate(&b, n)
  1371  
  1372  		// f₀, g₀, f₁, g₁ = 1, 0, 0, 1
  1373  		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1
  1374  
  1375  		for j := 0; j < approxLowBitsN; j++ {
  1376  
  1377  			// -2ʲ < f₀, f₁ ≤ 2ʲ
  1378  			// |f₀| + |f₁| < 2ʲ⁺¹
  1379  
  1380  			if aApprox&1 == 0 {
  1381  				aApprox /= 2
  1382  			} else {
  1383  				s, borrow := bits.Sub64(aApprox, bApprox, 0)
  1384  				if borrow == 1 {
  1385  					s = bApprox - aApprox
  1386  					bApprox = aApprox
  1387  					c0, c1 = c1, c0
  1388  					// invariants unchanged
  1389  				}
  1390  
  1391  				aApprox = s / 2
  1392  				c0 = c0 - c1
  1393  
  1394  				// Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking)
  1395  				// Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹
  1396  				// Invariants unchanged for f₁
  1397  			}
  1398  
  1399  			c1 *= 2
  1400  			// -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹
  1401  			// So now |f₀| + |f₁| < 2ʲ⁺²
  1402  		}
  1403  
  1404  		s = a
  1405  
  1406  		var g0 int64
  1407  		// from this point on c0 aliases for f0
  1408  		c0, g0 = updateFactorsDecompose(c0)
  1409  		aHi := a.linearCombNonModular(&s, c0, &b, g0)
  1410  		if aHi&signBitSelector != 0 {
  1411  			// if aHi < 0
  1412  			c0, g0 = -c0, -g0
  1413  			aHi = negL(&a, aHi)
  1414  		}
  1415  		// right-shift a by k-1 bits
  1416  		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
  1417  		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
  1418  		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
  1419  		a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN)
  1420  		a[4] = (a[4] >> approxLowBitsN) | (aHi << approxHighBitsN)
  1421  
  1422  		var f1 int64
  1423  		// from this point on c1 aliases for g0
  1424  		f1, c1 = updateFactorsDecompose(c1)
  1425  		bHi := b.linearCombNonModular(&s, f1, &b, c1)
  1426  		if bHi&signBitSelector != 0 {
  1427  			// if bHi < 0
  1428  			f1, c1 = -f1, -c1
  1429  			bHi = negL(&b, bHi)
  1430  		}
  1431  		// right-shift b by k-1 bits
  1432  		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
  1433  		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
  1434  		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
  1435  		b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN)
  1436  		b[4] = (b[4] >> approxLowBitsN) | (bHi << approxHighBitsN)
  1437  
  1438  		if i&1 == 1 {
  1439  			// Combine current update factors with previously stored ones
  1440  			// [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values
  1441  			// We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁|
  1442  			// = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹
  1443  			// So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register
  1444  
  1445  			// c₀ aliases f₀, c₁ aliases g₁
  1446  			c0, g0, f1, c1 = c0*pf0+g0*pf1,
  1447  				c0*pg0+g0*pg1,
  1448  				f1*pf0+c1*pf1,
  1449  				f1*pg0+c1*pg1
  1450  
  1451  			s = u
  1452  
  1453  			// 0 ≤ u, v < 2²⁵⁵
  1454  			// |F₀|, |G₀| < 2⁶³
  1455  			u.linearComb(&u, c0, &v, g0)
  1456  			// |F₁|, |G₁| < 2⁶³
  1457  			v.linearComb(&s, f1, &v, c1)
  1458  
  1459  		} else {
  1460  			// Save update factors
  1461  			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
  1462  		}
  1463  	}
  1464  
  1465  	// For every iteration that we miss, v is not being multiplied by 2ᵏ⁻²
  1466  	const pSq uint64 = 1 << (2 * (k - 1))
  1467  	a = Element{pSq}
  1468  	// If the function is constant-time ish, this loop will not run (no need to take it out explicitly)
  1469  	for ; i < invIterationsN; i += 2 {
  1470  		// could optimize further with mul by word routine or by pre-computing a table since with k=26,
  1471  		// we would multiply by pSq up to 13times;
  1472  		// on x86, the assembly routine outperforms generic code for mul by word
  1473  		// on arm64, we may loose up to ~5% for 6 limbs
  1474  		v.Mul(&v, &a)
  1475  	}
  1476  
  1477  	u.Set(x) // for correctness check
  1478  
  1479  	z.Mul(&v, &Element{
  1480  		inversionCorrectionFactorWord0,
  1481  		inversionCorrectionFactorWord1,
  1482  		inversionCorrectionFactorWord2,
  1483  		inversionCorrectionFactorWord3,
  1484  		inversionCorrectionFactorWord4,
  1485  	})
  1486  
  1487  	// correctness check
  1488  	v.Mul(&u, z)
  1489  	if !v.IsOne() && !u.IsZero() {
  1490  		return z.inverseExp(u)
  1491  	}
  1492  
  1493  	return z
  1494  }
  1495  
  1496  // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q)
  1497  func (z *Element) inverseExp(x Element) *Element {
  1498  	// e == q-2
  1499  	e := Modulus()
  1500  	e.Sub(e, big.NewInt(2))
  1501  
  1502  	z.Set(&x)
  1503  
  1504  	for i := e.BitLen() - 2; i >= 0; i-- {
  1505  		z.Square(z)
  1506  		if e.Bit(i) == 1 {
  1507  			z.Mul(z, &x)
  1508  		}
  1509  	}
  1510  
  1511  	return z
  1512  }
  1513  
  1514  // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits
  1515  // if x fits in a word as is, no approximation necessary
  1516  func approximate(x *Element, nBits int) uint64 {
  1517  
  1518  	if nBits <= 64 {
  1519  		return x[0]
  1520  	}
  1521  
  1522  	const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones
  1523  	lo := mask & x[0]
  1524  
  1525  	hiWordIndex := (nBits - 1) / 64
  1526  
  1527  	hiWordBitsAvailable := nBits - hiWordIndex*64
  1528  	hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN)
  1529  
  1530  	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
  1531  	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)
  1532  
  1533  	mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1)
  1534  	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed
  1535  
  1536  	return lo | mid | hi
  1537  }
  1538  
  1539  // linearComb z = xC * x + yC * y;
  1540  // 0 ≤ x, y < 2³¹⁵
  1541  // |xC|, |yC| < 2⁶³
  1542  func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) {
  1543  	// | (hi, z) | < 2 * 2⁶³ * 2³¹⁵ = 2³⁷⁹
  1544  	// therefore | hi | < 2⁵⁹ ≤ 2⁶³
  1545  	hi := z.linearCombNonModular(x, xC, y, yC)
  1546  	z.montReduceSigned(z, hi)
  1547  }
  1548  
  1549  // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm
  1550  // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit.
  1551  func (z *Element) montReduceSigned(x *Element, xHi uint64) {
  1552  	const signBitRemover = ^signBitSelector
  1553  	mustNeg := xHi&signBitSelector != 0
  1554  	// the SOS implementation requires that most significant bit is 0
  1555  	// Let X be xHi*r + x
  1556  	// If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement)
  1557  	xHi &= signBitRemover
  1558  	// with this a negative X is now represented as 2⁶³ r + X
  1559  
  1560  	var t [2*Limbs - 1]uint64
  1561  	var C uint64
  1562  
  1563  	m := x[0] * qInvNeg
  1564  
  1565  	C = madd0(m, q0, x[0])
  1566  	C, t[1] = madd2(m, q1, x[1], C)
  1567  	C, t[2] = madd2(m, q2, x[2], C)
  1568  	C, t[3] = madd2(m, q3, x[3], C)
  1569  	C, t[4] = madd2(m, q4, x[4], C)
  1570  
  1571  	// m * qElement[4] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1
  1572  	// x[4] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2
  1573  	// On LHS, (C, t[4]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1
  1574  	// So on LHS, C ≤ 2⁶³
  1575  	t[5] = xHi + C
  1576  	// xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴
  1577  
  1578  	// <standard SOS>
  1579  	{
  1580  		const i = 1
  1581  		m = t[i] * qInvNeg
  1582  
  1583  		C = madd0(m, q0, t[i+0])
  1584  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  1585  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  1586  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  1587  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  1588  
  1589  		t[i+Limbs] += C
  1590  	}
  1591  	{
  1592  		const i = 2
  1593  		m = t[i] * qInvNeg
  1594  
  1595  		C = madd0(m, q0, t[i+0])
  1596  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  1597  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  1598  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  1599  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  1600  
  1601  		t[i+Limbs] += C
  1602  	}
  1603  	{
  1604  		const i = 3
  1605  		m = t[i] * qInvNeg
  1606  
  1607  		C = madd0(m, q0, t[i+0])
  1608  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  1609  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  1610  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  1611  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  1612  
  1613  		t[i+Limbs] += C
  1614  	}
  1615  	{
  1616  		const i = 4
  1617  		m := t[i] * qInvNeg
  1618  
  1619  		C = madd0(m, q0, t[i+0])
  1620  		C, z[0] = madd2(m, q1, t[i+1], C)
  1621  		C, z[1] = madd2(m, q2, t[i+2], C)
  1622  		C, z[2] = madd2(m, q3, t[i+3], C)
  1623  		z[4], z[3] = madd2(m, q4, t[i+4], C)
  1624  	}
  1625  
  1626  	// if z ⩾ q → z -= q
  1627  	if !z.smallerThanModulus() {
  1628  		var b uint64
  1629  		z[0], b = bits.Sub64(z[0], q0, 0)
  1630  		z[1], b = bits.Sub64(z[1], q1, b)
  1631  		z[2], b = bits.Sub64(z[2], q2, b)
  1632  		z[3], b = bits.Sub64(z[3], q3, b)
  1633  		z[4], _ = bits.Sub64(z[4], q4, b)
  1634  	}
  1635  	// </standard SOS>
  1636  
  1637  	if mustNeg {
  1638  		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
  1639  		var b uint64
  1640  		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
  1641  		z[1], b = bits.Sub64(z[1], 0, b)
  1642  		z[2], b = bits.Sub64(z[2], 0, b)
  1643  		z[3], b = bits.Sub64(z[3], 0, b)
  1644  		z[4], b = bits.Sub64(z[4], 0, b)
  1645  
  1646  		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
  1647  
  1648  		if b != 0 {
  1649  			// z[4] = -1
  1650  			// negative: add q
  1651  			const neg1 = 0xFFFFFFFFFFFFFFFF
  1652  
  1653  			var carry uint64
  1654  
  1655  			z[0], carry = bits.Add64(z[0], q0, 0)
  1656  			z[1], carry = bits.Add64(z[1], q1, carry)
  1657  			z[2], carry = bits.Add64(z[2], q2, carry)
  1658  			z[3], carry = bits.Add64(z[3], q3, carry)
  1659  			z[4], _ = bits.Add64(neg1, q4, carry)
  1660  		}
  1661  	}
  1662  }
  1663  
  1664  const (
  1665  	updateFactorsConversionBias    int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1)
  1666  	updateFactorIdentityMatrixRow0       = 1
  1667  	updateFactorIdentityMatrixRow1       = 1 << 32
  1668  )
  1669  
  1670  func updateFactorsDecompose(c int64) (int64, int64) {
  1671  	c += updateFactorsConversionBias
  1672  	const low32BitsFilter int64 = 0xFFFFFFFF
  1673  	f := c&low32BitsFilter - 0x7FFFFFFF
  1674  	g := c>>32&low32BitsFilter - 0x7FFFFFFF
  1675  	return f, g
  1676  }
  1677  
  1678  // negL negates in place [x | xHi] and return the new most significant word xHi
  1679  func negL(x *Element, xHi uint64) uint64 {
  1680  	var b uint64
  1681  
  1682  	x[0], b = bits.Sub64(0, x[0], 0)
  1683  	x[1], b = bits.Sub64(0, x[1], b)
  1684  	x[2], b = bits.Sub64(0, x[2], b)
  1685  	x[3], b = bits.Sub64(0, x[3], b)
  1686  	x[4], b = bits.Sub64(0, x[4], b)
  1687  	xHi, _ = bits.Sub64(0, xHi, b)
  1688  
  1689  	return xHi
  1690  }
  1691  
  1692  // mulWNonModular multiplies by one word in non-montgomery, without reducing
  1693  func (z *Element) mulWNonModular(x *Element, y int64) uint64 {
  1694  
  1695  	// w := abs(y)
  1696  	m := y >> 63
  1697  	w := uint64((y ^ m) - m)
  1698  
  1699  	var c uint64
  1700  	c, z[0] = bits.Mul64(x[0], w)
  1701  	c, z[1] = madd1(x[1], w, c)
  1702  	c, z[2] = madd1(x[2], w, c)
  1703  	c, z[3] = madd1(x[3], w, c)
  1704  	c, z[4] = madd1(x[4], w, c)
  1705  
  1706  	if y < 0 {
  1707  		c = negL(z, c)
  1708  	}
  1709  
  1710  	return c
  1711  }
  1712  
  1713  // linearCombNonModular computes a linear combination without modular reduction
  1714  func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
  1715  	var yTimes Element
  1716  
  1717  	yHi := yTimes.mulWNonModular(y, yC)
  1718  	xHi := z.mulWNonModular(x, xC)
  1719  
  1720  	var carry uint64
  1721  	z[0], carry = bits.Add64(z[0], yTimes[0], 0)
  1722  	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
  1723  	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
  1724  	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
  1725  	z[4], carry = bits.Add64(z[4], yTimes[4], carry)
  1726  
  1727  	yHi, _ = bits.Add64(xHi, yHi, carry)
  1728  
  1729  	return yHi
  1730  }