github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fp/element.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Code generated by consensys/gnark-crypto DO NOT EDIT
    16  
    17  package fp
    18  
    19  import (
    20  	"crypto/rand"
    21  	"encoding/binary"
    22  	"errors"
    23  	"io"
    24  	"math/big"
    25  	"math/bits"
    26  	"reflect"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"github.com/bits-and-blooms/bitset"
    31  	"github.com/consensys/gnark-crypto/field/hash"
    32  	"github.com/consensys/gnark-crypto/field/pool"
    33  )
    34  
    35  // Element represents a field element stored on 10 words (uint64)
    36  //
    37  // Element are assumed to be in Montgomery form in all methods.
    38  //
    39  // Modulus q =
    40  //
    41  //	q[base10] = 20494478644167774678813387386538961497669590920908778075528754551012016751717791778743535050360001387419576570244406805463255765034468441182772056330021723098661967429339971741066259394985997
    42  //	q[base16] = 0x126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d
    43  //
    44  // # Warning
    45  //
    46  // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance.
    47  type Element [10]uint64
    48  
    49  const (
    50  	Limbs = 10  // number of 64 bits words needed to represent a Element
    51  	Bits  = 633 // number of bits needed to represent a Element
    52  	Bytes = 80  // number of bytes needed to represent a Element
    53  )
    54  
    55  // Field modulus q
    56  const (
    57  	q0 uint64 = 15512955586897510413
    58  	q1 uint64 = 4410884215886313276
    59  	q2 uint64 = 15543556715411259941
    60  	q3 uint64 = 9083347379620258823
    61  	q4 uint64 = 13320134076191308873
    62  	q5 uint64 = 9318693926755804304
    63  	q6 uint64 = 5645674015335635503
    64  	q7 uint64 = 12176845843281334983
    65  	q8 uint64 = 18165857675053050549
    66  	q9 uint64 = 82862755739295587
    67  )
    68  
    69  var qElement = Element{
    70  	q0,
    71  	q1,
    72  	q2,
    73  	q3,
    74  	q4,
    75  	q5,
    76  	q6,
    77  	q7,
    78  	q8,
    79  	q9,
    80  }
    81  
    82  var _modulus big.Int // q stored as big.Int
    83  
    84  // Modulus returns q as a big.Int
    85  //
    86  //	q[base10] = 20494478644167774678813387386538961497669590920908778075528754551012016751717791778743535050360001387419576570244406805463255765034468441182772056330021723098661967429339971741066259394985997
    87  //	q[base16] = 0x126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d
    88  func Modulus() *big.Int {
    89  	return new(big.Int).Set(&_modulus)
    90  }
    91  
    92  // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r
    93  // used for Montgomery reduction
    94  const qInvNeg uint64 = 13046692460116554043
    95  
    96  func init() {
    97  	_modulus.SetString("126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d", 16)
    98  }
    99  
   100  // NewElement returns a new Element from a uint64 value
   101  //
   102  // it is equivalent to
   103  //
   104  //	var v Element
   105  //	v.SetUint64(...)
   106  func NewElement(v uint64) Element {
   107  	z := Element{v}
   108  	z.Mul(&z, &rSquare)
   109  	return z
   110  }
   111  
   112  // SetUint64 sets z to v and returns z
   113  func (z *Element) SetUint64(v uint64) *Element {
   114  	//  sets z LSB to v (non-Montgomery form) and convert z to Montgomery form
   115  	*z = Element{v}
   116  	return z.Mul(z, &rSquare) // z.toMont()
   117  }
   118  
   119  // SetInt64 sets z to v and returns z
   120  func (z *Element) SetInt64(v int64) *Element {
   121  
   122  	// absolute value of v
   123  	m := v >> 63
   124  	z.SetUint64(uint64((v ^ m) - m))
   125  
   126  	if m != 0 {
   127  		// v is negative
   128  		z.Neg(z)
   129  	}
   130  
   131  	return z
   132  }
   133  
   134  // Set z = x and returns z
   135  func (z *Element) Set(x *Element) *Element {
   136  	z[0] = x[0]
   137  	z[1] = x[1]
   138  	z[2] = x[2]
   139  	z[3] = x[3]
   140  	z[4] = x[4]
   141  	z[5] = x[5]
   142  	z[6] = x[6]
   143  	z[7] = x[7]
   144  	z[8] = x[8]
   145  	z[9] = x[9]
   146  	return z
   147  }
   148  
   149  // SetInterface converts provided interface into Element
   150  // returns an error if provided type is not supported
   151  // supported types:
   152  //
   153  //	Element
   154  //	*Element
   155  //	uint64
   156  //	int
   157  //	string (see SetString for valid formats)
   158  //	*big.Int
   159  //	big.Int
   160  //	[]byte
   161  func (z *Element) SetInterface(i1 interface{}) (*Element, error) {
   162  	if i1 == nil {
   163  		return nil, errors.New("can't set fp.Element with <nil>")
   164  	}
   165  
   166  	switch c1 := i1.(type) {
   167  	case Element:
   168  		return z.Set(&c1), nil
   169  	case *Element:
   170  		if c1 == nil {
   171  			return nil, errors.New("can't set fp.Element with <nil>")
   172  		}
   173  		return z.Set(c1), nil
   174  	case uint8:
   175  		return z.SetUint64(uint64(c1)), nil
   176  	case uint16:
   177  		return z.SetUint64(uint64(c1)), nil
   178  	case uint32:
   179  		return z.SetUint64(uint64(c1)), nil
   180  	case uint:
   181  		return z.SetUint64(uint64(c1)), nil
   182  	case uint64:
   183  		return z.SetUint64(c1), nil
   184  	case int8:
   185  		return z.SetInt64(int64(c1)), nil
   186  	case int16:
   187  		return z.SetInt64(int64(c1)), nil
   188  	case int32:
   189  		return z.SetInt64(int64(c1)), nil
   190  	case int64:
   191  		return z.SetInt64(c1), nil
   192  	case int:
   193  		return z.SetInt64(int64(c1)), nil
   194  	case string:
   195  		return z.SetString(c1)
   196  	case *big.Int:
   197  		if c1 == nil {
   198  			return nil, errors.New("can't set fp.Element with <nil>")
   199  		}
   200  		return z.SetBigInt(c1), nil
   201  	case big.Int:
   202  		return z.SetBigInt(&c1), nil
   203  	case []byte:
   204  		return z.SetBytes(c1), nil
   205  	default:
   206  		return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String())
   207  	}
   208  }
   209  
   210  // SetZero z = 0
   211  func (z *Element) SetZero() *Element {
   212  	z[0] = 0
   213  	z[1] = 0
   214  	z[2] = 0
   215  	z[3] = 0
   216  	z[4] = 0
   217  	z[5] = 0
   218  	z[6] = 0
   219  	z[7] = 0
   220  	z[8] = 0
   221  	z[9] = 0
   222  	return z
   223  }
   224  
   225  // SetOne z = 1 (in Montgomery form)
   226  func (z *Element) SetOne() *Element {
   227  	z[0] = 5665001492438840506
   228  	z[1] = 16907884053554239805
   229  	z[2] = 17318295036095996852
   230  	z[3] = 12638729832353218866
   231  	z[4] = 12856030952767240260
   232  	z[5] = 15732028589390776959
   233  	z[6] = 1038965607738428109
   234  	z[7] = 8411601626847721258
   235  	z[8] = 7016548280614581879
   236  	z[9] = 51212299585931083
   237  	return z
   238  }
   239  
   240  // Div z = x*y⁻¹ (mod q)
   241  func (z *Element) Div(x, y *Element) *Element {
   242  	var yInv Element
   243  	yInv.Inverse(y)
   244  	z.Mul(x, &yInv)
   245  	return z
   246  }
   247  
   248  // Equal returns z == x; constant-time
   249  func (z *Element) Equal(x *Element) bool {
   250  	return z.NotEqual(x) == 0
   251  }
   252  
   253  // NotEqual returns 0 if and only if z == x; constant-time
   254  func (z *Element) NotEqual(x *Element) uint64 {
   255  	return (z[9] ^ x[9]) | (z[8] ^ x[8]) | (z[7] ^ x[7]) | (z[6] ^ x[6]) | (z[5] ^ x[5]) | (z[4] ^ x[4]) | (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0])
   256  }
   257  
   258  // IsZero returns z == 0
   259  func (z *Element) IsZero() bool {
   260  	return (z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0
   261  }
   262  
   263  // IsOne returns z == 1
   264  func (z *Element) IsOne() bool {
   265  	return ((z[9] ^ 51212299585931083) | (z[8] ^ 7016548280614581879) | (z[7] ^ 8411601626847721258) | (z[6] ^ 1038965607738428109) | (z[5] ^ 15732028589390776959) | (z[4] ^ 12856030952767240260) | (z[3] ^ 12638729832353218866) | (z[2] ^ 17318295036095996852) | (z[1] ^ 16907884053554239805) | (z[0] ^ 5665001492438840506)) == 0
   266  }
   267  
   268  // IsUint64 reports whether z can be represented as an uint64.
   269  func (z *Element) IsUint64() bool {
   270  	zz := *z
   271  	zz.fromMont()
   272  	return zz.FitsOnOneWord()
   273  }
   274  
   275  // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined.
   276  func (z *Element) Uint64() uint64 {
   277  	return z.Bits()[0]
   278  }
   279  
   280  // FitsOnOneWord reports whether z words (except the least significant word) are 0
   281  //
   282  // It is the responsibility of the caller to convert from Montgomery to Regular form if needed.
   283  func (z *Element) FitsOnOneWord() bool {
   284  	return (z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0
   285  }
   286  
   287  // Cmp compares (lexicographic order) z and x and returns:
   288  //
   289  //	-1 if z <  x
   290  //	 0 if z == x
   291  //	+1 if z >  x
   292  func (z *Element) Cmp(x *Element) int {
   293  	_z := z.Bits()
   294  	_x := x.Bits()
   295  	if _z[9] > _x[9] {
   296  		return 1
   297  	} else if _z[9] < _x[9] {
   298  		return -1
   299  	}
   300  	if _z[8] > _x[8] {
   301  		return 1
   302  	} else if _z[8] < _x[8] {
   303  		return -1
   304  	}
   305  	if _z[7] > _x[7] {
   306  		return 1
   307  	} else if _z[7] < _x[7] {
   308  		return -1
   309  	}
   310  	if _z[6] > _x[6] {
   311  		return 1
   312  	} else if _z[6] < _x[6] {
   313  		return -1
   314  	}
   315  	if _z[5] > _x[5] {
   316  		return 1
   317  	} else if _z[5] < _x[5] {
   318  		return -1
   319  	}
   320  	if _z[4] > _x[4] {
   321  		return 1
   322  	} else if _z[4] < _x[4] {
   323  		return -1
   324  	}
   325  	if _z[3] > _x[3] {
   326  		return 1
   327  	} else if _z[3] < _x[3] {
   328  		return -1
   329  	}
   330  	if _z[2] > _x[2] {
   331  		return 1
   332  	} else if _z[2] < _x[2] {
   333  		return -1
   334  	}
   335  	if _z[1] > _x[1] {
   336  		return 1
   337  	} else if _z[1] < _x[1] {
   338  		return -1
   339  	}
   340  	if _z[0] > _x[0] {
   341  		return 1
   342  	} else if _z[0] < _x[0] {
   343  		return -1
   344  	}
   345  	return 0
   346  }
   347  
   348  // LexicographicallyLargest returns true if this element is strictly lexicographically
   349  // larger than its negation, false otherwise
   350  func (z *Element) LexicographicallyLargest() bool {
   351  	// adapted from github.com/zkcrypto/bls12_381
   352  	// we check if the element is larger than (q-1) / 2
   353  	// if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2
   354  
   355  	_z := z.Bits()
   356  
   357  	var b uint64
   358  	_, b = bits.Sub64(_z[0], 7756477793448755207, 0)
   359  	_, b = bits.Sub64(_z[1], 11428814144797932446, b)
   360  	_, b = bits.Sub64(_z[2], 16995150394560405778, b)
   361  	_, b = bits.Sub64(_z[3], 13765045726664905219, b)
   362  	_, b = bits.Sub64(_z[4], 6660067038095654436, b)
   363  	_, b = bits.Sub64(_z[5], 13882719000232677960, b)
   364  	_, b = bits.Sub64(_z[6], 12046209044522593559, b)
   365  	_, b = bits.Sub64(_z[7], 15311794958495443299, b)
   366  	_, b = bits.Sub64(_z[8], 18306300874381301082, b)
   367  	_, b = bits.Sub64(_z[9], 41431377869647793, b)
   368  
   369  	return b == 0
   370  }
   371  
   372  // SetRandom sets z to a uniform random value in [0, q).
   373  //
   374  // This might error only if reading from crypto/rand.Reader errors,
   375  // in which case, value of z is undefined.
   376  func (z *Element) SetRandom() (*Element, error) {
   377  	// this code is generated for all modulus
   378  	// and derived from go/src/crypto/rand/util.go
   379  
   380  	// l is number of limbs * 8; the number of bytes needed to reconstruct 10 uint64
   381  	const l = 80
   382  
   383  	// bitLen is the maximum bit length needed to encode a value < q.
   384  	const bitLen = 633
   385  
   386  	// k is the maximum byte length needed to encode a value < q.
   387  	const k = (bitLen + 7) / 8
   388  
   389  	// b is the number of bits in the most significant byte of q-1.
   390  	b := uint(bitLen % 8)
   391  	if b == 0 {
   392  		b = 8
   393  	}
   394  
   395  	var bytes [l]byte
   396  
   397  	for {
   398  		// note that bytes[k:l] is always 0
   399  		if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil {
   400  			return nil, err
   401  		}
   402  
   403  		// Clear unused bits in in the most significant byte to increase probability
   404  		// that the candidate is < q.
   405  		bytes[k-1] &= uint8(int(1<<b) - 1)
   406  		z[0] = binary.LittleEndian.Uint64(bytes[0:8])
   407  		z[1] = binary.LittleEndian.Uint64(bytes[8:16])
   408  		z[2] = binary.LittleEndian.Uint64(bytes[16:24])
   409  		z[3] = binary.LittleEndian.Uint64(bytes[24:32])
   410  		z[4] = binary.LittleEndian.Uint64(bytes[32:40])
   411  		z[5] = binary.LittleEndian.Uint64(bytes[40:48])
   412  		z[6] = binary.LittleEndian.Uint64(bytes[48:56])
   413  		z[7] = binary.LittleEndian.Uint64(bytes[56:64])
   414  		z[8] = binary.LittleEndian.Uint64(bytes[64:72])
   415  		z[9] = binary.LittleEndian.Uint64(bytes[72:80])
   416  
   417  		if !z.smallerThanModulus() {
   418  			continue // ignore the candidate and re-sample
   419  		}
   420  
   421  		return z, nil
   422  	}
   423  }
   424  
   425  // smallerThanModulus returns true if z < q
   426  // This is not constant time
   427  func (z *Element) smallerThanModulus() bool {
   428  	return (z[9] < q9 || (z[9] == q9 && (z[8] < q8 || (z[8] == q8 && (z[7] < q7 || (z[7] == q7 && (z[6] < q6 || (z[6] == q6 && (z[5] < q5 || (z[5] == q5 && (z[4] < q4 || (z[4] == q4 && (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0)))))))))))))))))))
   429  }
   430  
   431  // One returns 1
   432  func One() Element {
   433  	var one Element
   434  	one.SetOne()
   435  	return one
   436  }
   437  
   438  // Halve sets z to z / 2 (mod q)
   439  func (z *Element) Halve() {
   440  	var carry uint64
   441  
   442  	if z[0]&1 == 1 {
   443  		// z = z + q
   444  		z[0], carry = bits.Add64(z[0], q0, 0)
   445  		z[1], carry = bits.Add64(z[1], q1, carry)
   446  		z[2], carry = bits.Add64(z[2], q2, carry)
   447  		z[3], carry = bits.Add64(z[3], q3, carry)
   448  		z[4], carry = bits.Add64(z[4], q4, carry)
   449  		z[5], carry = bits.Add64(z[5], q5, carry)
   450  		z[6], carry = bits.Add64(z[6], q6, carry)
   451  		z[7], carry = bits.Add64(z[7], q7, carry)
   452  		z[8], carry = bits.Add64(z[8], q8, carry)
   453  		z[9], _ = bits.Add64(z[9], q9, carry)
   454  
   455  	}
   456  	// z = z >> 1
   457  	z[0] = z[0]>>1 | z[1]<<63
   458  	z[1] = z[1]>>1 | z[2]<<63
   459  	z[2] = z[2]>>1 | z[3]<<63
   460  	z[3] = z[3]>>1 | z[4]<<63
   461  	z[4] = z[4]>>1 | z[5]<<63
   462  	z[5] = z[5]>>1 | z[6]<<63
   463  	z[6] = z[6]>>1 | z[7]<<63
   464  	z[7] = z[7]>>1 | z[8]<<63
   465  	z[8] = z[8]>>1 | z[9]<<63
   466  	z[9] >>= 1
   467  
   468  }
   469  
   470  // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation
   471  // sets and returns z = z * 1
   472  func (z *Element) fromMont() *Element {
   473  	fromMont(z)
   474  	return z
   475  }
   476  
   477  // Add z = x + y (mod q)
   478  func (z *Element) Add(x, y *Element) *Element {
   479  
   480  	var carry uint64
   481  	z[0], carry = bits.Add64(x[0], y[0], 0)
   482  	z[1], carry = bits.Add64(x[1], y[1], carry)
   483  	z[2], carry = bits.Add64(x[2], y[2], carry)
   484  	z[3], carry = bits.Add64(x[3], y[3], carry)
   485  	z[4], carry = bits.Add64(x[4], y[4], carry)
   486  	z[5], carry = bits.Add64(x[5], y[5], carry)
   487  	z[6], carry = bits.Add64(x[6], y[6], carry)
   488  	z[7], carry = bits.Add64(x[7], y[7], carry)
   489  	z[8], carry = bits.Add64(x[8], y[8], carry)
   490  	z[9], _ = bits.Add64(x[9], y[9], carry)
   491  
   492  	// if z ⩾ q → z -= q
   493  	if !z.smallerThanModulus() {
   494  		var b uint64
   495  		z[0], b = bits.Sub64(z[0], q0, 0)
   496  		z[1], b = bits.Sub64(z[1], q1, b)
   497  		z[2], b = bits.Sub64(z[2], q2, b)
   498  		z[3], b = bits.Sub64(z[3], q3, b)
   499  		z[4], b = bits.Sub64(z[4], q4, b)
   500  		z[5], b = bits.Sub64(z[5], q5, b)
   501  		z[6], b = bits.Sub64(z[6], q6, b)
   502  		z[7], b = bits.Sub64(z[7], q7, b)
   503  		z[8], b = bits.Sub64(z[8], q8, b)
   504  		z[9], _ = bits.Sub64(z[9], q9, b)
   505  	}
   506  	return z
   507  }
   508  
   509  // Double z = x + x (mod q), aka Lsh 1
   510  func (z *Element) Double(x *Element) *Element {
   511  
   512  	var carry uint64
   513  	z[0], carry = bits.Add64(x[0], x[0], 0)
   514  	z[1], carry = bits.Add64(x[1], x[1], carry)
   515  	z[2], carry = bits.Add64(x[2], x[2], carry)
   516  	z[3], carry = bits.Add64(x[3], x[3], carry)
   517  	z[4], carry = bits.Add64(x[4], x[4], carry)
   518  	z[5], carry = bits.Add64(x[5], x[5], carry)
   519  	z[6], carry = bits.Add64(x[6], x[6], carry)
   520  	z[7], carry = bits.Add64(x[7], x[7], carry)
   521  	z[8], carry = bits.Add64(x[8], x[8], carry)
   522  	z[9], _ = bits.Add64(x[9], x[9], carry)
   523  
   524  	// if z ⩾ q → z -= q
   525  	if !z.smallerThanModulus() {
   526  		var b uint64
   527  		z[0], b = bits.Sub64(z[0], q0, 0)
   528  		z[1], b = bits.Sub64(z[1], q1, b)
   529  		z[2], b = bits.Sub64(z[2], q2, b)
   530  		z[3], b = bits.Sub64(z[3], q3, b)
   531  		z[4], b = bits.Sub64(z[4], q4, b)
   532  		z[5], b = bits.Sub64(z[5], q5, b)
   533  		z[6], b = bits.Sub64(z[6], q6, b)
   534  		z[7], b = bits.Sub64(z[7], q7, b)
   535  		z[8], b = bits.Sub64(z[8], q8, b)
   536  		z[9], _ = bits.Sub64(z[9], q9, b)
   537  	}
   538  	return z
   539  }
   540  
   541  // Sub z = x - y (mod q)
   542  func (z *Element) Sub(x, y *Element) *Element {
   543  	var b uint64
   544  	z[0], b = bits.Sub64(x[0], y[0], 0)
   545  	z[1], b = bits.Sub64(x[1], y[1], b)
   546  	z[2], b = bits.Sub64(x[2], y[2], b)
   547  	z[3], b = bits.Sub64(x[3], y[3], b)
   548  	z[4], b = bits.Sub64(x[4], y[4], b)
   549  	z[5], b = bits.Sub64(x[5], y[5], b)
   550  	z[6], b = bits.Sub64(x[6], y[6], b)
   551  	z[7], b = bits.Sub64(x[7], y[7], b)
   552  	z[8], b = bits.Sub64(x[8], y[8], b)
   553  	z[9], b = bits.Sub64(x[9], y[9], b)
   554  	if b != 0 {
   555  		var c uint64
   556  		z[0], c = bits.Add64(z[0], q0, 0)
   557  		z[1], c = bits.Add64(z[1], q1, c)
   558  		z[2], c = bits.Add64(z[2], q2, c)
   559  		z[3], c = bits.Add64(z[3], q3, c)
   560  		z[4], c = bits.Add64(z[4], q4, c)
   561  		z[5], c = bits.Add64(z[5], q5, c)
   562  		z[6], c = bits.Add64(z[6], q6, c)
   563  		z[7], c = bits.Add64(z[7], q7, c)
   564  		z[8], c = bits.Add64(z[8], q8, c)
   565  		z[9], _ = bits.Add64(z[9], q9, c)
   566  	}
   567  	return z
   568  }
   569  
   570  // Neg z = q - x
   571  func (z *Element) Neg(x *Element) *Element {
   572  	if x.IsZero() {
   573  		z.SetZero()
   574  		return z
   575  	}
   576  	var borrow uint64
   577  	z[0], borrow = bits.Sub64(q0, x[0], 0)
   578  	z[1], borrow = bits.Sub64(q1, x[1], borrow)
   579  	z[2], borrow = bits.Sub64(q2, x[2], borrow)
   580  	z[3], borrow = bits.Sub64(q3, x[3], borrow)
   581  	z[4], borrow = bits.Sub64(q4, x[4], borrow)
   582  	z[5], borrow = bits.Sub64(q5, x[5], borrow)
   583  	z[6], borrow = bits.Sub64(q6, x[6], borrow)
   584  	z[7], borrow = bits.Sub64(q7, x[7], borrow)
   585  	z[8], borrow = bits.Sub64(q8, x[8], borrow)
   586  	z[9], _ = bits.Sub64(q9, x[9], borrow)
   587  	return z
   588  }
   589  
   590  // Select is a constant-time conditional move.
   591  // If c=0, z = x0. Else z = x1
   592  func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element {
   593  	cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise
   594  	z[0] = x0[0] ^ cC&(x0[0]^x1[0])
   595  	z[1] = x0[1] ^ cC&(x0[1]^x1[1])
   596  	z[2] = x0[2] ^ cC&(x0[2]^x1[2])
   597  	z[3] = x0[3] ^ cC&(x0[3]^x1[3])
   598  	z[4] = x0[4] ^ cC&(x0[4]^x1[4])
   599  	z[5] = x0[5] ^ cC&(x0[5]^x1[5])
   600  	z[6] = x0[6] ^ cC&(x0[6]^x1[6])
   601  	z[7] = x0[7] ^ cC&(x0[7]^x1[7])
   602  	z[8] = x0[8] ^ cC&(x0[8]^x1[8])
   603  	z[9] = x0[9] ^ cC&(x0[9]^x1[9])
   604  	return z
   605  }
   606  
   607  // _mulGeneric is unoptimized textbook CIOS
   608  // it is a fallback solution on x86 when ADX instruction set is not available
   609  // and is used for testing purposes.
   610  func _mulGeneric(z, x, y *Element) {
   611  
   612  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
   613  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   614  	//
   615  	// The algorithm:
   616  	//
   617  	// for i=0 to N-1
   618  	// 		C := 0
   619  	// 		for j=0 to N-1
   620  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
   621  	// 		(t[N+1],t[N]) := t[N] + C
   622  	//
   623  	// 		C := 0
   624  	// 		m := t[0]*q'[0] mod D
   625  	// 		(C,_) := t[0] + m*q[0]
   626  	// 		for j=1 to N-1
   627  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   628  	//
   629  	// 		(C,t[N-1]) := t[N] + C
   630  	// 		t[N] := t[N+1] + C
   631  	//
   632  	// → N is the number of machine words needed to store the modulus q
   633  	// → D is the word size. For example, on a 64-bit architecture D is 2	64
   634  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
   635  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
   636  	// → t is a temporary array of size N+2
   637  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
   638  
   639  	var t [11]uint64
   640  	var D uint64
   641  	var m, C uint64
   642  	// -----------------------------------
   643  	// First loop
   644  
   645  	C, t[0] = bits.Mul64(y[0], x[0])
   646  	C, t[1] = madd1(y[0], x[1], C)
   647  	C, t[2] = madd1(y[0], x[2], C)
   648  	C, t[3] = madd1(y[0], x[3], C)
   649  	C, t[4] = madd1(y[0], x[4], C)
   650  	C, t[5] = madd1(y[0], x[5], C)
   651  	C, t[6] = madd1(y[0], x[6], C)
   652  	C, t[7] = madd1(y[0], x[7], C)
   653  	C, t[8] = madd1(y[0], x[8], C)
   654  	C, t[9] = madd1(y[0], x[9], C)
   655  
   656  	t[10], D = bits.Add64(t[10], C, 0)
   657  
   658  	// m = t[0]n'[0] mod W
   659  	m = t[0] * qInvNeg
   660  
   661  	// -----------------------------------
   662  	// Second loop
   663  	C = madd0(m, q0, t[0])
   664  	C, t[0] = madd2(m, q1, t[1], C)
   665  	C, t[1] = madd2(m, q2, t[2], C)
   666  	C, t[2] = madd2(m, q3, t[3], C)
   667  	C, t[3] = madd2(m, q4, t[4], C)
   668  	C, t[4] = madd2(m, q5, t[5], C)
   669  	C, t[5] = madd2(m, q6, t[6], C)
   670  	C, t[6] = madd2(m, q7, t[7], C)
   671  	C, t[7] = madd2(m, q8, t[8], C)
   672  	C, t[8] = madd2(m, q9, t[9], C)
   673  
   674  	t[9], C = bits.Add64(t[10], C, 0)
   675  	t[10], _ = bits.Add64(0, D, C)
   676  	// -----------------------------------
   677  	// First loop
   678  
   679  	C, t[0] = madd1(y[1], x[0], t[0])
   680  	C, t[1] = madd2(y[1], x[1], t[1], C)
   681  	C, t[2] = madd2(y[1], x[2], t[2], C)
   682  	C, t[3] = madd2(y[1], x[3], t[3], C)
   683  	C, t[4] = madd2(y[1], x[4], t[4], C)
   684  	C, t[5] = madd2(y[1], x[5], t[5], C)
   685  	C, t[6] = madd2(y[1], x[6], t[6], C)
   686  	C, t[7] = madd2(y[1], x[7], t[7], C)
   687  	C, t[8] = madd2(y[1], x[8], t[8], C)
   688  	C, t[9] = madd2(y[1], x[9], t[9], C)
   689  
   690  	t[10], D = bits.Add64(t[10], C, 0)
   691  
   692  	// m = t[0]n'[0] mod W
   693  	m = t[0] * qInvNeg
   694  
   695  	// -----------------------------------
   696  	// Second loop
   697  	C = madd0(m, q0, t[0])
   698  	C, t[0] = madd2(m, q1, t[1], C)
   699  	C, t[1] = madd2(m, q2, t[2], C)
   700  	C, t[2] = madd2(m, q3, t[3], C)
   701  	C, t[3] = madd2(m, q4, t[4], C)
   702  	C, t[4] = madd2(m, q5, t[5], C)
   703  	C, t[5] = madd2(m, q6, t[6], C)
   704  	C, t[6] = madd2(m, q7, t[7], C)
   705  	C, t[7] = madd2(m, q8, t[8], C)
   706  	C, t[8] = madd2(m, q9, t[9], C)
   707  
   708  	t[9], C = bits.Add64(t[10], C, 0)
   709  	t[10], _ = bits.Add64(0, D, C)
   710  	// -----------------------------------
   711  	// First loop
   712  
   713  	C, t[0] = madd1(y[2], x[0], t[0])
   714  	C, t[1] = madd2(y[2], x[1], t[1], C)
   715  	C, t[2] = madd2(y[2], x[2], t[2], C)
   716  	C, t[3] = madd2(y[2], x[3], t[3], C)
   717  	C, t[4] = madd2(y[2], x[4], t[4], C)
   718  	C, t[5] = madd2(y[2], x[5], t[5], C)
   719  	C, t[6] = madd2(y[2], x[6], t[6], C)
   720  	C, t[7] = madd2(y[2], x[7], t[7], C)
   721  	C, t[8] = madd2(y[2], x[8], t[8], C)
   722  	C, t[9] = madd2(y[2], x[9], t[9], C)
   723  
   724  	t[10], D = bits.Add64(t[10], C, 0)
   725  
   726  	// m = t[0]n'[0] mod W
   727  	m = t[0] * qInvNeg
   728  
   729  	// -----------------------------------
   730  	// Second loop
   731  	C = madd0(m, q0, t[0])
   732  	C, t[0] = madd2(m, q1, t[1], C)
   733  	C, t[1] = madd2(m, q2, t[2], C)
   734  	C, t[2] = madd2(m, q3, t[3], C)
   735  	C, t[3] = madd2(m, q4, t[4], C)
   736  	C, t[4] = madd2(m, q5, t[5], C)
   737  	C, t[5] = madd2(m, q6, t[6], C)
   738  	C, t[6] = madd2(m, q7, t[7], C)
   739  	C, t[7] = madd2(m, q8, t[8], C)
   740  	C, t[8] = madd2(m, q9, t[9], C)
   741  
   742  	t[9], C = bits.Add64(t[10], C, 0)
   743  	t[10], _ = bits.Add64(0, D, C)
   744  	// -----------------------------------
   745  	// First loop
   746  
   747  	C, t[0] = madd1(y[3], x[0], t[0])
   748  	C, t[1] = madd2(y[3], x[1], t[1], C)
   749  	C, t[2] = madd2(y[3], x[2], t[2], C)
   750  	C, t[3] = madd2(y[3], x[3], t[3], C)
   751  	C, t[4] = madd2(y[3], x[4], t[4], C)
   752  	C, t[5] = madd2(y[3], x[5], t[5], C)
   753  	C, t[6] = madd2(y[3], x[6], t[6], C)
   754  	C, t[7] = madd2(y[3], x[7], t[7], C)
   755  	C, t[8] = madd2(y[3], x[8], t[8], C)
   756  	C, t[9] = madd2(y[3], x[9], t[9], C)
   757  
   758  	t[10], D = bits.Add64(t[10], C, 0)
   759  
   760  	// m = t[0]n'[0] mod W
   761  	m = t[0] * qInvNeg
   762  
   763  	// -----------------------------------
   764  	// Second loop
   765  	C = madd0(m, q0, t[0])
   766  	C, t[0] = madd2(m, q1, t[1], C)
   767  	C, t[1] = madd2(m, q2, t[2], C)
   768  	C, t[2] = madd2(m, q3, t[3], C)
   769  	C, t[3] = madd2(m, q4, t[4], C)
   770  	C, t[4] = madd2(m, q5, t[5], C)
   771  	C, t[5] = madd2(m, q6, t[6], C)
   772  	C, t[6] = madd2(m, q7, t[7], C)
   773  	C, t[7] = madd2(m, q8, t[8], C)
   774  	C, t[8] = madd2(m, q9, t[9], C)
   775  
   776  	t[9], C = bits.Add64(t[10], C, 0)
   777  	t[10], _ = bits.Add64(0, D, C)
   778  	// -----------------------------------
   779  	// First loop
   780  
   781  	C, t[0] = madd1(y[4], x[0], t[0])
   782  	C, t[1] = madd2(y[4], x[1], t[1], C)
   783  	C, t[2] = madd2(y[4], x[2], t[2], C)
   784  	C, t[3] = madd2(y[4], x[3], t[3], C)
   785  	C, t[4] = madd2(y[4], x[4], t[4], C)
   786  	C, t[5] = madd2(y[4], x[5], t[5], C)
   787  	C, t[6] = madd2(y[4], x[6], t[6], C)
   788  	C, t[7] = madd2(y[4], x[7], t[7], C)
   789  	C, t[8] = madd2(y[4], x[8], t[8], C)
   790  	C, t[9] = madd2(y[4], x[9], t[9], C)
   791  
   792  	t[10], D = bits.Add64(t[10], C, 0)
   793  
   794  	// m = t[0]n'[0] mod W
   795  	m = t[0] * qInvNeg
   796  
   797  	// -----------------------------------
   798  	// Second loop
   799  	C = madd0(m, q0, t[0])
   800  	C, t[0] = madd2(m, q1, t[1], C)
   801  	C, t[1] = madd2(m, q2, t[2], C)
   802  	C, t[2] = madd2(m, q3, t[3], C)
   803  	C, t[3] = madd2(m, q4, t[4], C)
   804  	C, t[4] = madd2(m, q5, t[5], C)
   805  	C, t[5] = madd2(m, q6, t[6], C)
   806  	C, t[6] = madd2(m, q7, t[7], C)
   807  	C, t[7] = madd2(m, q8, t[8], C)
   808  	C, t[8] = madd2(m, q9, t[9], C)
   809  
   810  	t[9], C = bits.Add64(t[10], C, 0)
   811  	t[10], _ = bits.Add64(0, D, C)
   812  	// -----------------------------------
   813  	// First loop
   814  
   815  	C, t[0] = madd1(y[5], x[0], t[0])
   816  	C, t[1] = madd2(y[5], x[1], t[1], C)
   817  	C, t[2] = madd2(y[5], x[2], t[2], C)
   818  	C, t[3] = madd2(y[5], x[3], t[3], C)
   819  	C, t[4] = madd2(y[5], x[4], t[4], C)
   820  	C, t[5] = madd2(y[5], x[5], t[5], C)
   821  	C, t[6] = madd2(y[5], x[6], t[6], C)
   822  	C, t[7] = madd2(y[5], x[7], t[7], C)
   823  	C, t[8] = madd2(y[5], x[8], t[8], C)
   824  	C, t[9] = madd2(y[5], x[9], t[9], C)
   825  
   826  	t[10], D = bits.Add64(t[10], C, 0)
   827  
   828  	// m = t[0]n'[0] mod W
   829  	m = t[0] * qInvNeg
   830  
   831  	// -----------------------------------
   832  	// Second loop
   833  	C = madd0(m, q0, t[0])
   834  	C, t[0] = madd2(m, q1, t[1], C)
   835  	C, t[1] = madd2(m, q2, t[2], C)
   836  	C, t[2] = madd2(m, q3, t[3], C)
   837  	C, t[3] = madd2(m, q4, t[4], C)
   838  	C, t[4] = madd2(m, q5, t[5], C)
   839  	C, t[5] = madd2(m, q6, t[6], C)
   840  	C, t[6] = madd2(m, q7, t[7], C)
   841  	C, t[7] = madd2(m, q8, t[8], C)
   842  	C, t[8] = madd2(m, q9, t[9], C)
   843  
   844  	t[9], C = bits.Add64(t[10], C, 0)
   845  	t[10], _ = bits.Add64(0, D, C)
   846  	// -----------------------------------
   847  	// First loop
   848  
   849  	C, t[0] = madd1(y[6], x[0], t[0])
   850  	C, t[1] = madd2(y[6], x[1], t[1], C)
   851  	C, t[2] = madd2(y[6], x[2], t[2], C)
   852  	C, t[3] = madd2(y[6], x[3], t[3], C)
   853  	C, t[4] = madd2(y[6], x[4], t[4], C)
   854  	C, t[5] = madd2(y[6], x[5], t[5], C)
   855  	C, t[6] = madd2(y[6], x[6], t[6], C)
   856  	C, t[7] = madd2(y[6], x[7], t[7], C)
   857  	C, t[8] = madd2(y[6], x[8], t[8], C)
   858  	C, t[9] = madd2(y[6], x[9], t[9], C)
   859  
   860  	t[10], D = bits.Add64(t[10], C, 0)
   861  
   862  	// m = t[0]n'[0] mod W
   863  	m = t[0] * qInvNeg
   864  
   865  	// -----------------------------------
   866  	// Second loop
   867  	C = madd0(m, q0, t[0])
   868  	C, t[0] = madd2(m, q1, t[1], C)
   869  	C, t[1] = madd2(m, q2, t[2], C)
   870  	C, t[2] = madd2(m, q3, t[3], C)
   871  	C, t[3] = madd2(m, q4, t[4], C)
   872  	C, t[4] = madd2(m, q5, t[5], C)
   873  	C, t[5] = madd2(m, q6, t[6], C)
   874  	C, t[6] = madd2(m, q7, t[7], C)
   875  	C, t[7] = madd2(m, q8, t[8], C)
   876  	C, t[8] = madd2(m, q9, t[9], C)
   877  
   878  	t[9], C = bits.Add64(t[10], C, 0)
   879  	t[10], _ = bits.Add64(0, D, C)
   880  	// -----------------------------------
   881  	// First loop
   882  
   883  	C, t[0] = madd1(y[7], x[0], t[0])
   884  	C, t[1] = madd2(y[7], x[1], t[1], C)
   885  	C, t[2] = madd2(y[7], x[2], t[2], C)
   886  	C, t[3] = madd2(y[7], x[3], t[3], C)
   887  	C, t[4] = madd2(y[7], x[4], t[4], C)
   888  	C, t[5] = madd2(y[7], x[5], t[5], C)
   889  	C, t[6] = madd2(y[7], x[6], t[6], C)
   890  	C, t[7] = madd2(y[7], x[7], t[7], C)
   891  	C, t[8] = madd2(y[7], x[8], t[8], C)
   892  	C, t[9] = madd2(y[7], x[9], t[9], C)
   893  
   894  	t[10], D = bits.Add64(t[10], C, 0)
   895  
   896  	// m = t[0]n'[0] mod W
   897  	m = t[0] * qInvNeg
   898  
   899  	// -----------------------------------
   900  	// Second loop
   901  	C = madd0(m, q0, t[0])
   902  	C, t[0] = madd2(m, q1, t[1], C)
   903  	C, t[1] = madd2(m, q2, t[2], C)
   904  	C, t[2] = madd2(m, q3, t[3], C)
   905  	C, t[3] = madd2(m, q4, t[4], C)
   906  	C, t[4] = madd2(m, q5, t[5], C)
   907  	C, t[5] = madd2(m, q6, t[6], C)
   908  	C, t[6] = madd2(m, q7, t[7], C)
   909  	C, t[7] = madd2(m, q8, t[8], C)
   910  	C, t[8] = madd2(m, q9, t[9], C)
   911  
   912  	t[9], C = bits.Add64(t[10], C, 0)
   913  	t[10], _ = bits.Add64(0, D, C)
   914  	// -----------------------------------
   915  	// First loop
   916  
   917  	C, t[0] = madd1(y[8], x[0], t[0])
   918  	C, t[1] = madd2(y[8], x[1], t[1], C)
   919  	C, t[2] = madd2(y[8], x[2], t[2], C)
   920  	C, t[3] = madd2(y[8], x[3], t[3], C)
   921  	C, t[4] = madd2(y[8], x[4], t[4], C)
   922  	C, t[5] = madd2(y[8], x[5], t[5], C)
   923  	C, t[6] = madd2(y[8], x[6], t[6], C)
   924  	C, t[7] = madd2(y[8], x[7], t[7], C)
   925  	C, t[8] = madd2(y[8], x[8], t[8], C)
   926  	C, t[9] = madd2(y[8], x[9], t[9], C)
   927  
   928  	t[10], D = bits.Add64(t[10], C, 0)
   929  
   930  	// m = t[0]n'[0] mod W
   931  	m = t[0] * qInvNeg
   932  
   933  	// -----------------------------------
   934  	// Second loop
   935  	C = madd0(m, q0, t[0])
   936  	C, t[0] = madd2(m, q1, t[1], C)
   937  	C, t[1] = madd2(m, q2, t[2], C)
   938  	C, t[2] = madd2(m, q3, t[3], C)
   939  	C, t[3] = madd2(m, q4, t[4], C)
   940  	C, t[4] = madd2(m, q5, t[5], C)
   941  	C, t[5] = madd2(m, q6, t[6], C)
   942  	C, t[6] = madd2(m, q7, t[7], C)
   943  	C, t[7] = madd2(m, q8, t[8], C)
   944  	C, t[8] = madd2(m, q9, t[9], C)
   945  
   946  	t[9], C = bits.Add64(t[10], C, 0)
   947  	t[10], _ = bits.Add64(0, D, C)
   948  	// -----------------------------------
   949  	// First loop
   950  
   951  	C, t[0] = madd1(y[9], x[0], t[0])
   952  	C, t[1] = madd2(y[9], x[1], t[1], C)
   953  	C, t[2] = madd2(y[9], x[2], t[2], C)
   954  	C, t[3] = madd2(y[9], x[3], t[3], C)
   955  	C, t[4] = madd2(y[9], x[4], t[4], C)
   956  	C, t[5] = madd2(y[9], x[5], t[5], C)
   957  	C, t[6] = madd2(y[9], x[6], t[6], C)
   958  	C, t[7] = madd2(y[9], x[7], t[7], C)
   959  	C, t[8] = madd2(y[9], x[8], t[8], C)
   960  	C, t[9] = madd2(y[9], x[9], t[9], C)
   961  
   962  	t[10], D = bits.Add64(t[10], C, 0)
   963  
   964  	// m = t[0]n'[0] mod W
   965  	m = t[0] * qInvNeg
   966  
   967  	// -----------------------------------
   968  	// Second loop
   969  	C = madd0(m, q0, t[0])
   970  	C, t[0] = madd2(m, q1, t[1], C)
   971  	C, t[1] = madd2(m, q2, t[2], C)
   972  	C, t[2] = madd2(m, q3, t[3], C)
   973  	C, t[3] = madd2(m, q4, t[4], C)
   974  	C, t[4] = madd2(m, q5, t[5], C)
   975  	C, t[5] = madd2(m, q6, t[6], C)
   976  	C, t[6] = madd2(m, q7, t[7], C)
   977  	C, t[7] = madd2(m, q8, t[8], C)
   978  	C, t[8] = madd2(m, q9, t[9], C)
   979  
   980  	t[9], C = bits.Add64(t[10], C, 0)
   981  	t[10], _ = bits.Add64(0, D, C)
   982  
   983  	if t[10] != 0 {
   984  		// we need to reduce, we have a result on 11 words
   985  		var b uint64
   986  		z[0], b = bits.Sub64(t[0], q0, 0)
   987  		z[1], b = bits.Sub64(t[1], q1, b)
   988  		z[2], b = bits.Sub64(t[2], q2, b)
   989  		z[3], b = bits.Sub64(t[3], q3, b)
   990  		z[4], b = bits.Sub64(t[4], q4, b)
   991  		z[5], b = bits.Sub64(t[5], q5, b)
   992  		z[6], b = bits.Sub64(t[6], q6, b)
   993  		z[7], b = bits.Sub64(t[7], q7, b)
   994  		z[8], b = bits.Sub64(t[8], q8, b)
   995  		z[9], _ = bits.Sub64(t[9], q9, b)
   996  		return
   997  	}
   998  
   999  	// copy t into z
  1000  	z[0] = t[0]
  1001  	z[1] = t[1]
  1002  	z[2] = t[2]
  1003  	z[3] = t[3]
  1004  	z[4] = t[4]
  1005  	z[5] = t[5]
  1006  	z[6] = t[6]
  1007  	z[7] = t[7]
  1008  	z[8] = t[8]
  1009  	z[9] = t[9]
  1010  
  1011  	// if z ⩾ q → z -= q
  1012  	if !z.smallerThanModulus() {
  1013  		var b uint64
  1014  		z[0], b = bits.Sub64(z[0], q0, 0)
  1015  		z[1], b = bits.Sub64(z[1], q1, b)
  1016  		z[2], b = bits.Sub64(z[2], q2, b)
  1017  		z[3], b = bits.Sub64(z[3], q3, b)
  1018  		z[4], b = bits.Sub64(z[4], q4, b)
  1019  		z[5], b = bits.Sub64(z[5], q5, b)
  1020  		z[6], b = bits.Sub64(z[6], q6, b)
  1021  		z[7], b = bits.Sub64(z[7], q7, b)
  1022  		z[8], b = bits.Sub64(z[8], q8, b)
  1023  		z[9], _ = bits.Sub64(z[9], q9, b)
  1024  	}
  1025  }
  1026  
  1027  func _fromMontGeneric(z *Element) {
  1028  	// the following lines implement z = z * 1
  1029  	// with a modified CIOS montgomery multiplication
  1030  	// see Mul for algorithm documentation
  1031  	{
  1032  		// m = z[0]n'[0] mod W
  1033  		m := z[0] * qInvNeg
  1034  		C := madd0(m, q0, z[0])
  1035  		C, z[0] = madd2(m, q1, z[1], C)
  1036  		C, z[1] = madd2(m, q2, z[2], C)
  1037  		C, z[2] = madd2(m, q3, z[3], C)
  1038  		C, z[3] = madd2(m, q4, z[4], C)
  1039  		C, z[4] = madd2(m, q5, z[5], C)
  1040  		C, z[5] = madd2(m, q6, z[6], C)
  1041  		C, z[6] = madd2(m, q7, z[7], C)
  1042  		C, z[7] = madd2(m, q8, z[8], C)
  1043  		C, z[8] = madd2(m, q9, z[9], C)
  1044  		z[9] = C
  1045  	}
  1046  	{
  1047  		// m = z[0]n'[0] mod W
  1048  		m := z[0] * qInvNeg
  1049  		C := madd0(m, q0, z[0])
  1050  		C, z[0] = madd2(m, q1, z[1], C)
  1051  		C, z[1] = madd2(m, q2, z[2], C)
  1052  		C, z[2] = madd2(m, q3, z[3], C)
  1053  		C, z[3] = madd2(m, q4, z[4], C)
  1054  		C, z[4] = madd2(m, q5, z[5], C)
  1055  		C, z[5] = madd2(m, q6, z[6], C)
  1056  		C, z[6] = madd2(m, q7, z[7], C)
  1057  		C, z[7] = madd2(m, q8, z[8], C)
  1058  		C, z[8] = madd2(m, q9, z[9], C)
  1059  		z[9] = C
  1060  	}
  1061  	{
  1062  		// m = z[0]n'[0] mod W
  1063  		m := z[0] * qInvNeg
  1064  		C := madd0(m, q0, z[0])
  1065  		C, z[0] = madd2(m, q1, z[1], C)
  1066  		C, z[1] = madd2(m, q2, z[2], C)
  1067  		C, z[2] = madd2(m, q3, z[3], C)
  1068  		C, z[3] = madd2(m, q4, z[4], C)
  1069  		C, z[4] = madd2(m, q5, z[5], C)
  1070  		C, z[5] = madd2(m, q6, z[6], C)
  1071  		C, z[6] = madd2(m, q7, z[7], C)
  1072  		C, z[7] = madd2(m, q8, z[8], C)
  1073  		C, z[8] = madd2(m, q9, z[9], C)
  1074  		z[9] = C
  1075  	}
  1076  	{
  1077  		// m = z[0]n'[0] mod W
  1078  		m := z[0] * qInvNeg
  1079  		C := madd0(m, q0, z[0])
  1080  		C, z[0] = madd2(m, q1, z[1], C)
  1081  		C, z[1] = madd2(m, q2, z[2], C)
  1082  		C, z[2] = madd2(m, q3, z[3], C)
  1083  		C, z[3] = madd2(m, q4, z[4], C)
  1084  		C, z[4] = madd2(m, q5, z[5], C)
  1085  		C, z[5] = madd2(m, q6, z[6], C)
  1086  		C, z[6] = madd2(m, q7, z[7], C)
  1087  		C, z[7] = madd2(m, q8, z[8], C)
  1088  		C, z[8] = madd2(m, q9, z[9], C)
  1089  		z[9] = C
  1090  	}
  1091  	{
  1092  		// m = z[0]n'[0] mod W
  1093  		m := z[0] * qInvNeg
  1094  		C := madd0(m, q0, z[0])
  1095  		C, z[0] = madd2(m, q1, z[1], C)
  1096  		C, z[1] = madd2(m, q2, z[2], C)
  1097  		C, z[2] = madd2(m, q3, z[3], C)
  1098  		C, z[3] = madd2(m, q4, z[4], C)
  1099  		C, z[4] = madd2(m, q5, z[5], C)
  1100  		C, z[5] = madd2(m, q6, z[6], C)
  1101  		C, z[6] = madd2(m, q7, z[7], C)
  1102  		C, z[7] = madd2(m, q8, z[8], C)
  1103  		C, z[8] = madd2(m, q9, z[9], C)
  1104  		z[9] = C
  1105  	}
  1106  	{
  1107  		// m = z[0]n'[0] mod W
  1108  		m := z[0] * qInvNeg
  1109  		C := madd0(m, q0, z[0])
  1110  		C, z[0] = madd2(m, q1, z[1], C)
  1111  		C, z[1] = madd2(m, q2, z[2], C)
  1112  		C, z[2] = madd2(m, q3, z[3], C)
  1113  		C, z[3] = madd2(m, q4, z[4], C)
  1114  		C, z[4] = madd2(m, q5, z[5], C)
  1115  		C, z[5] = madd2(m, q6, z[6], C)
  1116  		C, z[6] = madd2(m, q7, z[7], C)
  1117  		C, z[7] = madd2(m, q8, z[8], C)
  1118  		C, z[8] = madd2(m, q9, z[9], C)
  1119  		z[9] = C
  1120  	}
  1121  	{
  1122  		// m = z[0]n'[0] mod W
  1123  		m := z[0] * qInvNeg
  1124  		C := madd0(m, q0, z[0])
  1125  		C, z[0] = madd2(m, q1, z[1], C)
  1126  		C, z[1] = madd2(m, q2, z[2], C)
  1127  		C, z[2] = madd2(m, q3, z[3], C)
  1128  		C, z[3] = madd2(m, q4, z[4], C)
  1129  		C, z[4] = madd2(m, q5, z[5], C)
  1130  		C, z[5] = madd2(m, q6, z[6], C)
  1131  		C, z[6] = madd2(m, q7, z[7], C)
  1132  		C, z[7] = madd2(m, q8, z[8], C)
  1133  		C, z[8] = madd2(m, q9, z[9], C)
  1134  		z[9] = C
  1135  	}
  1136  	{
  1137  		// m = z[0]n'[0] mod W
  1138  		m := z[0] * qInvNeg
  1139  		C := madd0(m, q0, z[0])
  1140  		C, z[0] = madd2(m, q1, z[1], C)
  1141  		C, z[1] = madd2(m, q2, z[2], C)
  1142  		C, z[2] = madd2(m, q3, z[3], C)
  1143  		C, z[3] = madd2(m, q4, z[4], C)
  1144  		C, z[4] = madd2(m, q5, z[5], C)
  1145  		C, z[5] = madd2(m, q6, z[6], C)
  1146  		C, z[6] = madd2(m, q7, z[7], C)
  1147  		C, z[7] = madd2(m, q8, z[8], C)
  1148  		C, z[8] = madd2(m, q9, z[9], C)
  1149  		z[9] = C
  1150  	}
  1151  	{
  1152  		// m = z[0]n'[0] mod W
  1153  		m := z[0] * qInvNeg
  1154  		C := madd0(m, q0, z[0])
  1155  		C, z[0] = madd2(m, q1, z[1], C)
  1156  		C, z[1] = madd2(m, q2, z[2], C)
  1157  		C, z[2] = madd2(m, q3, z[3], C)
  1158  		C, z[3] = madd2(m, q4, z[4], C)
  1159  		C, z[4] = madd2(m, q5, z[5], C)
  1160  		C, z[5] = madd2(m, q6, z[6], C)
  1161  		C, z[6] = madd2(m, q7, z[7], C)
  1162  		C, z[7] = madd2(m, q8, z[8], C)
  1163  		C, z[8] = madd2(m, q9, z[9], C)
  1164  		z[9] = C
  1165  	}
  1166  	{
  1167  		// m = z[0]n'[0] mod W
  1168  		m := z[0] * qInvNeg
  1169  		C := madd0(m, q0, z[0])
  1170  		C, z[0] = madd2(m, q1, z[1], C)
  1171  		C, z[1] = madd2(m, q2, z[2], C)
  1172  		C, z[2] = madd2(m, q3, z[3], C)
  1173  		C, z[3] = madd2(m, q4, z[4], C)
  1174  		C, z[4] = madd2(m, q5, z[5], C)
  1175  		C, z[5] = madd2(m, q6, z[6], C)
  1176  		C, z[6] = madd2(m, q7, z[7], C)
  1177  		C, z[7] = madd2(m, q8, z[8], C)
  1178  		C, z[8] = madd2(m, q9, z[9], C)
  1179  		z[9] = C
  1180  	}
  1181  
  1182  	// if z ⩾ q → z -= q
  1183  	if !z.smallerThanModulus() {
  1184  		var b uint64
  1185  		z[0], b = bits.Sub64(z[0], q0, 0)
  1186  		z[1], b = bits.Sub64(z[1], q1, b)
  1187  		z[2], b = bits.Sub64(z[2], q2, b)
  1188  		z[3], b = bits.Sub64(z[3], q3, b)
  1189  		z[4], b = bits.Sub64(z[4], q4, b)
  1190  		z[5], b = bits.Sub64(z[5], q5, b)
  1191  		z[6], b = bits.Sub64(z[6], q6, b)
  1192  		z[7], b = bits.Sub64(z[7], q7, b)
  1193  		z[8], b = bits.Sub64(z[8], q8, b)
  1194  		z[9], _ = bits.Sub64(z[9], q9, b)
  1195  	}
  1196  }
  1197  
  1198  func _reduceGeneric(z *Element) {
  1199  
  1200  	// if z ⩾ q → z -= q
  1201  	if !z.smallerThanModulus() {
  1202  		var b uint64
  1203  		z[0], b = bits.Sub64(z[0], q0, 0)
  1204  		z[1], b = bits.Sub64(z[1], q1, b)
  1205  		z[2], b = bits.Sub64(z[2], q2, b)
  1206  		z[3], b = bits.Sub64(z[3], q3, b)
  1207  		z[4], b = bits.Sub64(z[4], q4, b)
  1208  		z[5], b = bits.Sub64(z[5], q5, b)
  1209  		z[6], b = bits.Sub64(z[6], q6, b)
  1210  		z[7], b = bits.Sub64(z[7], q7, b)
  1211  		z[8], b = bits.Sub64(z[8], q8, b)
  1212  		z[9], _ = bits.Sub64(z[9], q9, b)
  1213  	}
  1214  }
  1215  
  1216  // BatchInvert returns a new slice with every element inverted.
  1217  // Uses Montgomery batch inversion trick
  1218  func BatchInvert(a []Element) []Element {
  1219  	res := make([]Element, len(a))
  1220  	if len(a) == 0 {
  1221  		return res
  1222  	}
  1223  
  1224  	zeroes := bitset.New(uint(len(a)))
  1225  	accumulator := One()
  1226  
  1227  	for i := 0; i < len(a); i++ {
  1228  		if a[i].IsZero() {
  1229  			zeroes.Set(uint(i))
  1230  			continue
  1231  		}
  1232  		res[i] = accumulator
  1233  		accumulator.Mul(&accumulator, &a[i])
  1234  	}
  1235  
  1236  	accumulator.Inverse(&accumulator)
  1237  
  1238  	for i := len(a) - 1; i >= 0; i-- {
  1239  		if zeroes.Test(uint(i)) {
  1240  			continue
  1241  		}
  1242  		res[i].Mul(&res[i], &accumulator)
  1243  		accumulator.Mul(&accumulator, &a[i])
  1244  	}
  1245  
  1246  	return res
  1247  }
  1248  
  1249  func _butterflyGeneric(a, b *Element) {
  1250  	t := *a
  1251  	a.Add(a, b)
  1252  	b.Sub(&t, b)
  1253  }
  1254  
  1255  // BitLen returns the minimum number of bits needed to represent z
  1256  // returns 0 if z == 0
  1257  func (z *Element) BitLen() int {
  1258  	if z[9] != 0 {
  1259  		return 576 + bits.Len64(z[9])
  1260  	}
  1261  	if z[8] != 0 {
  1262  		return 512 + bits.Len64(z[8])
  1263  	}
  1264  	if z[7] != 0 {
  1265  		return 448 + bits.Len64(z[7])
  1266  	}
  1267  	if z[6] != 0 {
  1268  		return 384 + bits.Len64(z[6])
  1269  	}
  1270  	if z[5] != 0 {
  1271  		return 320 + bits.Len64(z[5])
  1272  	}
  1273  	if z[4] != 0 {
  1274  		return 256 + bits.Len64(z[4])
  1275  	}
  1276  	if z[3] != 0 {
  1277  		return 192 + bits.Len64(z[3])
  1278  	}
  1279  	if z[2] != 0 {
  1280  		return 128 + bits.Len64(z[2])
  1281  	}
  1282  	if z[1] != 0 {
  1283  		return 64 + bits.Len64(z[1])
  1284  	}
  1285  	return bits.Len64(z[0])
  1286  }
  1287  
  1288  // Hash msg to count prime field elements.
  1289  // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2
  1290  func Hash(msg, dst []byte, count int) ([]Element, error) {
  1291  	// 128 bits of security
  1292  	// L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128
  1293  	const Bytes = 1 + (Bits-1)/8
  1294  	const L = 16 + Bytes
  1295  
  1296  	lenInBytes := count * L
  1297  	pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes)
  1298  	if err != nil {
  1299  		return nil, err
  1300  	}
  1301  
  1302  	// get temporary big int from the pool
  1303  	vv := pool.BigInt.Get()
  1304  
  1305  	res := make([]Element, count)
  1306  	for i := 0; i < count; i++ {
  1307  		vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L])
  1308  		res[i].SetBigInt(vv)
  1309  	}
  1310  
  1311  	// release object into pool
  1312  	pool.BigInt.Put(vv)
  1313  
  1314  	return res, nil
  1315  }
  1316  
  1317  // Exp z = xᵏ (mod q)
  1318  func (z *Element) Exp(x Element, k *big.Int) *Element {
  1319  	if k.IsUint64() && k.Uint64() == 0 {
  1320  		return z.SetOne()
  1321  	}
  1322  
  1323  	e := k
  1324  	if k.Sign() == -1 {
  1325  		// negative k, we invert
  1326  		// if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q)
  1327  		x.Inverse(&x)
  1328  
  1329  		// we negate k in a temp big.Int since
  1330  		// Int.Bit(_) of k and -k is different
  1331  		e = pool.BigInt.Get()
  1332  		defer pool.BigInt.Put(e)
  1333  		e.Neg(k)
  1334  	}
  1335  
  1336  	z.Set(&x)
  1337  
  1338  	for i := e.BitLen() - 2; i >= 0; i-- {
  1339  		z.Square(z)
  1340  		if e.Bit(i) == 1 {
  1341  			z.Mul(z, &x)
  1342  		}
  1343  	}
  1344  
  1345  	return z
  1346  }
  1347  
  1348  // rSquare where r is the Montgommery constant
  1349  // see section 2.3.2 of Tolga Acar's thesis
  1350  // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
  1351  var rSquare = Element{
  1352  	7358459907925294924,
  1353  	14414180951914241931,
  1354  	16619482658146888203,
  1355  	760736596725344926,
  1356  	12753071240931896792,
  1357  	13425190760400245818,
  1358  	12591714441439252728,
  1359  	15325516497554583360,
  1360  	5301152003049442834,
  1361  	35368377961363834,
  1362  }
  1363  
  1364  // toMont converts z to Montgomery form
  1365  // sets and returns z = z * r²
  1366  func (z *Element) toMont() *Element {
  1367  	return z.Mul(z, &rSquare)
  1368  }
  1369  
  1370  // String returns the decimal representation of z as generated by
  1371  // z.Text(10).
  1372  func (z *Element) String() string {
  1373  	return z.Text(10)
  1374  }
  1375  
  1376  // toBigInt returns z as a big.Int in Montgomery form
  1377  func (z *Element) toBigInt(res *big.Int) *big.Int {
  1378  	var b [Bytes]byte
  1379  	binary.BigEndian.PutUint64(b[72:80], z[0])
  1380  	binary.BigEndian.PutUint64(b[64:72], z[1])
  1381  	binary.BigEndian.PutUint64(b[56:64], z[2])
  1382  	binary.BigEndian.PutUint64(b[48:56], z[3])
  1383  	binary.BigEndian.PutUint64(b[40:48], z[4])
  1384  	binary.BigEndian.PutUint64(b[32:40], z[5])
  1385  	binary.BigEndian.PutUint64(b[24:32], z[6])
  1386  	binary.BigEndian.PutUint64(b[16:24], z[7])
  1387  	binary.BigEndian.PutUint64(b[8:16], z[8])
  1388  	binary.BigEndian.PutUint64(b[0:8], z[9])
  1389  
  1390  	return res.SetBytes(b[:])
  1391  }
  1392  
  1393  // Text returns the string representation of z in the given base.
  1394  // Base must be between 2 and 36, inclusive. The result uses the
  1395  // lower-case letters 'a' to 'z' for digit values 10 to 35.
  1396  // No prefix (such as "0x") is added to the string. If z is a nil
  1397  // pointer it returns "<nil>".
  1398  // If base == 10 and -z fits in a uint16 prefix "-" is added to the string.
  1399  func (z *Element) Text(base int) string {
  1400  	if base < 2 || base > 36 {
  1401  		panic("invalid base")
  1402  	}
  1403  	if z == nil {
  1404  		return "<nil>"
  1405  	}
  1406  
  1407  	const maxUint16 = 65535
  1408  	if base == 10 {
  1409  		var zzNeg Element
  1410  		zzNeg.Neg(z)
  1411  		zzNeg.fromMont()
  1412  		if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 {
  1413  			return "-" + strconv.FormatUint(zzNeg[0], base)
  1414  		}
  1415  	}
  1416  	zz := *z
  1417  	zz.fromMont()
  1418  	if zz.FitsOnOneWord() {
  1419  		return strconv.FormatUint(zz[0], base)
  1420  	}
  1421  	vv := pool.BigInt.Get()
  1422  	r := zz.toBigInt(vv).Text(base)
  1423  	pool.BigInt.Put(vv)
  1424  	return r
  1425  }
  1426  
  1427  // BigInt sets and return z as a *big.Int
  1428  func (z *Element) BigInt(res *big.Int) *big.Int {
  1429  	_z := *z
  1430  	_z.fromMont()
  1431  	return _z.toBigInt(res)
  1432  }
  1433  
  1434  // ToBigIntRegular returns z as a big.Int in regular form
  1435  //
  1436  // Deprecated: use BigInt(*big.Int) instead
  1437  func (z Element) ToBigIntRegular(res *big.Int) *big.Int {
  1438  	z.fromMont()
  1439  	return z.toBigInt(res)
  1440  }
  1441  
  1442  // Bits provides access to z by returning its value as a little-endian [10]uint64 array.
  1443  // Bits is intended to support implementation of missing low-level Element
  1444  // functionality outside this package; it should be avoided otherwise.
  1445  func (z *Element) Bits() [10]uint64 {
  1446  	_z := *z
  1447  	fromMont(&_z)
  1448  	return _z
  1449  }
  1450  
  1451  // Bytes returns the value of z as a big-endian byte array
  1452  func (z *Element) Bytes() (res [Bytes]byte) {
  1453  	BigEndian.PutElement(&res, *z)
  1454  	return
  1455  }
  1456  
  1457  // Marshal returns the value of z as a big-endian byte slice
  1458  func (z *Element) Marshal() []byte {
  1459  	b := z.Bytes()
  1460  	return b[:]
  1461  }
  1462  
  1463  // Unmarshal is an alias for SetBytes, it sets z to the value of e.
  1464  func (z *Element) Unmarshal(e []byte) {
  1465  	z.SetBytes(e)
  1466  }
  1467  
  1468  // SetBytes interprets e as the bytes of a big-endian unsigned integer,
  1469  // sets z to that value, and returns z.
  1470  func (z *Element) SetBytes(e []byte) *Element {
  1471  	if len(e) == Bytes {
  1472  		// fast path
  1473  		v, err := BigEndian.Element((*[Bytes]byte)(e))
  1474  		if err == nil {
  1475  			*z = v
  1476  			return z
  1477  		}
  1478  	}
  1479  
  1480  	// slow path.
  1481  	// get a big int from our pool
  1482  	vv := pool.BigInt.Get()
  1483  	vv.SetBytes(e)
  1484  
  1485  	// set big int
  1486  	z.SetBigInt(vv)
  1487  
  1488  	// put temporary object back in pool
  1489  	pool.BigInt.Put(vv)
  1490  
  1491  	return z
  1492  }
  1493  
  1494  // SetBytesCanonical interprets e as the bytes of a big-endian 80-byte integer.
  1495  // If e is not a 80-byte slice or encodes a value higher than q,
  1496  // SetBytesCanonical returns an error.
  1497  func (z *Element) SetBytesCanonical(e []byte) error {
  1498  	if len(e) != Bytes {
  1499  		return errors.New("invalid fp.Element encoding")
  1500  	}
  1501  	v, err := BigEndian.Element((*[Bytes]byte)(e))
  1502  	if err != nil {
  1503  		return err
  1504  	}
  1505  	*z = v
  1506  	return nil
  1507  }
  1508  
  1509  // SetBigInt sets z to v and returns z
  1510  func (z *Element) SetBigInt(v *big.Int) *Element {
  1511  	z.SetZero()
  1512  
  1513  	var zero big.Int
  1514  
  1515  	// fast path
  1516  	c := v.Cmp(&_modulus)
  1517  	if c == 0 {
  1518  		// v == 0
  1519  		return z
  1520  	} else if c != 1 && v.Cmp(&zero) != -1 {
  1521  		// 0 < v < q
  1522  		return z.setBigInt(v)
  1523  	}
  1524  
  1525  	// get temporary big int from the pool
  1526  	vv := pool.BigInt.Get()
  1527  
  1528  	// copy input + modular reduction
  1529  	vv.Mod(v, &_modulus)
  1530  
  1531  	// set big int byte value
  1532  	z.setBigInt(vv)
  1533  
  1534  	// release object into pool
  1535  	pool.BigInt.Put(vv)
  1536  	return z
  1537  }
  1538  
  1539  // setBigInt assumes 0 ⩽ v < q
  1540  func (z *Element) setBigInt(v *big.Int) *Element {
  1541  	vBits := v.Bits()
  1542  
  1543  	if bits.UintSize == 64 {
  1544  		for i := 0; i < len(vBits); i++ {
  1545  			z[i] = uint64(vBits[i])
  1546  		}
  1547  	} else {
  1548  		for i := 0; i < len(vBits); i++ {
  1549  			if i%2 == 0 {
  1550  				z[i/2] = uint64(vBits[i])
  1551  			} else {
  1552  				z[i/2] |= uint64(vBits[i]) << 32
  1553  			}
  1554  		}
  1555  	}
  1556  
  1557  	return z.toMont()
  1558  }
  1559  
  1560  // SetString creates a big.Int with number and calls SetBigInt on z
  1561  //
  1562  // The number prefix determines the actual base: A prefix of
  1563  // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8,
  1564  // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10
  1565  // and no prefix is accepted.
  1566  //
  1567  // For base 16, lower and upper case letters are considered the same:
  1568  // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15.
  1569  //
  1570  // An underscore character ”_” may appear between a base
  1571  // prefix and an adjacent digit, and between successive digits; such
  1572  // underscores do not change the value of the number.
  1573  // Incorrect placement of underscores is reported as a panic if there
  1574  // are no other errors.
  1575  //
  1576  // If the number is invalid this method leaves z unchanged and returns nil, error.
  1577  func (z *Element) SetString(number string) (*Element, error) {
  1578  	// get temporary big int from the pool
  1579  	vv := pool.BigInt.Get()
  1580  
  1581  	if _, ok := vv.SetString(number, 0); !ok {
  1582  		return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number)
  1583  	}
  1584  
  1585  	z.SetBigInt(vv)
  1586  
  1587  	// release object into pool
  1588  	pool.BigInt.Put(vv)
  1589  
  1590  	return z, nil
  1591  }
  1592  
  1593  // MarshalJSON returns json encoding of z (z.Text(10))
  1594  // If z == nil, returns null
  1595  func (z *Element) MarshalJSON() ([]byte, error) {
  1596  	if z == nil {
  1597  		return []byte("null"), nil
  1598  	}
  1599  	const maxSafeBound = 15 // we encode it as number if it's small
  1600  	s := z.Text(10)
  1601  	if len(s) <= maxSafeBound {
  1602  		return []byte(s), nil
  1603  	}
  1604  	var sbb strings.Builder
  1605  	sbb.WriteByte('"')
  1606  	sbb.WriteString(s)
  1607  	sbb.WriteByte('"')
  1608  	return []byte(sbb.String()), nil
  1609  }
  1610  
  1611  // UnmarshalJSON accepts numbers and strings as input
  1612  // See Element.SetString for valid prefixes (0x, 0b, ...)
  1613  func (z *Element) UnmarshalJSON(data []byte) error {
  1614  	s := string(data)
  1615  	if len(s) > Bits*3 {
  1616  		return errors.New("value too large (max = Element.Bits * 3)")
  1617  	}
  1618  
  1619  	// we accept numbers and strings, remove leading and trailing quotes if any
  1620  	if len(s) > 0 && s[0] == '"' {
  1621  		s = s[1:]
  1622  	}
  1623  	if len(s) > 0 && s[len(s)-1] == '"' {
  1624  		s = s[:len(s)-1]
  1625  	}
  1626  
  1627  	// get temporary big int from the pool
  1628  	vv := pool.BigInt.Get()
  1629  
  1630  	if _, ok := vv.SetString(s, 0); !ok {
  1631  		return errors.New("can't parse into a big.Int: " + s)
  1632  	}
  1633  
  1634  	z.SetBigInt(vv)
  1635  
  1636  	// release object into pool
  1637  	pool.BigInt.Put(vv)
  1638  	return nil
  1639  }
  1640  
  1641  // A ByteOrder specifies how to convert byte slices into a Element
  1642  type ByteOrder interface {
  1643  	Element(*[Bytes]byte) (Element, error)
  1644  	PutElement(*[Bytes]byte, Element)
  1645  	String() string
  1646  }
  1647  
  1648  // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder.
  1649  var BigEndian bigEndian
  1650  
  1651  type bigEndian struct{}
  1652  
  1653  // Element interpret b is a big-endian 80-byte slice.
  1654  // If b encodes a value higher than q, Element returns error.
  1655  func (bigEndian) Element(b *[Bytes]byte) (Element, error) {
  1656  	var z Element
  1657  	z[0] = binary.BigEndian.Uint64((*b)[72:80])
  1658  	z[1] = binary.BigEndian.Uint64((*b)[64:72])
  1659  	z[2] = binary.BigEndian.Uint64((*b)[56:64])
  1660  	z[3] = binary.BigEndian.Uint64((*b)[48:56])
  1661  	z[4] = binary.BigEndian.Uint64((*b)[40:48])
  1662  	z[5] = binary.BigEndian.Uint64((*b)[32:40])
  1663  	z[6] = binary.BigEndian.Uint64((*b)[24:32])
  1664  	z[7] = binary.BigEndian.Uint64((*b)[16:24])
  1665  	z[8] = binary.BigEndian.Uint64((*b)[8:16])
  1666  	z[9] = binary.BigEndian.Uint64((*b)[0:8])
  1667  
  1668  	if !z.smallerThanModulus() {
  1669  		return Element{}, errors.New("invalid fp.Element encoding")
  1670  	}
  1671  
  1672  	z.toMont()
  1673  	return z, nil
  1674  }
  1675  
  1676  func (bigEndian) PutElement(b *[Bytes]byte, e Element) {
  1677  	e.fromMont()
  1678  	binary.BigEndian.PutUint64((*b)[72:80], e[0])
  1679  	binary.BigEndian.PutUint64((*b)[64:72], e[1])
  1680  	binary.BigEndian.PutUint64((*b)[56:64], e[2])
  1681  	binary.BigEndian.PutUint64((*b)[48:56], e[3])
  1682  	binary.BigEndian.PutUint64((*b)[40:48], e[4])
  1683  	binary.BigEndian.PutUint64((*b)[32:40], e[5])
  1684  	binary.BigEndian.PutUint64((*b)[24:32], e[6])
  1685  	binary.BigEndian.PutUint64((*b)[16:24], e[7])
  1686  	binary.BigEndian.PutUint64((*b)[8:16], e[8])
  1687  	binary.BigEndian.PutUint64((*b)[0:8], e[9])
  1688  }
  1689  
  1690  func (bigEndian) String() string { return "BigEndian" }
  1691  
  1692  // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder.
  1693  var LittleEndian littleEndian
  1694  
  1695  type littleEndian struct{}
  1696  
  1697  func (littleEndian) Element(b *[Bytes]byte) (Element, error) {
  1698  	var z Element
  1699  	z[0] = binary.LittleEndian.Uint64((*b)[0:8])
  1700  	z[1] = binary.LittleEndian.Uint64((*b)[8:16])
  1701  	z[2] = binary.LittleEndian.Uint64((*b)[16:24])
  1702  	z[3] = binary.LittleEndian.Uint64((*b)[24:32])
  1703  	z[4] = binary.LittleEndian.Uint64((*b)[32:40])
  1704  	z[5] = binary.LittleEndian.Uint64((*b)[40:48])
  1705  	z[6] = binary.LittleEndian.Uint64((*b)[48:56])
  1706  	z[7] = binary.LittleEndian.Uint64((*b)[56:64])
  1707  	z[8] = binary.LittleEndian.Uint64((*b)[64:72])
  1708  	z[9] = binary.LittleEndian.Uint64((*b)[72:80])
  1709  
  1710  	if !z.smallerThanModulus() {
  1711  		return Element{}, errors.New("invalid fp.Element encoding")
  1712  	}
  1713  
  1714  	z.toMont()
  1715  	return z, nil
  1716  }
  1717  
  1718  func (littleEndian) PutElement(b *[Bytes]byte, e Element) {
  1719  	e.fromMont()
  1720  	binary.LittleEndian.PutUint64((*b)[0:8], e[0])
  1721  	binary.LittleEndian.PutUint64((*b)[8:16], e[1])
  1722  	binary.LittleEndian.PutUint64((*b)[16:24], e[2])
  1723  	binary.LittleEndian.PutUint64((*b)[24:32], e[3])
  1724  	binary.LittleEndian.PutUint64((*b)[32:40], e[4])
  1725  	binary.LittleEndian.PutUint64((*b)[40:48], e[5])
  1726  	binary.LittleEndian.PutUint64((*b)[48:56], e[6])
  1727  	binary.LittleEndian.PutUint64((*b)[56:64], e[7])
  1728  	binary.LittleEndian.PutUint64((*b)[64:72], e[8])
  1729  	binary.LittleEndian.PutUint64((*b)[72:80], e[9])
  1730  }
  1731  
  1732  func (littleEndian) String() string { return "LittleEndian" }
  1733  
  1734  // Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
  1735  func (z *Element) Legendre() int {
  1736  	var l Element
  1737  	// z^((q-1)/2)
  1738  	l.expByLegendreExp(*z)
  1739  
  1740  	if l.IsZero() {
  1741  		return 0
  1742  	}
  1743  
  1744  	// if l == 1
  1745  	if l.IsOne() {
  1746  		return 1
  1747  	}
  1748  	return -1
  1749  }
  1750  
  1751  // Sqrt z = √x (mod q)
  1752  // if the square root doesn't exist (x is not a square mod q)
  1753  // Sqrt leaves z unchanged and returns nil
  1754  func (z *Element) Sqrt(x *Element) *Element {
  1755  	// q ≡ 5 (mod 8)
  1756  	// see modSqrt5Mod8Prime in math/big/int.go
  1757  	var one, alpha, beta, tx, square Element
  1758  	one.SetOne()
  1759  	tx.Double(x)
  1760  	alpha.expBySqrtExp(tx)
  1761  
  1762  	beta.Square(&alpha).
  1763  		Mul(&beta, &tx).
  1764  		Sub(&beta, &one).
  1765  		Mul(&beta, x).
  1766  		Mul(&beta, &alpha)
  1767  
  1768  	// as we didn't compute the legendre symbol, ensure we found beta such that beta * beta = x
  1769  	square.Square(&beta)
  1770  	if square.Equal(x) {
  1771  		return z.Set(&beta)
  1772  	}
  1773  	return nil
  1774  }
  1775  
  1776  const (
  1777  	k               = 32 // word size / 2
  1778  	signBitSelector = uint64(1) << 63
  1779  	approxLowBitsN  = k - 1
  1780  	approxHighBitsN = k + 1
  1781  )
  1782  
  1783  const (
  1784  	inversionCorrectionFactorWord0 = 17335095338408674528
  1785  	inversionCorrectionFactorWord1 = 1935156146725576072
  1786  	inversionCorrectionFactorWord2 = 12310223143035529855
  1787  	inversionCorrectionFactorWord3 = 14776388015283991997
  1788  	inversionCorrectionFactorWord4 = 13807356859349388480
  1789  	inversionCorrectionFactorWord5 = 10412247811534140886
  1790  	inversionCorrectionFactorWord6 = 1537112855455741892
  1791  	inversionCorrectionFactorWord7 = 5281081904757642912
  1792  	inversionCorrectionFactorWord8 = 14734303888675989218
  1793  	inversionCorrectionFactorWord9 = 64202171737444348
  1794  	invIterationsN                 = 42
  1795  )
  1796  
  1797  // Inverse z = x⁻¹ (mod q)
  1798  //
  1799  // if x == 0, sets and returns z = x
  1800  func (z *Element) Inverse(x *Element) *Element {
  1801  	// Implements "Optimized Binary GCD for Modular Inversion"
  1802  	// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
  1803  
  1804  	a := *x
  1805  	b := Element{
  1806  		q0,
  1807  		q1,
  1808  		q2,
  1809  		q3,
  1810  		q4,
  1811  		q5,
  1812  		q6,
  1813  		q7,
  1814  		q8,
  1815  		q9,
  1816  	} // b := q
  1817  
  1818  	u := Element{1}
  1819  
  1820  	// Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v]
  1821  	// cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1)
  1822  	var c0, c1 int64
  1823  
  1824  	// Saved update factors to reduce the number of field multiplications
  1825  	var pf0, pf1, pg0, pg1 int64
  1826  
  1827  	var i uint
  1828  
  1829  	var v, s Element
  1830  
  1831  	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
  1832  	// This also lets us get away with half as many updates to u,v
  1833  	// To make this constant-time-ish, replace the condition with i < invIterationsN
  1834  	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
  1835  		n := max(a.BitLen(), b.BitLen())
  1836  		aApprox, bApprox := approximate(&a, n), approximate(&b, n)
  1837  
  1838  		// f₀, g₀, f₁, g₁ = 1, 0, 0, 1
  1839  		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1
  1840  
  1841  		for j := 0; j < approxLowBitsN; j++ {
  1842  
  1843  			// -2ʲ < f₀, f₁ ≤ 2ʲ
  1844  			// |f₀| + |f₁| < 2ʲ⁺¹
  1845  
  1846  			if aApprox&1 == 0 {
  1847  				aApprox /= 2
  1848  			} else {
  1849  				s, borrow := bits.Sub64(aApprox, bApprox, 0)
  1850  				if borrow == 1 {
  1851  					s = bApprox - aApprox
  1852  					bApprox = aApprox
  1853  					c0, c1 = c1, c0
  1854  					// invariants unchanged
  1855  				}
  1856  
  1857  				aApprox = s / 2
  1858  				c0 = c0 - c1
  1859  
  1860  				// Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking)
  1861  				// Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹
  1862  				// Invariants unchanged for f₁
  1863  			}
  1864  
  1865  			c1 *= 2
  1866  			// -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹
  1867  			// So now |f₀| + |f₁| < 2ʲ⁺²
  1868  		}
  1869  
  1870  		s = a
  1871  
  1872  		var g0 int64
  1873  		// from this point on c0 aliases for f0
  1874  		c0, g0 = updateFactorsDecompose(c0)
  1875  		aHi := a.linearCombNonModular(&s, c0, &b, g0)
  1876  		if aHi&signBitSelector != 0 {
  1877  			// if aHi < 0
  1878  			c0, g0 = -c0, -g0
  1879  			aHi = negL(&a, aHi)
  1880  		}
  1881  		// right-shift a by k-1 bits
  1882  		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
  1883  		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
  1884  		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
  1885  		a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN)
  1886  		a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN)
  1887  		a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN)
  1888  		a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN)
  1889  		a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN)
  1890  		a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN)
  1891  		a[9] = (a[9] >> approxLowBitsN) | (aHi << approxHighBitsN)
  1892  
  1893  		var f1 int64
  1894  		// from this point on c1 aliases for g0
  1895  		f1, c1 = updateFactorsDecompose(c1)
  1896  		bHi := b.linearCombNonModular(&s, f1, &b, c1)
  1897  		if bHi&signBitSelector != 0 {
  1898  			// if bHi < 0
  1899  			f1, c1 = -f1, -c1
  1900  			bHi = negL(&b, bHi)
  1901  		}
  1902  		// right-shift b by k-1 bits
  1903  		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
  1904  		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
  1905  		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
  1906  		b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN)
  1907  		b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN)
  1908  		b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN)
  1909  		b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN)
  1910  		b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN)
  1911  		b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN)
  1912  		b[9] = (b[9] >> approxLowBitsN) | (bHi << approxHighBitsN)
  1913  
  1914  		if i&1 == 1 {
  1915  			// Combine current update factors with previously stored ones
  1916  			// [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values
  1917  			// We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁|
  1918  			// = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹
  1919  			// So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register
  1920  
  1921  			// c₀ aliases f₀, c₁ aliases g₁
  1922  			c0, g0, f1, c1 = c0*pf0+g0*pf1,
  1923  				c0*pg0+g0*pg1,
  1924  				f1*pf0+c1*pf1,
  1925  				f1*pg0+c1*pg1
  1926  
  1927  			s = u
  1928  
  1929  			// 0 ≤ u, v < 2²⁵⁵
  1930  			// |F₀|, |G₀| < 2⁶³
  1931  			u.linearComb(&u, c0, &v, g0)
  1932  			// |F₁|, |G₁| < 2⁶³
  1933  			v.linearComb(&s, f1, &v, c1)
  1934  
  1935  		} else {
  1936  			// Save update factors
  1937  			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
  1938  		}
  1939  	}
  1940  
  1941  	// For every iteration that we miss, v is not being multiplied by 2ᵏ⁻²
  1942  	const pSq uint64 = 1 << (2 * (k - 1))
  1943  	a = Element{pSq}
  1944  	// If the function is constant-time ish, this loop will not run (no need to take it out explicitly)
  1945  	for ; i < invIterationsN; i += 2 {
  1946  		// could optimize further with mul by word routine or by pre-computing a table since with k=26,
  1947  		// we would multiply by pSq up to 13times;
  1948  		// on x86, the assembly routine outperforms generic code for mul by word
  1949  		// on arm64, we may loose up to ~5% for 6 limbs
  1950  		v.Mul(&v, &a)
  1951  	}
  1952  
  1953  	u.Set(x) // for correctness check
  1954  
  1955  	z.Mul(&v, &Element{
  1956  		inversionCorrectionFactorWord0,
  1957  		inversionCorrectionFactorWord1,
  1958  		inversionCorrectionFactorWord2,
  1959  		inversionCorrectionFactorWord3,
  1960  		inversionCorrectionFactorWord4,
  1961  		inversionCorrectionFactorWord5,
  1962  		inversionCorrectionFactorWord6,
  1963  		inversionCorrectionFactorWord7,
  1964  		inversionCorrectionFactorWord8,
  1965  		inversionCorrectionFactorWord9,
  1966  	})
  1967  
  1968  	// correctness check
  1969  	v.Mul(&u, z)
  1970  	if !v.IsOne() && !u.IsZero() {
  1971  		return z.inverseExp(u)
  1972  	}
  1973  
  1974  	return z
  1975  }
  1976  
  1977  // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q)
  1978  func (z *Element) inverseExp(x Element) *Element {
  1979  	// e == q-2
  1980  	e := Modulus()
  1981  	e.Sub(e, big.NewInt(2))
  1982  
  1983  	z.Set(&x)
  1984  
  1985  	for i := e.BitLen() - 2; i >= 0; i-- {
  1986  		z.Square(z)
  1987  		if e.Bit(i) == 1 {
  1988  			z.Mul(z, &x)
  1989  		}
  1990  	}
  1991  
  1992  	return z
  1993  }
  1994  
  1995  // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits
  1996  // if x fits in a word as is, no approximation necessary
  1997  func approximate(x *Element, nBits int) uint64 {
  1998  
  1999  	if nBits <= 64 {
  2000  		return x[0]
  2001  	}
  2002  
  2003  	const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones
  2004  	lo := mask & x[0]
  2005  
  2006  	hiWordIndex := (nBits - 1) / 64
  2007  
  2008  	hiWordBitsAvailable := nBits - hiWordIndex*64
  2009  	hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN)
  2010  
  2011  	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
  2012  	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)
  2013  
  2014  	mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1)
  2015  	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed
  2016  
  2017  	return lo | mid | hi
  2018  }
  2019  
  2020  // linearComb z = xC * x + yC * y;
  2021  // 0 ≤ x, y < 2⁶³³
  2022  // |xC|, |yC| < 2⁶³
  2023  func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) {
  2024  	// | (hi, z) | < 2 * 2⁶³ * 2⁶³³ = 2⁶⁹⁷
  2025  	// therefore | hi | < 2⁵⁷ ≤ 2⁶³
  2026  	hi := z.linearCombNonModular(x, xC, y, yC)
  2027  	z.montReduceSigned(z, hi)
  2028  }
  2029  
  2030  // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm
  2031  // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit.
  2032  func (z *Element) montReduceSigned(x *Element, xHi uint64) {
  2033  	const signBitRemover = ^signBitSelector
  2034  	mustNeg := xHi&signBitSelector != 0
  2035  	// the SOS implementation requires that most significant bit is 0
  2036  	// Let X be xHi*r + x
  2037  	// If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement)
  2038  	xHi &= signBitRemover
  2039  	// with this a negative X is now represented as 2⁶³ r + X
  2040  
  2041  	var t [2*Limbs - 1]uint64
  2042  	var C uint64
  2043  
  2044  	m := x[0] * qInvNeg
  2045  
  2046  	C = madd0(m, q0, x[0])
  2047  	C, t[1] = madd2(m, q1, x[1], C)
  2048  	C, t[2] = madd2(m, q2, x[2], C)
  2049  	C, t[3] = madd2(m, q3, x[3], C)
  2050  	C, t[4] = madd2(m, q4, x[4], C)
  2051  	C, t[5] = madd2(m, q5, x[5], C)
  2052  	C, t[6] = madd2(m, q6, x[6], C)
  2053  	C, t[7] = madd2(m, q7, x[7], C)
  2054  	C, t[8] = madd2(m, q8, x[8], C)
  2055  	C, t[9] = madd2(m, q9, x[9], C)
  2056  
  2057  	// m * qElement[9] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1
  2058  	// x[9] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2
  2059  	// On LHS, (C, t[9]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1
  2060  	// So on LHS, C ≤ 2⁶³
  2061  	t[10] = xHi + C
  2062  	// xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴
  2063  
  2064  	// <standard SOS>
  2065  	{
  2066  		const i = 1
  2067  		m = t[i] * qInvNeg
  2068  
  2069  		C = madd0(m, q0, t[i+0])
  2070  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2071  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2072  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2073  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2074  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2075  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2076  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2077  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2078  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2079  
  2080  		t[i+Limbs] += C
  2081  	}
  2082  	{
  2083  		const i = 2
  2084  		m = t[i] * qInvNeg
  2085  
  2086  		C = madd0(m, q0, t[i+0])
  2087  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2088  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2089  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2090  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2091  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2092  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2093  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2094  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2095  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2096  
  2097  		t[i+Limbs] += C
  2098  	}
  2099  	{
  2100  		const i = 3
  2101  		m = t[i] * qInvNeg
  2102  
  2103  		C = madd0(m, q0, t[i+0])
  2104  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2105  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2106  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2107  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2108  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2109  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2110  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2111  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2112  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2113  
  2114  		t[i+Limbs] += C
  2115  	}
  2116  	{
  2117  		const i = 4
  2118  		m = t[i] * qInvNeg
  2119  
  2120  		C = madd0(m, q0, t[i+0])
  2121  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2122  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2123  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2124  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2125  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2126  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2127  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2128  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2129  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2130  
  2131  		t[i+Limbs] += C
  2132  	}
  2133  	{
  2134  		const i = 5
  2135  		m = t[i] * qInvNeg
  2136  
  2137  		C = madd0(m, q0, t[i+0])
  2138  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2139  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2140  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2141  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2142  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2143  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2144  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2145  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2146  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2147  
  2148  		t[i+Limbs] += C
  2149  	}
  2150  	{
  2151  		const i = 6
  2152  		m = t[i] * qInvNeg
  2153  
  2154  		C = madd0(m, q0, t[i+0])
  2155  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2156  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2157  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2158  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2159  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2160  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2161  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2162  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2163  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2164  
  2165  		t[i+Limbs] += C
  2166  	}
  2167  	{
  2168  		const i = 7
  2169  		m = t[i] * qInvNeg
  2170  
  2171  		C = madd0(m, q0, t[i+0])
  2172  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2173  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2174  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2175  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2176  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2177  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2178  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2179  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2180  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2181  
  2182  		t[i+Limbs] += C
  2183  	}
  2184  	{
  2185  		const i = 8
  2186  		m = t[i] * qInvNeg
  2187  
  2188  		C = madd0(m, q0, t[i+0])
  2189  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  2190  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  2191  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  2192  		C, t[i+4] = madd2(m, q4, t[i+4], C)
  2193  		C, t[i+5] = madd2(m, q5, t[i+5], C)
  2194  		C, t[i+6] = madd2(m, q6, t[i+6], C)
  2195  		C, t[i+7] = madd2(m, q7, t[i+7], C)
  2196  		C, t[i+8] = madd2(m, q8, t[i+8], C)
  2197  		C, t[i+9] = madd2(m, q9, t[i+9], C)
  2198  
  2199  		t[i+Limbs] += C
  2200  	}
  2201  	{
  2202  		const i = 9
  2203  		m := t[i] * qInvNeg
  2204  
  2205  		C = madd0(m, q0, t[i+0])
  2206  		C, z[0] = madd2(m, q1, t[i+1], C)
  2207  		C, z[1] = madd2(m, q2, t[i+2], C)
  2208  		C, z[2] = madd2(m, q3, t[i+3], C)
  2209  		C, z[3] = madd2(m, q4, t[i+4], C)
  2210  		C, z[4] = madd2(m, q5, t[i+5], C)
  2211  		C, z[5] = madd2(m, q6, t[i+6], C)
  2212  		C, z[6] = madd2(m, q7, t[i+7], C)
  2213  		C, z[7] = madd2(m, q8, t[i+8], C)
  2214  		z[9], z[8] = madd2(m, q9, t[i+9], C)
  2215  	}
  2216  
  2217  	// if z ⩾ q → z -= q
  2218  	if !z.smallerThanModulus() {
  2219  		var b uint64
  2220  		z[0], b = bits.Sub64(z[0], q0, 0)
  2221  		z[1], b = bits.Sub64(z[1], q1, b)
  2222  		z[2], b = bits.Sub64(z[2], q2, b)
  2223  		z[3], b = bits.Sub64(z[3], q3, b)
  2224  		z[4], b = bits.Sub64(z[4], q4, b)
  2225  		z[5], b = bits.Sub64(z[5], q5, b)
  2226  		z[6], b = bits.Sub64(z[6], q6, b)
  2227  		z[7], b = bits.Sub64(z[7], q7, b)
  2228  		z[8], b = bits.Sub64(z[8], q8, b)
  2229  		z[9], _ = bits.Sub64(z[9], q9, b)
  2230  	}
  2231  	// </standard SOS>
  2232  
  2233  	if mustNeg {
  2234  		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
  2235  		var b uint64
  2236  		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
  2237  		z[1], b = bits.Sub64(z[1], 0, b)
  2238  		z[2], b = bits.Sub64(z[2], 0, b)
  2239  		z[3], b = bits.Sub64(z[3], 0, b)
  2240  		z[4], b = bits.Sub64(z[4], 0, b)
  2241  		z[5], b = bits.Sub64(z[5], 0, b)
  2242  		z[6], b = bits.Sub64(z[6], 0, b)
  2243  		z[7], b = bits.Sub64(z[7], 0, b)
  2244  		z[8], b = bits.Sub64(z[8], 0, b)
  2245  		z[9], b = bits.Sub64(z[9], 0, b)
  2246  
  2247  		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
  2248  
  2249  		if b != 0 {
  2250  			// z[9] = -1
  2251  			// negative: add q
  2252  			const neg1 = 0xFFFFFFFFFFFFFFFF
  2253  
  2254  			var carry uint64
  2255  
  2256  			z[0], carry = bits.Add64(z[0], q0, 0)
  2257  			z[1], carry = bits.Add64(z[1], q1, carry)
  2258  			z[2], carry = bits.Add64(z[2], q2, carry)
  2259  			z[3], carry = bits.Add64(z[3], q3, carry)
  2260  			z[4], carry = bits.Add64(z[4], q4, carry)
  2261  			z[5], carry = bits.Add64(z[5], q5, carry)
  2262  			z[6], carry = bits.Add64(z[6], q6, carry)
  2263  			z[7], carry = bits.Add64(z[7], q7, carry)
  2264  			z[8], carry = bits.Add64(z[8], q8, carry)
  2265  			z[9], _ = bits.Add64(neg1, q9, carry)
  2266  		}
  2267  	}
  2268  }
  2269  
  2270  const (
  2271  	updateFactorsConversionBias    int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1)
  2272  	updateFactorIdentityMatrixRow0       = 1
  2273  	updateFactorIdentityMatrixRow1       = 1 << 32
  2274  )
  2275  
  2276  func updateFactorsDecompose(c int64) (int64, int64) {
  2277  	c += updateFactorsConversionBias
  2278  	const low32BitsFilter int64 = 0xFFFFFFFF
  2279  	f := c&low32BitsFilter - 0x7FFFFFFF
  2280  	g := c>>32&low32BitsFilter - 0x7FFFFFFF
  2281  	return f, g
  2282  }
  2283  
  2284  // negL negates in place [x | xHi] and return the new most significant word xHi
  2285  func negL(x *Element, xHi uint64) uint64 {
  2286  	var b uint64
  2287  
  2288  	x[0], b = bits.Sub64(0, x[0], 0)
  2289  	x[1], b = bits.Sub64(0, x[1], b)
  2290  	x[2], b = bits.Sub64(0, x[2], b)
  2291  	x[3], b = bits.Sub64(0, x[3], b)
  2292  	x[4], b = bits.Sub64(0, x[4], b)
  2293  	x[5], b = bits.Sub64(0, x[5], b)
  2294  	x[6], b = bits.Sub64(0, x[6], b)
  2295  	x[7], b = bits.Sub64(0, x[7], b)
  2296  	x[8], b = bits.Sub64(0, x[8], b)
  2297  	x[9], b = bits.Sub64(0, x[9], b)
  2298  	xHi, _ = bits.Sub64(0, xHi, b)
  2299  
  2300  	return xHi
  2301  }
  2302  
  2303  // mulWNonModular multiplies by one word in non-montgomery, without reducing
  2304  func (z *Element) mulWNonModular(x *Element, y int64) uint64 {
  2305  
  2306  	// w := abs(y)
  2307  	m := y >> 63
  2308  	w := uint64((y ^ m) - m)
  2309  
  2310  	var c uint64
  2311  	c, z[0] = bits.Mul64(x[0], w)
  2312  	c, z[1] = madd1(x[1], w, c)
  2313  	c, z[2] = madd1(x[2], w, c)
  2314  	c, z[3] = madd1(x[3], w, c)
  2315  	c, z[4] = madd1(x[4], w, c)
  2316  	c, z[5] = madd1(x[5], w, c)
  2317  	c, z[6] = madd1(x[6], w, c)
  2318  	c, z[7] = madd1(x[7], w, c)
  2319  	c, z[8] = madd1(x[8], w, c)
  2320  	c, z[9] = madd1(x[9], w, c)
  2321  
  2322  	if y < 0 {
  2323  		c = negL(z, c)
  2324  	}
  2325  
  2326  	return c
  2327  }
  2328  
  2329  // linearCombNonModular computes a linear combination without modular reduction
  2330  func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
  2331  	var yTimes Element
  2332  
  2333  	yHi := yTimes.mulWNonModular(y, yC)
  2334  	xHi := z.mulWNonModular(x, xC)
  2335  
  2336  	var carry uint64
  2337  	z[0], carry = bits.Add64(z[0], yTimes[0], 0)
  2338  	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
  2339  	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
  2340  	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
  2341  	z[4], carry = bits.Add64(z[4], yTimes[4], carry)
  2342  	z[5], carry = bits.Add64(z[5], yTimes[5], carry)
  2343  	z[6], carry = bits.Add64(z[6], yTimes[6], carry)
  2344  	z[7], carry = bits.Add64(z[7], yTimes[7], carry)
  2345  	z[8], carry = bits.Add64(z[8], yTimes[8], carry)
  2346  	z[9], carry = bits.Add64(z[9], yTimes[9], carry)
  2347  
  2348  	yHi, _ = bits.Add64(xHi, yHi, carry)
  2349  
  2350  	return yHi
  2351  }