github.com/consensys/gnark-crypto@v0.14.0/ecc/bls12-377/fr/element.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Code generated by consensys/gnark-crypto DO NOT EDIT
    16  
    17  package fr
    18  
    19  import (
    20  	"crypto/rand"
    21  	"encoding/binary"
    22  	"errors"
    23  	"io"
    24  	"math/big"
    25  	"math/bits"
    26  	"reflect"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"github.com/bits-and-blooms/bitset"
    31  	"github.com/consensys/gnark-crypto/field/hash"
    32  	"github.com/consensys/gnark-crypto/field/pool"
    33  )
    34  
    35  // Element represents a field element stored on 4 words (uint64)
    36  //
    37  // Element are assumed to be in Montgomery form in all methods.
    38  //
    39  // Modulus q =
    40  //
    41  //	q[base10] = 8444461749428370424248824938781546531375899335154063827935233455917409239041
    42  //	q[base16] = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001
    43  //
    44  // # Warning
    45  //
    46  // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance.
    47  type Element [4]uint64
    48  
    49  const (
    50  	Limbs = 4   // number of 64 bits words needed to represent a Element
    51  	Bits  = 253 // number of bits needed to represent a Element
    52  	Bytes = 32  // number of bytes needed to represent a Element
    53  )
    54  
    55  // Field modulus q
    56  const (
    57  	q0 uint64 = 725501752471715841
    58  	q1 uint64 = 6461107452199829505
    59  	q2 uint64 = 6968279316240510977
    60  	q3 uint64 = 1345280370688173398
    61  )
    62  
    63  var qElement = Element{
    64  	q0,
    65  	q1,
    66  	q2,
    67  	q3,
    68  }
    69  
    70  var _modulus big.Int // q stored as big.Int
    71  
    72  // Modulus returns q as a big.Int
    73  //
    74  //	q[base10] = 8444461749428370424248824938781546531375899335154063827935233455917409239041
    75  //	q[base16] = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001
    76  func Modulus() *big.Int {
    77  	return new(big.Int).Set(&_modulus)
    78  }
    79  
    80  // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r
    81  // used for Montgomery reduction
    82  const qInvNeg uint64 = 725501752471715839
    83  
    84  func init() {
    85  	_modulus.SetString("12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001", 16)
    86  }
    87  
    88  // NewElement returns a new Element from a uint64 value
    89  //
    90  // it is equivalent to
    91  //
    92  //	var v Element
    93  //	v.SetUint64(...)
    94  func NewElement(v uint64) Element {
    95  	z := Element{v}
    96  	z.Mul(&z, &rSquare)
    97  	return z
    98  }
    99  
   100  // SetUint64 sets z to v and returns z
   101  func (z *Element) SetUint64(v uint64) *Element {
   102  	//  sets z LSB to v (non-Montgomery form) and convert z to Montgomery form
   103  	*z = Element{v}
   104  	return z.Mul(z, &rSquare) // z.toMont()
   105  }
   106  
   107  // SetInt64 sets z to v and returns z
   108  func (z *Element) SetInt64(v int64) *Element {
   109  
   110  	// absolute value of v
   111  	m := v >> 63
   112  	z.SetUint64(uint64((v ^ m) - m))
   113  
   114  	if m != 0 {
   115  		// v is negative
   116  		z.Neg(z)
   117  	}
   118  
   119  	return z
   120  }
   121  
   122  // Set z = x and returns z
   123  func (z *Element) Set(x *Element) *Element {
   124  	z[0] = x[0]
   125  	z[1] = x[1]
   126  	z[2] = x[2]
   127  	z[3] = x[3]
   128  	return z
   129  }
   130  
   131  // SetInterface converts provided interface into Element
   132  // returns an error if provided type is not supported
   133  // supported types:
   134  //
   135  //	Element
   136  //	*Element
   137  //	uint64
   138  //	int
   139  //	string (see SetString for valid formats)
   140  //	*big.Int
   141  //	big.Int
   142  //	[]byte
   143  func (z *Element) SetInterface(i1 interface{}) (*Element, error) {
   144  	if i1 == nil {
   145  		return nil, errors.New("can't set fr.Element with <nil>")
   146  	}
   147  
   148  	switch c1 := i1.(type) {
   149  	case Element:
   150  		return z.Set(&c1), nil
   151  	case *Element:
   152  		if c1 == nil {
   153  			return nil, errors.New("can't set fr.Element with <nil>")
   154  		}
   155  		return z.Set(c1), nil
   156  	case uint8:
   157  		return z.SetUint64(uint64(c1)), nil
   158  	case uint16:
   159  		return z.SetUint64(uint64(c1)), nil
   160  	case uint32:
   161  		return z.SetUint64(uint64(c1)), nil
   162  	case uint:
   163  		return z.SetUint64(uint64(c1)), nil
   164  	case uint64:
   165  		return z.SetUint64(c1), nil
   166  	case int8:
   167  		return z.SetInt64(int64(c1)), nil
   168  	case int16:
   169  		return z.SetInt64(int64(c1)), nil
   170  	case int32:
   171  		return z.SetInt64(int64(c1)), nil
   172  	case int64:
   173  		return z.SetInt64(c1), nil
   174  	case int:
   175  		return z.SetInt64(int64(c1)), nil
   176  	case string:
   177  		return z.SetString(c1)
   178  	case *big.Int:
   179  		if c1 == nil {
   180  			return nil, errors.New("can't set fr.Element with <nil>")
   181  		}
   182  		return z.SetBigInt(c1), nil
   183  	case big.Int:
   184  		return z.SetBigInt(&c1), nil
   185  	case []byte:
   186  		return z.SetBytes(c1), nil
   187  	default:
   188  		return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String())
   189  	}
   190  }
   191  
   192  // SetZero z = 0
   193  func (z *Element) SetZero() *Element {
   194  	z[0] = 0
   195  	z[1] = 0
   196  	z[2] = 0
   197  	z[3] = 0
   198  	return z
   199  }
   200  
   201  // SetOne z = 1 (in Montgomery form)
   202  func (z *Element) SetOne() *Element {
   203  	z[0] = 9015221291577245683
   204  	z[1] = 8239323489949974514
   205  	z[2] = 1646089257421115374
   206  	z[3] = 958099254763297437
   207  	return z
   208  }
   209  
   210  // Div z = x*y⁻¹ (mod q)
   211  func (z *Element) Div(x, y *Element) *Element {
   212  	var yInv Element
   213  	yInv.Inverse(y)
   214  	z.Mul(x, &yInv)
   215  	return z
   216  }
   217  
   218  // Equal returns z == x; constant-time
   219  func (z *Element) Equal(x *Element) bool {
   220  	return z.NotEqual(x) == 0
   221  }
   222  
   223  // NotEqual returns 0 if and only if z == x; constant-time
   224  func (z *Element) NotEqual(x *Element) uint64 {
   225  	return (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0])
   226  }
   227  
   228  // IsZero returns z == 0
   229  func (z *Element) IsZero() bool {
   230  	return (z[3] | z[2] | z[1] | z[0]) == 0
   231  }
   232  
   233  // IsOne returns z == 1
   234  func (z *Element) IsOne() bool {
   235  	return ((z[3] ^ 958099254763297437) | (z[2] ^ 1646089257421115374) | (z[1] ^ 8239323489949974514) | (z[0] ^ 9015221291577245683)) == 0
   236  }
   237  
   238  // IsUint64 reports whether z can be represented as an uint64.
   239  func (z *Element) IsUint64() bool {
   240  	zz := *z
   241  	zz.fromMont()
   242  	return zz.FitsOnOneWord()
   243  }
   244  
   245  // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined.
   246  func (z *Element) Uint64() uint64 {
   247  	return z.Bits()[0]
   248  }
   249  
   250  // FitsOnOneWord reports whether z words (except the least significant word) are 0
   251  //
   252  // It is the responsibility of the caller to convert from Montgomery to Regular form if needed.
   253  func (z *Element) FitsOnOneWord() bool {
   254  	return (z[3] | z[2] | z[1]) == 0
   255  }
   256  
   257  // Cmp compares (lexicographic order) z and x and returns:
   258  //
   259  //	-1 if z <  x
   260  //	 0 if z == x
   261  //	+1 if z >  x
   262  func (z *Element) Cmp(x *Element) int {
   263  	_z := z.Bits()
   264  	_x := x.Bits()
   265  	if _z[3] > _x[3] {
   266  		return 1
   267  	} else if _z[3] < _x[3] {
   268  		return -1
   269  	}
   270  	if _z[2] > _x[2] {
   271  		return 1
   272  	} else if _z[2] < _x[2] {
   273  		return -1
   274  	}
   275  	if _z[1] > _x[1] {
   276  		return 1
   277  	} else if _z[1] < _x[1] {
   278  		return -1
   279  	}
   280  	if _z[0] > _x[0] {
   281  		return 1
   282  	} else if _z[0] < _x[0] {
   283  		return -1
   284  	}
   285  	return 0
   286  }
   287  
   288  // LexicographicallyLargest returns true if this element is strictly lexicographically
   289  // larger than its negation, false otherwise
   290  func (z *Element) LexicographicallyLargest() bool {
   291  	// adapted from github.com/zkcrypto/bls12_381
   292  	// we check if the element is larger than (q-1) / 2
   293  	// if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2
   294  
   295  	_z := z.Bits()
   296  
   297  	var b uint64
   298  	_, b = bits.Sub64(_z[0], 9586122913090633729, 0)
   299  	_, b = bits.Sub64(_z[1], 12453925762954690560, b)
   300  	_, b = bits.Sub64(_z[2], 3484139658120255488, b)
   301  	_, b = bits.Sub64(_z[3], 672640185344086699, b)
   302  
   303  	return b == 0
   304  }
   305  
   306  // SetRandom sets z to a uniform random value in [0, q).
   307  //
   308  // This might error only if reading from crypto/rand.Reader errors,
   309  // in which case, value of z is undefined.
   310  func (z *Element) SetRandom() (*Element, error) {
   311  	// this code is generated for all modulus
   312  	// and derived from go/src/crypto/rand/util.go
   313  
   314  	// l is number of limbs * 8; the number of bytes needed to reconstruct 4 uint64
   315  	const l = 32
   316  
   317  	// bitLen is the maximum bit length needed to encode a value < q.
   318  	const bitLen = 253
   319  
   320  	// k is the maximum byte length needed to encode a value < q.
   321  	const k = (bitLen + 7) / 8
   322  
   323  	// b is the number of bits in the most significant byte of q-1.
   324  	b := uint(bitLen % 8)
   325  	if b == 0 {
   326  		b = 8
   327  	}
   328  
   329  	var bytes [l]byte
   330  
   331  	for {
   332  		// note that bytes[k:l] is always 0
   333  		if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil {
   334  			return nil, err
   335  		}
   336  
   337  		// Clear unused bits in in the most significant byte to increase probability
   338  		// that the candidate is < q.
   339  		bytes[k-1] &= uint8(int(1<<b) - 1)
   340  		z[0] = binary.LittleEndian.Uint64(bytes[0:8])
   341  		z[1] = binary.LittleEndian.Uint64(bytes[8:16])
   342  		z[2] = binary.LittleEndian.Uint64(bytes[16:24])
   343  		z[3] = binary.LittleEndian.Uint64(bytes[24:32])
   344  
   345  		if !z.smallerThanModulus() {
   346  			continue // ignore the candidate and re-sample
   347  		}
   348  
   349  		return z, nil
   350  	}
   351  }
   352  
   353  // smallerThanModulus returns true if z < q
   354  // This is not constant time
   355  func (z *Element) smallerThanModulus() bool {
   356  	return (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0)))))))
   357  }
   358  
   359  // One returns 1
   360  func One() Element {
   361  	var one Element
   362  	one.SetOne()
   363  	return one
   364  }
   365  
   366  // Halve sets z to z / 2 (mod q)
   367  func (z *Element) Halve() {
   368  	var carry uint64
   369  
   370  	if z[0]&1 == 1 {
   371  		// z = z + q
   372  		z[0], carry = bits.Add64(z[0], q0, 0)
   373  		z[1], carry = bits.Add64(z[1], q1, carry)
   374  		z[2], carry = bits.Add64(z[2], q2, carry)
   375  		z[3], _ = bits.Add64(z[3], q3, carry)
   376  
   377  	}
   378  	// z = z >> 1
   379  	z[0] = z[0]>>1 | z[1]<<63
   380  	z[1] = z[1]>>1 | z[2]<<63
   381  	z[2] = z[2]>>1 | z[3]<<63
   382  	z[3] >>= 1
   383  
   384  }
   385  
   386  // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation
   387  // sets and returns z = z * 1
   388  func (z *Element) fromMont() *Element {
   389  	fromMont(z)
   390  	return z
   391  }
   392  
   393  // Add z = x + y (mod q)
   394  func (z *Element) Add(x, y *Element) *Element {
   395  
   396  	var carry uint64
   397  	z[0], carry = bits.Add64(x[0], y[0], 0)
   398  	z[1], carry = bits.Add64(x[1], y[1], carry)
   399  	z[2], carry = bits.Add64(x[2], y[2], carry)
   400  	z[3], _ = bits.Add64(x[3], y[3], carry)
   401  
   402  	// if z ⩾ q → z -= q
   403  	if !z.smallerThanModulus() {
   404  		var b uint64
   405  		z[0], b = bits.Sub64(z[0], q0, 0)
   406  		z[1], b = bits.Sub64(z[1], q1, b)
   407  		z[2], b = bits.Sub64(z[2], q2, b)
   408  		z[3], _ = bits.Sub64(z[3], q3, b)
   409  	}
   410  	return z
   411  }
   412  
   413  // Double z = x + x (mod q), aka Lsh 1
   414  func (z *Element) Double(x *Element) *Element {
   415  
   416  	var carry uint64
   417  	z[0], carry = bits.Add64(x[0], x[0], 0)
   418  	z[1], carry = bits.Add64(x[1], x[1], carry)
   419  	z[2], carry = bits.Add64(x[2], x[2], carry)
   420  	z[3], _ = bits.Add64(x[3], x[3], carry)
   421  
   422  	// if z ⩾ q → z -= q
   423  	if !z.smallerThanModulus() {
   424  		var b uint64
   425  		z[0], b = bits.Sub64(z[0], q0, 0)
   426  		z[1], b = bits.Sub64(z[1], q1, b)
   427  		z[2], b = bits.Sub64(z[2], q2, b)
   428  		z[3], _ = bits.Sub64(z[3], q3, b)
   429  	}
   430  	return z
   431  }
   432  
   433  // Sub z = x - y (mod q)
   434  func (z *Element) Sub(x, y *Element) *Element {
   435  	var b uint64
   436  	z[0], b = bits.Sub64(x[0], y[0], 0)
   437  	z[1], b = bits.Sub64(x[1], y[1], b)
   438  	z[2], b = bits.Sub64(x[2], y[2], b)
   439  	z[3], b = bits.Sub64(x[3], y[3], b)
   440  	if b != 0 {
   441  		var c uint64
   442  		z[0], c = bits.Add64(z[0], q0, 0)
   443  		z[1], c = bits.Add64(z[1], q1, c)
   444  		z[2], c = bits.Add64(z[2], q2, c)
   445  		z[3], _ = bits.Add64(z[3], q3, c)
   446  	}
   447  	return z
   448  }
   449  
   450  // Neg z = q - x
   451  func (z *Element) Neg(x *Element) *Element {
   452  	if x.IsZero() {
   453  		z.SetZero()
   454  		return z
   455  	}
   456  	var borrow uint64
   457  	z[0], borrow = bits.Sub64(q0, x[0], 0)
   458  	z[1], borrow = bits.Sub64(q1, x[1], borrow)
   459  	z[2], borrow = bits.Sub64(q2, x[2], borrow)
   460  	z[3], _ = bits.Sub64(q3, x[3], borrow)
   461  	return z
   462  }
   463  
   464  // Select is a constant-time conditional move.
   465  // If c=0, z = x0. Else z = x1
   466  func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element {
   467  	cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise
   468  	z[0] = x0[0] ^ cC&(x0[0]^x1[0])
   469  	z[1] = x0[1] ^ cC&(x0[1]^x1[1])
   470  	z[2] = x0[2] ^ cC&(x0[2]^x1[2])
   471  	z[3] = x0[3] ^ cC&(x0[3]^x1[3])
   472  	return z
   473  }
   474  
   475  // _mulGeneric is unoptimized textbook CIOS
   476  // it is a fallback solution on x86 when ADX instruction set is not available
   477  // and is used for testing purposes.
   478  func _mulGeneric(z, x, y *Element) {
   479  
   480  	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
   481  	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   482  	//
   483  	// The algorithm:
   484  	//
   485  	// for i=0 to N-1
   486  	// 		C := 0
   487  	// 		for j=0 to N-1
   488  	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
   489  	// 		(t[N+1],t[N]) := t[N] + C
   490  	//
   491  	// 		C := 0
   492  	// 		m := t[0]*q'[0] mod D
   493  	// 		(C,_) := t[0] + m*q[0]
   494  	// 		for j=1 to N-1
   495  	// 			(C,t[j-1]) := t[j] + m*q[j] + C
   496  	//
   497  	// 		(C,t[N-1]) := t[N] + C
   498  	// 		t[N] := t[N+1] + C
   499  	//
   500  	// → N is the number of machine words needed to store the modulus q
   501  	// → D is the word size. For example, on a 64-bit architecture D is 2	64
   502  	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
   503  	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
   504  	// → t is a temporary array of size N+2
   505  	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
   506  
   507  	var t [5]uint64
   508  	var D uint64
   509  	var m, C uint64
   510  	// -----------------------------------
   511  	// First loop
   512  
   513  	C, t[0] = bits.Mul64(y[0], x[0])
   514  	C, t[1] = madd1(y[0], x[1], C)
   515  	C, t[2] = madd1(y[0], x[2], C)
   516  	C, t[3] = madd1(y[0], x[3], C)
   517  
   518  	t[4], D = bits.Add64(t[4], C, 0)
   519  
   520  	// m = t[0]n'[0] mod W
   521  	m = t[0] * qInvNeg
   522  
   523  	// -----------------------------------
   524  	// Second loop
   525  	C = madd0(m, q0, t[0])
   526  	C, t[0] = madd2(m, q1, t[1], C)
   527  	C, t[1] = madd2(m, q2, t[2], C)
   528  	C, t[2] = madd2(m, q3, t[3], C)
   529  
   530  	t[3], C = bits.Add64(t[4], C, 0)
   531  	t[4], _ = bits.Add64(0, D, C)
   532  	// -----------------------------------
   533  	// First loop
   534  
   535  	C, t[0] = madd1(y[1], x[0], t[0])
   536  	C, t[1] = madd2(y[1], x[1], t[1], C)
   537  	C, t[2] = madd2(y[1], x[2], t[2], C)
   538  	C, t[3] = madd2(y[1], x[3], t[3], C)
   539  
   540  	t[4], D = bits.Add64(t[4], C, 0)
   541  
   542  	// m = t[0]n'[0] mod W
   543  	m = t[0] * qInvNeg
   544  
   545  	// -----------------------------------
   546  	// Second loop
   547  	C = madd0(m, q0, t[0])
   548  	C, t[0] = madd2(m, q1, t[1], C)
   549  	C, t[1] = madd2(m, q2, t[2], C)
   550  	C, t[2] = madd2(m, q3, t[3], C)
   551  
   552  	t[3], C = bits.Add64(t[4], C, 0)
   553  	t[4], _ = bits.Add64(0, D, C)
   554  	// -----------------------------------
   555  	// First loop
   556  
   557  	C, t[0] = madd1(y[2], x[0], t[0])
   558  	C, t[1] = madd2(y[2], x[1], t[1], C)
   559  	C, t[2] = madd2(y[2], x[2], t[2], C)
   560  	C, t[3] = madd2(y[2], x[3], t[3], C)
   561  
   562  	t[4], D = bits.Add64(t[4], C, 0)
   563  
   564  	// m = t[0]n'[0] mod W
   565  	m = t[0] * qInvNeg
   566  
   567  	// -----------------------------------
   568  	// Second loop
   569  	C = madd0(m, q0, t[0])
   570  	C, t[0] = madd2(m, q1, t[1], C)
   571  	C, t[1] = madd2(m, q2, t[2], C)
   572  	C, t[2] = madd2(m, q3, t[3], C)
   573  
   574  	t[3], C = bits.Add64(t[4], C, 0)
   575  	t[4], _ = bits.Add64(0, D, C)
   576  	// -----------------------------------
   577  	// First loop
   578  
   579  	C, t[0] = madd1(y[3], x[0], t[0])
   580  	C, t[1] = madd2(y[3], x[1], t[1], C)
   581  	C, t[2] = madd2(y[3], x[2], t[2], C)
   582  	C, t[3] = madd2(y[3], x[3], t[3], C)
   583  
   584  	t[4], D = bits.Add64(t[4], C, 0)
   585  
   586  	// m = t[0]n'[0] mod W
   587  	m = t[0] * qInvNeg
   588  
   589  	// -----------------------------------
   590  	// Second loop
   591  	C = madd0(m, q0, t[0])
   592  	C, t[0] = madd2(m, q1, t[1], C)
   593  	C, t[1] = madd2(m, q2, t[2], C)
   594  	C, t[2] = madd2(m, q3, t[3], C)
   595  
   596  	t[3], C = bits.Add64(t[4], C, 0)
   597  	t[4], _ = bits.Add64(0, D, C)
   598  
   599  	if t[4] != 0 {
   600  		// we need to reduce, we have a result on 5 words
   601  		var b uint64
   602  		z[0], b = bits.Sub64(t[0], q0, 0)
   603  		z[1], b = bits.Sub64(t[1], q1, b)
   604  		z[2], b = bits.Sub64(t[2], q2, b)
   605  		z[3], _ = bits.Sub64(t[3], q3, b)
   606  		return
   607  	}
   608  
   609  	// copy t into z
   610  	z[0] = t[0]
   611  	z[1] = t[1]
   612  	z[2] = t[2]
   613  	z[3] = t[3]
   614  
   615  	// if z ⩾ q → z -= q
   616  	if !z.smallerThanModulus() {
   617  		var b uint64
   618  		z[0], b = bits.Sub64(z[0], q0, 0)
   619  		z[1], b = bits.Sub64(z[1], q1, b)
   620  		z[2], b = bits.Sub64(z[2], q2, b)
   621  		z[3], _ = bits.Sub64(z[3], q3, b)
   622  	}
   623  }
   624  
   625  func _fromMontGeneric(z *Element) {
   626  	// the following lines implement z = z * 1
   627  	// with a modified CIOS montgomery multiplication
   628  	// see Mul for algorithm documentation
   629  	{
   630  		// m = z[0]n'[0] mod W
   631  		m := z[0] * qInvNeg
   632  		C := madd0(m, q0, z[0])
   633  		C, z[0] = madd2(m, q1, z[1], C)
   634  		C, z[1] = madd2(m, q2, z[2], C)
   635  		C, z[2] = madd2(m, q3, z[3], C)
   636  		z[3] = C
   637  	}
   638  	{
   639  		// m = z[0]n'[0] mod W
   640  		m := z[0] * qInvNeg
   641  		C := madd0(m, q0, z[0])
   642  		C, z[0] = madd2(m, q1, z[1], C)
   643  		C, z[1] = madd2(m, q2, z[2], C)
   644  		C, z[2] = madd2(m, q3, z[3], C)
   645  		z[3] = C
   646  	}
   647  	{
   648  		// m = z[0]n'[0] mod W
   649  		m := z[0] * qInvNeg
   650  		C := madd0(m, q0, z[0])
   651  		C, z[0] = madd2(m, q1, z[1], C)
   652  		C, z[1] = madd2(m, q2, z[2], C)
   653  		C, z[2] = madd2(m, q3, z[3], C)
   654  		z[3] = C
   655  	}
   656  	{
   657  		// m = z[0]n'[0] mod W
   658  		m := z[0] * qInvNeg
   659  		C := madd0(m, q0, z[0])
   660  		C, z[0] = madd2(m, q1, z[1], C)
   661  		C, z[1] = madd2(m, q2, z[2], C)
   662  		C, z[2] = madd2(m, q3, z[3], C)
   663  		z[3] = C
   664  	}
   665  
   666  	// if z ⩾ q → z -= q
   667  	if !z.smallerThanModulus() {
   668  		var b uint64
   669  		z[0], b = bits.Sub64(z[0], q0, 0)
   670  		z[1], b = bits.Sub64(z[1], q1, b)
   671  		z[2], b = bits.Sub64(z[2], q2, b)
   672  		z[3], _ = bits.Sub64(z[3], q3, b)
   673  	}
   674  }
   675  
   676  func _reduceGeneric(z *Element) {
   677  
   678  	// if z ⩾ q → z -= q
   679  	if !z.smallerThanModulus() {
   680  		var b uint64
   681  		z[0], b = bits.Sub64(z[0], q0, 0)
   682  		z[1], b = bits.Sub64(z[1], q1, b)
   683  		z[2], b = bits.Sub64(z[2], q2, b)
   684  		z[3], _ = bits.Sub64(z[3], q3, b)
   685  	}
   686  }
   687  
   688  // BatchInvert returns a new slice with every element inverted.
   689  // Uses Montgomery batch inversion trick
   690  func BatchInvert(a []Element) []Element {
   691  	res := make([]Element, len(a))
   692  	if len(a) == 0 {
   693  		return res
   694  	}
   695  
   696  	zeroes := bitset.New(uint(len(a)))
   697  	accumulator := One()
   698  
   699  	for i := 0; i < len(a); i++ {
   700  		if a[i].IsZero() {
   701  			zeroes.Set(uint(i))
   702  			continue
   703  		}
   704  		res[i] = accumulator
   705  		accumulator.Mul(&accumulator, &a[i])
   706  	}
   707  
   708  	accumulator.Inverse(&accumulator)
   709  
   710  	for i := len(a) - 1; i >= 0; i-- {
   711  		if zeroes.Test(uint(i)) {
   712  			continue
   713  		}
   714  		res[i].Mul(&res[i], &accumulator)
   715  		accumulator.Mul(&accumulator, &a[i])
   716  	}
   717  
   718  	return res
   719  }
   720  
   721  func _butterflyGeneric(a, b *Element) {
   722  	t := *a
   723  	a.Add(a, b)
   724  	b.Sub(&t, b)
   725  }
   726  
   727  // BitLen returns the minimum number of bits needed to represent z
   728  // returns 0 if z == 0
   729  func (z *Element) BitLen() int {
   730  	if z[3] != 0 {
   731  		return 192 + bits.Len64(z[3])
   732  	}
   733  	if z[2] != 0 {
   734  		return 128 + bits.Len64(z[2])
   735  	}
   736  	if z[1] != 0 {
   737  		return 64 + bits.Len64(z[1])
   738  	}
   739  	return bits.Len64(z[0])
   740  }
   741  
   742  // Hash msg to count prime field elements.
   743  // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2
   744  func Hash(msg, dst []byte, count int) ([]Element, error) {
   745  	// 128 bits of security
   746  	// L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128
   747  	const Bytes = 1 + (Bits-1)/8
   748  	const L = 16 + Bytes
   749  
   750  	lenInBytes := count * L
   751  	pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes)
   752  	if err != nil {
   753  		return nil, err
   754  	}
   755  
   756  	// get temporary big int from the pool
   757  	vv := pool.BigInt.Get()
   758  
   759  	res := make([]Element, count)
   760  	for i := 0; i < count; i++ {
   761  		vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L])
   762  		res[i].SetBigInt(vv)
   763  	}
   764  
   765  	// release object into pool
   766  	pool.BigInt.Put(vv)
   767  
   768  	return res, nil
   769  }
   770  
   771  // Exp z = xᵏ (mod q)
   772  func (z *Element) Exp(x Element, k *big.Int) *Element {
   773  	if k.IsUint64() && k.Uint64() == 0 {
   774  		return z.SetOne()
   775  	}
   776  
   777  	e := k
   778  	if k.Sign() == -1 {
   779  		// negative k, we invert
   780  		// if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q)
   781  		x.Inverse(&x)
   782  
   783  		// we negate k in a temp big.Int since
   784  		// Int.Bit(_) of k and -k is different
   785  		e = pool.BigInt.Get()
   786  		defer pool.BigInt.Put(e)
   787  		e.Neg(k)
   788  	}
   789  
   790  	z.Set(&x)
   791  
   792  	for i := e.BitLen() - 2; i >= 0; i-- {
   793  		z.Square(z)
   794  		if e.Bit(i) == 1 {
   795  			z.Mul(z, &x)
   796  		}
   797  	}
   798  
   799  	return z
   800  }
   801  
   802  // rSquare where r is the Montgommery constant
   803  // see section 2.3.2 of Tolga Acar's thesis
   804  // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   805  var rSquare = Element{
   806  	2726216793283724667,
   807  	14712177743343147295,
   808  	12091039717619697043,
   809  	81024008013859129,
   810  }
   811  
   812  // toMont converts z to Montgomery form
   813  // sets and returns z = z * r²
   814  func (z *Element) toMont() *Element {
   815  	return z.Mul(z, &rSquare)
   816  }
   817  
   818  // String returns the decimal representation of z as generated by
   819  // z.Text(10).
   820  func (z *Element) String() string {
   821  	return z.Text(10)
   822  }
   823  
   824  // toBigInt returns z as a big.Int in Montgomery form
   825  func (z *Element) toBigInt(res *big.Int) *big.Int {
   826  	var b [Bytes]byte
   827  	binary.BigEndian.PutUint64(b[24:32], z[0])
   828  	binary.BigEndian.PutUint64(b[16:24], z[1])
   829  	binary.BigEndian.PutUint64(b[8:16], z[2])
   830  	binary.BigEndian.PutUint64(b[0:8], z[3])
   831  
   832  	return res.SetBytes(b[:])
   833  }
   834  
   835  // Text returns the string representation of z in the given base.
   836  // Base must be between 2 and 36, inclusive. The result uses the
   837  // lower-case letters 'a' to 'z' for digit values 10 to 35.
   838  // No prefix (such as "0x") is added to the string. If z is a nil
   839  // pointer it returns "<nil>".
   840  // If base == 10 and -z fits in a uint16 prefix "-" is added to the string.
   841  func (z *Element) Text(base int) string {
   842  	if base < 2 || base > 36 {
   843  		panic("invalid base")
   844  	}
   845  	if z == nil {
   846  		return "<nil>"
   847  	}
   848  
   849  	const maxUint16 = 65535
   850  	if base == 10 {
   851  		var zzNeg Element
   852  		zzNeg.Neg(z)
   853  		zzNeg.fromMont()
   854  		if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 {
   855  			return "-" + strconv.FormatUint(zzNeg[0], base)
   856  		}
   857  	}
   858  	zz := *z
   859  	zz.fromMont()
   860  	if zz.FitsOnOneWord() {
   861  		return strconv.FormatUint(zz[0], base)
   862  	}
   863  	vv := pool.BigInt.Get()
   864  	r := zz.toBigInt(vv).Text(base)
   865  	pool.BigInt.Put(vv)
   866  	return r
   867  }
   868  
   869  // BigInt sets and return z as a *big.Int
   870  func (z *Element) BigInt(res *big.Int) *big.Int {
   871  	_z := *z
   872  	_z.fromMont()
   873  	return _z.toBigInt(res)
   874  }
   875  
   876  // ToBigIntRegular returns z as a big.Int in regular form
   877  //
   878  // Deprecated: use BigInt(*big.Int) instead
   879  func (z Element) ToBigIntRegular(res *big.Int) *big.Int {
   880  	z.fromMont()
   881  	return z.toBigInt(res)
   882  }
   883  
   884  // Bits provides access to z by returning its value as a little-endian [4]uint64 array.
   885  // Bits is intended to support implementation of missing low-level Element
   886  // functionality outside this package; it should be avoided otherwise.
   887  func (z *Element) Bits() [4]uint64 {
   888  	_z := *z
   889  	fromMont(&_z)
   890  	return _z
   891  }
   892  
   893  // Bytes returns the value of z as a big-endian byte array
   894  func (z *Element) Bytes() (res [Bytes]byte) {
   895  	BigEndian.PutElement(&res, *z)
   896  	return
   897  }
   898  
   899  // Marshal returns the value of z as a big-endian byte slice
   900  func (z *Element) Marshal() []byte {
   901  	b := z.Bytes()
   902  	return b[:]
   903  }
   904  
   905  // Unmarshal is an alias for SetBytes, it sets z to the value of e.
   906  func (z *Element) Unmarshal(e []byte) {
   907  	z.SetBytes(e)
   908  }
   909  
   910  // SetBytes interprets e as the bytes of a big-endian unsigned integer,
   911  // sets z to that value, and returns z.
   912  func (z *Element) SetBytes(e []byte) *Element {
   913  	if len(e) == Bytes {
   914  		// fast path
   915  		v, err := BigEndian.Element((*[Bytes]byte)(e))
   916  		if err == nil {
   917  			*z = v
   918  			return z
   919  		}
   920  	}
   921  
   922  	// slow path.
   923  	// get a big int from our pool
   924  	vv := pool.BigInt.Get()
   925  	vv.SetBytes(e)
   926  
   927  	// set big int
   928  	z.SetBigInt(vv)
   929  
   930  	// put temporary object back in pool
   931  	pool.BigInt.Put(vv)
   932  
   933  	return z
   934  }
   935  
   936  // SetBytesCanonical interprets e as the bytes of a big-endian 32-byte integer.
   937  // If e is not a 32-byte slice or encodes a value higher than q,
   938  // SetBytesCanonical returns an error.
   939  func (z *Element) SetBytesCanonical(e []byte) error {
   940  	if len(e) != Bytes {
   941  		return errors.New("invalid fr.Element encoding")
   942  	}
   943  	v, err := BigEndian.Element((*[Bytes]byte)(e))
   944  	if err != nil {
   945  		return err
   946  	}
   947  	*z = v
   948  	return nil
   949  }
   950  
   951  // SetBigInt sets z to v and returns z
   952  func (z *Element) SetBigInt(v *big.Int) *Element {
   953  	z.SetZero()
   954  
   955  	var zero big.Int
   956  
   957  	// fast path
   958  	c := v.Cmp(&_modulus)
   959  	if c == 0 {
   960  		// v == 0
   961  		return z
   962  	} else if c != 1 && v.Cmp(&zero) != -1 {
   963  		// 0 < v < q
   964  		return z.setBigInt(v)
   965  	}
   966  
   967  	// get temporary big int from the pool
   968  	vv := pool.BigInt.Get()
   969  
   970  	// copy input + modular reduction
   971  	vv.Mod(v, &_modulus)
   972  
   973  	// set big int byte value
   974  	z.setBigInt(vv)
   975  
   976  	// release object into pool
   977  	pool.BigInt.Put(vv)
   978  	return z
   979  }
   980  
   981  // setBigInt assumes 0 ⩽ v < q
   982  func (z *Element) setBigInt(v *big.Int) *Element {
   983  	vBits := v.Bits()
   984  
   985  	if bits.UintSize == 64 {
   986  		for i := 0; i < len(vBits); i++ {
   987  			z[i] = uint64(vBits[i])
   988  		}
   989  	} else {
   990  		for i := 0; i < len(vBits); i++ {
   991  			if i%2 == 0 {
   992  				z[i/2] = uint64(vBits[i])
   993  			} else {
   994  				z[i/2] |= uint64(vBits[i]) << 32
   995  			}
   996  		}
   997  	}
   998  
   999  	return z.toMont()
  1000  }
  1001  
  1002  // SetString creates a big.Int with number and calls SetBigInt on z
  1003  //
  1004  // The number prefix determines the actual base: A prefix of
  1005  // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8,
  1006  // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10
  1007  // and no prefix is accepted.
  1008  //
  1009  // For base 16, lower and upper case letters are considered the same:
  1010  // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15.
  1011  //
  1012  // An underscore character ”_” may appear between a base
  1013  // prefix and an adjacent digit, and between successive digits; such
  1014  // underscores do not change the value of the number.
  1015  // Incorrect placement of underscores is reported as a panic if there
  1016  // are no other errors.
  1017  //
  1018  // If the number is invalid this method leaves z unchanged and returns nil, error.
  1019  func (z *Element) SetString(number string) (*Element, error) {
  1020  	// get temporary big int from the pool
  1021  	vv := pool.BigInt.Get()
  1022  
  1023  	if _, ok := vv.SetString(number, 0); !ok {
  1024  		return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number)
  1025  	}
  1026  
  1027  	z.SetBigInt(vv)
  1028  
  1029  	// release object into pool
  1030  	pool.BigInt.Put(vv)
  1031  
  1032  	return z, nil
  1033  }
  1034  
  1035  // MarshalJSON returns json encoding of z (z.Text(10))
  1036  // If z == nil, returns null
  1037  func (z *Element) MarshalJSON() ([]byte, error) {
  1038  	if z == nil {
  1039  		return []byte("null"), nil
  1040  	}
  1041  	const maxSafeBound = 15 // we encode it as number if it's small
  1042  	s := z.Text(10)
  1043  	if len(s) <= maxSafeBound {
  1044  		return []byte(s), nil
  1045  	}
  1046  	var sbb strings.Builder
  1047  	sbb.WriteByte('"')
  1048  	sbb.WriteString(s)
  1049  	sbb.WriteByte('"')
  1050  	return []byte(sbb.String()), nil
  1051  }
  1052  
  1053  // UnmarshalJSON accepts numbers and strings as input
  1054  // See Element.SetString for valid prefixes (0x, 0b, ...)
  1055  func (z *Element) UnmarshalJSON(data []byte) error {
  1056  	s := string(data)
  1057  	if len(s) > Bits*3 {
  1058  		return errors.New("value too large (max = Element.Bits * 3)")
  1059  	}
  1060  
  1061  	// we accept numbers and strings, remove leading and trailing quotes if any
  1062  	if len(s) > 0 && s[0] == '"' {
  1063  		s = s[1:]
  1064  	}
  1065  	if len(s) > 0 && s[len(s)-1] == '"' {
  1066  		s = s[:len(s)-1]
  1067  	}
  1068  
  1069  	// get temporary big int from the pool
  1070  	vv := pool.BigInt.Get()
  1071  
  1072  	if _, ok := vv.SetString(s, 0); !ok {
  1073  		return errors.New("can't parse into a big.Int: " + s)
  1074  	}
  1075  
  1076  	z.SetBigInt(vv)
  1077  
  1078  	// release object into pool
  1079  	pool.BigInt.Put(vv)
  1080  	return nil
  1081  }
  1082  
  1083  // A ByteOrder specifies how to convert byte slices into a Element
  1084  type ByteOrder interface {
  1085  	Element(*[Bytes]byte) (Element, error)
  1086  	PutElement(*[Bytes]byte, Element)
  1087  	String() string
  1088  }
  1089  
  1090  // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder.
  1091  var BigEndian bigEndian
  1092  
  1093  type bigEndian struct{}
  1094  
  1095  // Element interpret b is a big-endian 32-byte slice.
  1096  // If b encodes a value higher than q, Element returns error.
  1097  func (bigEndian) Element(b *[Bytes]byte) (Element, error) {
  1098  	var z Element
  1099  	z[0] = binary.BigEndian.Uint64((*b)[24:32])
  1100  	z[1] = binary.BigEndian.Uint64((*b)[16:24])
  1101  	z[2] = binary.BigEndian.Uint64((*b)[8:16])
  1102  	z[3] = binary.BigEndian.Uint64((*b)[0:8])
  1103  
  1104  	if !z.smallerThanModulus() {
  1105  		return Element{}, errors.New("invalid fr.Element encoding")
  1106  	}
  1107  
  1108  	z.toMont()
  1109  	return z, nil
  1110  }
  1111  
  1112  func (bigEndian) PutElement(b *[Bytes]byte, e Element) {
  1113  	e.fromMont()
  1114  	binary.BigEndian.PutUint64((*b)[24:32], e[0])
  1115  	binary.BigEndian.PutUint64((*b)[16:24], e[1])
  1116  	binary.BigEndian.PutUint64((*b)[8:16], e[2])
  1117  	binary.BigEndian.PutUint64((*b)[0:8], e[3])
  1118  }
  1119  
  1120  func (bigEndian) String() string { return "BigEndian" }
  1121  
  1122  // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder.
  1123  var LittleEndian littleEndian
  1124  
  1125  type littleEndian struct{}
  1126  
  1127  func (littleEndian) Element(b *[Bytes]byte) (Element, error) {
  1128  	var z Element
  1129  	z[0] = binary.LittleEndian.Uint64((*b)[0:8])
  1130  	z[1] = binary.LittleEndian.Uint64((*b)[8:16])
  1131  	z[2] = binary.LittleEndian.Uint64((*b)[16:24])
  1132  	z[3] = binary.LittleEndian.Uint64((*b)[24:32])
  1133  
  1134  	if !z.smallerThanModulus() {
  1135  		return Element{}, errors.New("invalid fr.Element encoding")
  1136  	}
  1137  
  1138  	z.toMont()
  1139  	return z, nil
  1140  }
  1141  
  1142  func (littleEndian) PutElement(b *[Bytes]byte, e Element) {
  1143  	e.fromMont()
  1144  	binary.LittleEndian.PutUint64((*b)[0:8], e[0])
  1145  	binary.LittleEndian.PutUint64((*b)[8:16], e[1])
  1146  	binary.LittleEndian.PutUint64((*b)[16:24], e[2])
  1147  	binary.LittleEndian.PutUint64((*b)[24:32], e[3])
  1148  }
  1149  
  1150  func (littleEndian) String() string { return "LittleEndian" }
  1151  
  1152  // Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
  1153  func (z *Element) Legendre() int {
  1154  	var l Element
  1155  	// z^((q-1)/2)
  1156  	l.expByLegendreExp(*z)
  1157  
  1158  	if l.IsZero() {
  1159  		return 0
  1160  	}
  1161  
  1162  	// if l == 1
  1163  	if l.IsOne() {
  1164  		return 1
  1165  	}
  1166  	return -1
  1167  }
  1168  
  1169  // Sqrt z = √x (mod q)
  1170  // if the square root doesn't exist (x is not a square mod q)
  1171  // Sqrt leaves z unchanged and returns nil
  1172  func (z *Element) Sqrt(x *Element) *Element {
  1173  	// q ≡ 1 (mod 4)
  1174  	// see modSqrtTonelliShanks in math/big/int.go
  1175  	// using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf
  1176  
  1177  	var y, b, t, w Element
  1178  	// w = x^((s-1)/2))
  1179  	w.expBySqrtExp(*x)
  1180  
  1181  	// y = x^((s+1)/2)) = w * x
  1182  	y.Mul(x, &w)
  1183  
  1184  	// b = xˢ = w * w * x = y * x
  1185  	b.Mul(&w, &y)
  1186  
  1187  	// g = nonResidue ^ s
  1188  	var g = Element{
  1189  		4340692304772210610,
  1190  		11102725085307959083,
  1191  		15540458298643990566,
  1192  		944526744080888988,
  1193  	}
  1194  	r := uint64(47)
  1195  
  1196  	// compute legendre symbol
  1197  	// t = x^((q-1)/2) = r-1 squaring of xˢ
  1198  	t = b
  1199  	for i := uint64(0); i < r-1; i++ {
  1200  		t.Square(&t)
  1201  	}
  1202  	if t.IsZero() {
  1203  		return z.SetZero()
  1204  	}
  1205  	if !t.IsOne() {
  1206  		// t != 1, we don't have a square root
  1207  		return nil
  1208  	}
  1209  	for {
  1210  		var m uint64
  1211  		t = b
  1212  
  1213  		// for t != 1
  1214  		for !t.IsOne() {
  1215  			t.Square(&t)
  1216  			m++
  1217  		}
  1218  
  1219  		if m == 0 {
  1220  			return z.Set(&y)
  1221  		}
  1222  		// t = g^(2^(r-m-1)) (mod q)
  1223  		ge := int(r - m - 1)
  1224  		t = g
  1225  		for ge > 0 {
  1226  			t.Square(&t)
  1227  			ge--
  1228  		}
  1229  
  1230  		g.Square(&t)
  1231  		y.Mul(&y, &t)
  1232  		b.Mul(&b, &g)
  1233  		r = m
  1234  	}
  1235  }
  1236  
  1237  const (
  1238  	k               = 32 // word size / 2
  1239  	signBitSelector = uint64(1) << 63
  1240  	approxLowBitsN  = k - 1
  1241  	approxHighBitsN = k + 1
  1242  )
  1243  
  1244  const (
  1245  	inversionCorrectionFactorWord0 = 10532255328610800637
  1246  	inversionCorrectionFactorWord1 = 12368705355905441295
  1247  	inversionCorrectionFactorWord2 = 4360254653162267807
  1248  	inversionCorrectionFactorWord3 = 835375988631323795
  1249  	invIterationsN                 = 18
  1250  )
  1251  
  1252  // Inverse z = x⁻¹ (mod q)
  1253  //
  1254  // if x == 0, sets and returns z = x
  1255  func (z *Element) Inverse(x *Element) *Element {
  1256  	// Implements "Optimized Binary GCD for Modular Inversion"
  1257  	// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
  1258  
  1259  	a := *x
  1260  	b := Element{
  1261  		q0,
  1262  		q1,
  1263  		q2,
  1264  		q3,
  1265  	} // b := q
  1266  
  1267  	u := Element{1}
  1268  
  1269  	// Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v]
  1270  	// cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1)
  1271  	var c0, c1 int64
  1272  
  1273  	// Saved update factors to reduce the number of field multiplications
  1274  	var pf0, pf1, pg0, pg1 int64
  1275  
  1276  	var i uint
  1277  
  1278  	var v, s Element
  1279  
  1280  	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
  1281  	// This also lets us get away with half as many updates to u,v
  1282  	// To make this constant-time-ish, replace the condition with i < invIterationsN
  1283  	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
  1284  		n := max(a.BitLen(), b.BitLen())
  1285  		aApprox, bApprox := approximate(&a, n), approximate(&b, n)
  1286  
  1287  		// f₀, g₀, f₁, g₁ = 1, 0, 0, 1
  1288  		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1
  1289  
  1290  		for j := 0; j < approxLowBitsN; j++ {
  1291  
  1292  			// -2ʲ < f₀, f₁ ≤ 2ʲ
  1293  			// |f₀| + |f₁| < 2ʲ⁺¹
  1294  
  1295  			if aApprox&1 == 0 {
  1296  				aApprox /= 2
  1297  			} else {
  1298  				s, borrow := bits.Sub64(aApprox, bApprox, 0)
  1299  				if borrow == 1 {
  1300  					s = bApprox - aApprox
  1301  					bApprox = aApprox
  1302  					c0, c1 = c1, c0
  1303  					// invariants unchanged
  1304  				}
  1305  
  1306  				aApprox = s / 2
  1307  				c0 = c0 - c1
  1308  
  1309  				// Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking)
  1310  				// Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹
  1311  				// Invariants unchanged for f₁
  1312  			}
  1313  
  1314  			c1 *= 2
  1315  			// -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹
  1316  			// So now |f₀| + |f₁| < 2ʲ⁺²
  1317  		}
  1318  
  1319  		s = a
  1320  
  1321  		var g0 int64
  1322  		// from this point on c0 aliases for f0
  1323  		c0, g0 = updateFactorsDecompose(c0)
  1324  		aHi := a.linearCombNonModular(&s, c0, &b, g0)
  1325  		if aHi&signBitSelector != 0 {
  1326  			// if aHi < 0
  1327  			c0, g0 = -c0, -g0
  1328  			aHi = negL(&a, aHi)
  1329  		}
  1330  		// right-shift a by k-1 bits
  1331  		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
  1332  		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
  1333  		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
  1334  		a[3] = (a[3] >> approxLowBitsN) | (aHi << approxHighBitsN)
  1335  
  1336  		var f1 int64
  1337  		// from this point on c1 aliases for g0
  1338  		f1, c1 = updateFactorsDecompose(c1)
  1339  		bHi := b.linearCombNonModular(&s, f1, &b, c1)
  1340  		if bHi&signBitSelector != 0 {
  1341  			// if bHi < 0
  1342  			f1, c1 = -f1, -c1
  1343  			bHi = negL(&b, bHi)
  1344  		}
  1345  		// right-shift b by k-1 bits
  1346  		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
  1347  		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
  1348  		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
  1349  		b[3] = (b[3] >> approxLowBitsN) | (bHi << approxHighBitsN)
  1350  
  1351  		if i&1 == 1 {
  1352  			// Combine current update factors with previously stored ones
  1353  			// [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values
  1354  			// We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁|
  1355  			// = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹
  1356  			// So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register
  1357  
  1358  			// c₀ aliases f₀, c₁ aliases g₁
  1359  			c0, g0, f1, c1 = c0*pf0+g0*pf1,
  1360  				c0*pg0+g0*pg1,
  1361  				f1*pf0+c1*pf1,
  1362  				f1*pg0+c1*pg1
  1363  
  1364  			s = u
  1365  
  1366  			// 0 ≤ u, v < 2²⁵⁵
  1367  			// |F₀|, |G₀| < 2⁶³
  1368  			u.linearComb(&u, c0, &v, g0)
  1369  			// |F₁|, |G₁| < 2⁶³
  1370  			v.linearComb(&s, f1, &v, c1)
  1371  
  1372  		} else {
  1373  			// Save update factors
  1374  			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
  1375  		}
  1376  	}
  1377  
  1378  	// For every iteration that we miss, v is not being multiplied by 2ᵏ⁻²
  1379  	const pSq uint64 = 1 << (2 * (k - 1))
  1380  	a = Element{pSq}
  1381  	// If the function is constant-time ish, this loop will not run (no need to take it out explicitly)
  1382  	for ; i < invIterationsN; i += 2 {
  1383  		// could optimize further with mul by word routine or by pre-computing a table since with k=26,
  1384  		// we would multiply by pSq up to 13times;
  1385  		// on x86, the assembly routine outperforms generic code for mul by word
  1386  		// on arm64, we may loose up to ~5% for 6 limbs
  1387  		v.Mul(&v, &a)
  1388  	}
  1389  
  1390  	u.Set(x) // for correctness check
  1391  
  1392  	z.Mul(&v, &Element{
  1393  		inversionCorrectionFactorWord0,
  1394  		inversionCorrectionFactorWord1,
  1395  		inversionCorrectionFactorWord2,
  1396  		inversionCorrectionFactorWord3,
  1397  	})
  1398  
  1399  	// correctness check
  1400  	v.Mul(&u, z)
  1401  	if !v.IsOne() && !u.IsZero() {
  1402  		return z.inverseExp(u)
  1403  	}
  1404  
  1405  	return z
  1406  }
  1407  
  1408  // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q)
  1409  func (z *Element) inverseExp(x Element) *Element {
  1410  	// e == q-2
  1411  	e := Modulus()
  1412  	e.Sub(e, big.NewInt(2))
  1413  
  1414  	z.Set(&x)
  1415  
  1416  	for i := e.BitLen() - 2; i >= 0; i-- {
  1417  		z.Square(z)
  1418  		if e.Bit(i) == 1 {
  1419  			z.Mul(z, &x)
  1420  		}
  1421  	}
  1422  
  1423  	return z
  1424  }
  1425  
  1426  // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits
  1427  // if x fits in a word as is, no approximation necessary
  1428  func approximate(x *Element, nBits int) uint64 {
  1429  
  1430  	if nBits <= 64 {
  1431  		return x[0]
  1432  	}
  1433  
  1434  	const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones
  1435  	lo := mask & x[0]
  1436  
  1437  	hiWordIndex := (nBits - 1) / 64
  1438  
  1439  	hiWordBitsAvailable := nBits - hiWordIndex*64
  1440  	hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN)
  1441  
  1442  	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
  1443  	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)
  1444  
  1445  	mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1)
  1446  	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed
  1447  
  1448  	return lo | mid | hi
  1449  }
  1450  
  1451  // linearComb z = xC * x + yC * y;
  1452  // 0 ≤ x, y < 2²⁵³
  1453  // |xC|, |yC| < 2⁶³
  1454  func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) {
  1455  	// | (hi, z) | < 2 * 2⁶³ * 2²⁵³ = 2³¹⁷
  1456  	// therefore | hi | < 2⁶¹ ≤ 2⁶³
  1457  	hi := z.linearCombNonModular(x, xC, y, yC)
  1458  	z.montReduceSigned(z, hi)
  1459  }
  1460  
  1461  // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm
  1462  // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit.
  1463  func (z *Element) montReduceSigned(x *Element, xHi uint64) {
  1464  	const signBitRemover = ^signBitSelector
  1465  	mustNeg := xHi&signBitSelector != 0
  1466  	// the SOS implementation requires that most significant bit is 0
  1467  	// Let X be xHi*r + x
  1468  	// If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement)
  1469  	xHi &= signBitRemover
  1470  	// with this a negative X is now represented as 2⁶³ r + X
  1471  
  1472  	var t [2*Limbs - 1]uint64
  1473  	var C uint64
  1474  
  1475  	m := x[0] * qInvNeg
  1476  
  1477  	C = madd0(m, q0, x[0])
  1478  	C, t[1] = madd2(m, q1, x[1], C)
  1479  	C, t[2] = madd2(m, q2, x[2], C)
  1480  	C, t[3] = madd2(m, q3, x[3], C)
  1481  
  1482  	// m * qElement[3] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1
  1483  	// x[3] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2
  1484  	// On LHS, (C, t[3]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1
  1485  	// So on LHS, C ≤ 2⁶³
  1486  	t[4] = xHi + C
  1487  	// xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴
  1488  
  1489  	// <standard SOS>
  1490  	{
  1491  		const i = 1
  1492  		m = t[i] * qInvNeg
  1493  
  1494  		C = madd0(m, q0, t[i+0])
  1495  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  1496  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  1497  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  1498  
  1499  		t[i+Limbs] += C
  1500  	}
  1501  	{
  1502  		const i = 2
  1503  		m = t[i] * qInvNeg
  1504  
  1505  		C = madd0(m, q0, t[i+0])
  1506  		C, t[i+1] = madd2(m, q1, t[i+1], C)
  1507  		C, t[i+2] = madd2(m, q2, t[i+2], C)
  1508  		C, t[i+3] = madd2(m, q3, t[i+3], C)
  1509  
  1510  		t[i+Limbs] += C
  1511  	}
  1512  	{
  1513  		const i = 3
  1514  		m := t[i] * qInvNeg
  1515  
  1516  		C = madd0(m, q0, t[i+0])
  1517  		C, z[0] = madd2(m, q1, t[i+1], C)
  1518  		C, z[1] = madd2(m, q2, t[i+2], C)
  1519  		z[3], z[2] = madd2(m, q3, t[i+3], C)
  1520  	}
  1521  
  1522  	// if z ⩾ q → z -= q
  1523  	if !z.smallerThanModulus() {
  1524  		var b uint64
  1525  		z[0], b = bits.Sub64(z[0], q0, 0)
  1526  		z[1], b = bits.Sub64(z[1], q1, b)
  1527  		z[2], b = bits.Sub64(z[2], q2, b)
  1528  		z[3], _ = bits.Sub64(z[3], q3, b)
  1529  	}
  1530  	// </standard SOS>
  1531  
  1532  	if mustNeg {
  1533  		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
  1534  		var b uint64
  1535  		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
  1536  		z[1], b = bits.Sub64(z[1], 0, b)
  1537  		z[2], b = bits.Sub64(z[2], 0, b)
  1538  		z[3], b = bits.Sub64(z[3], 0, b)
  1539  
  1540  		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
  1541  
  1542  		if b != 0 {
  1543  			// z[3] = -1
  1544  			// negative: add q
  1545  			const neg1 = 0xFFFFFFFFFFFFFFFF
  1546  
  1547  			var carry uint64
  1548  
  1549  			z[0], carry = bits.Add64(z[0], q0, 0)
  1550  			z[1], carry = bits.Add64(z[1], q1, carry)
  1551  			z[2], carry = bits.Add64(z[2], q2, carry)
  1552  			z[3], _ = bits.Add64(neg1, q3, carry)
  1553  		}
  1554  	}
  1555  }
  1556  
  1557  const (
  1558  	updateFactorsConversionBias    int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1)
  1559  	updateFactorIdentityMatrixRow0       = 1
  1560  	updateFactorIdentityMatrixRow1       = 1 << 32
  1561  )
  1562  
  1563  func updateFactorsDecompose(c int64) (int64, int64) {
  1564  	c += updateFactorsConversionBias
  1565  	const low32BitsFilter int64 = 0xFFFFFFFF
  1566  	f := c&low32BitsFilter - 0x7FFFFFFF
  1567  	g := c>>32&low32BitsFilter - 0x7FFFFFFF
  1568  	return f, g
  1569  }
  1570  
  1571  // negL negates in place [x | xHi] and return the new most significant word xHi
  1572  func negL(x *Element, xHi uint64) uint64 {
  1573  	var b uint64
  1574  
  1575  	x[0], b = bits.Sub64(0, x[0], 0)
  1576  	x[1], b = bits.Sub64(0, x[1], b)
  1577  	x[2], b = bits.Sub64(0, x[2], b)
  1578  	x[3], b = bits.Sub64(0, x[3], b)
  1579  	xHi, _ = bits.Sub64(0, xHi, b)
  1580  
  1581  	return xHi
  1582  }
  1583  
  1584  // mulWNonModular multiplies by one word in non-montgomery, without reducing
  1585  func (z *Element) mulWNonModular(x *Element, y int64) uint64 {
  1586  
  1587  	// w := abs(y)
  1588  	m := y >> 63
  1589  	w := uint64((y ^ m) - m)
  1590  
  1591  	var c uint64
  1592  	c, z[0] = bits.Mul64(x[0], w)
  1593  	c, z[1] = madd1(x[1], w, c)
  1594  	c, z[2] = madd1(x[2], w, c)
  1595  	c, z[3] = madd1(x[3], w, c)
  1596  
  1597  	if y < 0 {
  1598  		c = negL(z, c)
  1599  	}
  1600  
  1601  	return c
  1602  }
  1603  
  1604  // linearCombNonModular computes a linear combination without modular reduction
  1605  func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
  1606  	var yTimes Element
  1607  
  1608  	yHi := yTimes.mulWNonModular(y, yC)
  1609  	xHi := z.mulWNonModular(x, xC)
  1610  
  1611  	var carry uint64
  1612  	z[0], carry = bits.Add64(z[0], yTimes[0], 0)
  1613  	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
  1614  	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
  1615  	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
  1616  
  1617  	yHi, _ = bits.Add64(xHi, yHi, carry)
  1618  
  1619  	return yHi
  1620  }