github.com/emmansun/gmsm@v0.29.1/sm9/bn256/gfp_generic.go (about)

     1  //go:build purego || !(amd64 || arm64 || ppc64 || ppc64le)
     2  
     3  package bn256
     4  
     5  import (
     6  	"math/bits"
     7  )
     8  
     9  func gfpCarry(a *gfP, head uint64) {
    10  	b := &gfP{}
    11  
    12  	var carry uint64
    13  	for i, pi := range p2 {
    14  		b[i], carry = bits.Sub64(a[i], pi, carry)
    15  	}
    16  	carry = carry &^ head
    17  
    18  	// If b is negative, then return a.
    19  	// Else return b.
    20  	carry = -carry
    21  	ncarry := ^carry
    22  	for i := 0; i < 4; i++ {
    23  		a[i] = (a[i] & carry) | (b[i] & ncarry)
    24  	}
    25  }
    26  
    27  func gfpNeg(c, a *gfP) {
    28  	var carry uint64
    29  	for i, pi := range p2 {
    30  		c[i], carry = bits.Sub64(pi, a[i], carry)
    31  	}
    32  	// required for "zero", bn256 treats infinity point as valid
    33  	gfpCarry(c, 0)
    34  }
    35  
    36  func gfpAdd(c, a, b *gfP) {
    37  	var carry uint64
    38  	for i, ai := range a {
    39  		c[i], carry = bits.Add64(ai, b[i], carry)
    40  	}
    41  	gfpCarry(c, carry)
    42  }
    43  
    44  func gfpDouble(c, a *gfP) {
    45  	gfpAdd(c, a, a)
    46  }
    47  
    48  func gfpTriple(c, a *gfP) {
    49  	t := &gfP{}
    50  	gfpAdd(t, a, a)
    51  	gfpAdd(c, t, a)
    52  }
    53  
    54  func gfpSub(c, a, b *gfP) {
    55  	t := &gfP{}
    56  
    57  	var carry, underflow uint64
    58  
    59  	for i, ai := range a {
    60  		c[i], underflow = bits.Sub64(ai, b[i], underflow)
    61  	}
    62  
    63  	for i, pi := range p2 {
    64  		t[i], carry = bits.Add64(pi, c[i], carry)
    65  	}
    66  
    67  	mask := -underflow
    68  	for i, ci := range c {
    69  		c[i] ^= mask & (ci ^ t[i])
    70  	}
    71  }
    72  
    73  // addMulVVW multiplies the multi-word value x by the single-word value y,
    74  // adding the result to the multi-word value z and returning the final carry.
    75  // It can be thought of as one row of a pen-and-paper column multiplication.
    76  func addMulVVW(z, x []uint64, y uint64) (carry uint64) {
    77  	_ = x[len(z)-1] // bounds check elimination hint
    78  	for i := range z {
    79  		hi, lo := bits.Mul64(x[i], y)
    80  		lo, c := bits.Add64(lo, z[i], 0)
    81  		// We use bits.Add with zero to get an add-with-carry instruction that
    82  		// absorbs the carry from the previous bits.Add.
    83  		hi, _ = bits.Add64(hi, 0, c)
    84  		lo, c = bits.Add64(lo, carry, 0)
    85  		hi, _ = bits.Add64(hi, 0, c)
    86  		carry = hi
    87  		z[i] = lo
    88  	}
    89  	return carry
    90  }
    91  
    92  func gfpMul(c, a, b *gfP) {
    93  	var T [8]uint64
    94  	// This loop implements Word-by-Word Montgomery Multiplication, as
    95  	// described in Algorithm 4 (Fig. 3) of "Efficient Software
    96  	// Implementations of Modular Exponentiation" by Shay Gueron
    97  	// [https://eprint.iacr.org/2011/239.pdf].
    98  	var carry uint64
    99  	for i := 0; i < 4; i++ {
   100  		// Step 1 (T = a × b) is computed as a large pen-and-paper column
   101  		// multiplication of two numbers with n base-2^_W digits. If we just
   102  		// wanted to produce 2n-wide T, we would do
   103  		//
   104  		//   for i := 0; i < n; i++ {
   105  		//       d := bLimbs[i]
   106  		//       T[n+i] = addMulVVW(T[i:n+i], aLimbs, d)
   107  		//   }
   108  		//
   109  		// where d is a digit of the multiplier, T[i:n+i] is the shifted
   110  		// position of the product of that digit, and T[n+i] is the final carry.
   111  		// Note that T[i] isn't modified after processing the i-th digit.
   112  		//
   113  		// Instead of running two loops, one for Step 1 and one for Steps 2–6,
   114  		// the result of Step 1 is computed during the next loop. This is
   115  		// possible because each iteration only uses T[i] in Step 2 and then
   116  		// discards it in Step 6.
   117  		d := b[i]
   118  
   119  		c1 := addMulVVW(T[i:4+i], a[:], d)
   120  
   121  		// Step 6 is replaced by shifting the virtual window we operate
   122  		// over: T of the algorithm is T[i:] for us. That means that T1 in
   123  		// Step 2 (T mod 2^_W) is simply T[i]. k0 in Step 3 is our m0inv.
   124  		Y := T[i] * np[0]
   125  
   126  		// Step 4 and 5 add Y × m to T, which as mentioned above is stored
   127  		// at T[i:]. The two carries (from a × d and Y × m) are added up in
   128  		// the next word T[n+i], and the carry bit from that addition is
   129  		// brought forward to the next iteration.
   130  		c2 := addMulVVW(T[i:4+i], p2[:], Y)
   131  		T[4+i], carry = bits.Add64(c1, c2, carry)
   132  	}
   133  
   134  	*c = gfP{T[4], T[5], T[6], T[7]}
   135  	gfpCarry(c, carry)
   136  }
   137  
   138  func gfpSqr(res, in *gfP, n int) {
   139  	gfpMul(res, in, in)
   140  	for i := 1; i < n; i++ {
   141  		gfpMul(res, res, res)
   142  	}
   143  }
   144  
   145  func gfpFromMont(res, in *gfP) {
   146  	var T [8]uint64
   147  	var carry uint64
   148  	copy(T[:], in[:])
   149  	for i := 0; i < 4; i++ {
   150  		Y := T[i] * np[0]
   151  		c2 := addMulVVW(T[i:4+i], p2[:], Y)
   152  		T[4+i], carry = bits.Add64(uint64(0), c2, carry)
   153  	}
   154  
   155  	*res = gfP{T[4], T[5], T[6], T[7]}
   156  	gfpCarry(res, carry)
   157  }
   158  
   159  func gfpMarshal(out *[32]byte, in *gfP) {
   160  	for w := uint(0); w < 4; w++ {
   161  		for b := uint(0); b < 8; b++ {
   162  			out[8*w+b] = byte(in[3-w] >> (56 - 8*b))
   163  		}
   164  	}
   165  }
   166  
   167  func gfpUnmarshal(out *gfP, in *[32]byte) {
   168  	for w := uint(0); w < 4; w++ {
   169  		out[3-w] = 0
   170  		for b := uint(0); b < 8; b++ {
   171  			out[3-w] += uint64(in[8*w+b]) << (56 - 8*b)
   172  		}
   173  	}
   174  }