github.com/emmansun/gmsm@v0.29.1/sm9/bn256/gfp_generic.go (about) 1 //go:build purego || !(amd64 || arm64 || ppc64 || ppc64le) 2 3 package bn256 4 5 import ( 6 "math/bits" 7 ) 8 9 func gfpCarry(a *gfP, head uint64) { 10 b := &gfP{} 11 12 var carry uint64 13 for i, pi := range p2 { 14 b[i], carry = bits.Sub64(a[i], pi, carry) 15 } 16 carry = carry &^ head 17 18 // If b is negative, then return a. 19 // Else return b. 20 carry = -carry 21 ncarry := ^carry 22 for i := 0; i < 4; i++ { 23 a[i] = (a[i] & carry) | (b[i] & ncarry) 24 } 25 } 26 27 func gfpNeg(c, a *gfP) { 28 var carry uint64 29 for i, pi := range p2 { 30 c[i], carry = bits.Sub64(pi, a[i], carry) 31 } 32 // required for "zero", bn256 treats infinity point as valid 33 gfpCarry(c, 0) 34 } 35 36 func gfpAdd(c, a, b *gfP) { 37 var carry uint64 38 for i, ai := range a { 39 c[i], carry = bits.Add64(ai, b[i], carry) 40 } 41 gfpCarry(c, carry) 42 } 43 44 func gfpDouble(c, a *gfP) { 45 gfpAdd(c, a, a) 46 } 47 48 func gfpTriple(c, a *gfP) { 49 t := &gfP{} 50 gfpAdd(t, a, a) 51 gfpAdd(c, t, a) 52 } 53 54 func gfpSub(c, a, b *gfP) { 55 t := &gfP{} 56 57 var carry, underflow uint64 58 59 for i, ai := range a { 60 c[i], underflow = bits.Sub64(ai, b[i], underflow) 61 } 62 63 for i, pi := range p2 { 64 t[i], carry = bits.Add64(pi, c[i], carry) 65 } 66 67 mask := -underflow 68 for i, ci := range c { 69 c[i] ^= mask & (ci ^ t[i]) 70 } 71 } 72 73 // addMulVVW multiplies the multi-word value x by the single-word value y, 74 // adding the result to the multi-word value z and returning the final carry. 75 // It can be thought of as one row of a pen-and-paper column multiplication. 76 func addMulVVW(z, x []uint64, y uint64) (carry uint64) { 77 _ = x[len(z)-1] // bounds check elimination hint 78 for i := range z { 79 hi, lo := bits.Mul64(x[i], y) 80 lo, c := bits.Add64(lo, z[i], 0) 81 // We use bits.Add with zero to get an add-with-carry instruction that 82 // absorbs the carry from the previous bits.Add. 83 hi, _ = bits.Add64(hi, 0, c) 84 lo, c = bits.Add64(lo, carry, 0) 85 hi, _ = bits.Add64(hi, 0, c) 86 carry = hi 87 z[i] = lo 88 } 89 return carry 90 } 91 92 func gfpMul(c, a, b *gfP) { 93 var T [8]uint64 94 // This loop implements Word-by-Word Montgomery Multiplication, as 95 // described in Algorithm 4 (Fig. 3) of "Efficient Software 96 // Implementations of Modular Exponentiation" by Shay Gueron 97 // [https://eprint.iacr.org/2011/239.pdf]. 98 var carry uint64 99 for i := 0; i < 4; i++ { 100 // Step 1 (T = a × b) is computed as a large pen-and-paper column 101 // multiplication of two numbers with n base-2^_W digits. If we just 102 // wanted to produce 2n-wide T, we would do 103 // 104 // for i := 0; i < n; i++ { 105 // d := bLimbs[i] 106 // T[n+i] = addMulVVW(T[i:n+i], aLimbs, d) 107 // } 108 // 109 // where d is a digit of the multiplier, T[i:n+i] is the shifted 110 // position of the product of that digit, and T[n+i] is the final carry. 111 // Note that T[i] isn't modified after processing the i-th digit. 112 // 113 // Instead of running two loops, one for Step 1 and one for Steps 2–6, 114 // the result of Step 1 is computed during the next loop. This is 115 // possible because each iteration only uses T[i] in Step 2 and then 116 // discards it in Step 6. 117 d := b[i] 118 119 c1 := addMulVVW(T[i:4+i], a[:], d) 120 121 // Step 6 is replaced by shifting the virtual window we operate 122 // over: T of the algorithm is T[i:] for us. That means that T1 in 123 // Step 2 (T mod 2^_W) is simply T[i]. k0 in Step 3 is our m0inv. 124 Y := T[i] * np[0] 125 126 // Step 4 and 5 add Y × m to T, which as mentioned above is stored 127 // at T[i:]. The two carries (from a × d and Y × m) are added up in 128 // the next word T[n+i], and the carry bit from that addition is 129 // brought forward to the next iteration. 130 c2 := addMulVVW(T[i:4+i], p2[:], Y) 131 T[4+i], carry = bits.Add64(c1, c2, carry) 132 } 133 134 *c = gfP{T[4], T[5], T[6], T[7]} 135 gfpCarry(c, carry) 136 } 137 138 func gfpSqr(res, in *gfP, n int) { 139 gfpMul(res, in, in) 140 for i := 1; i < n; i++ { 141 gfpMul(res, res, res) 142 } 143 } 144 145 func gfpFromMont(res, in *gfP) { 146 var T [8]uint64 147 var carry uint64 148 copy(T[:], in[:]) 149 for i := 0; i < 4; i++ { 150 Y := T[i] * np[0] 151 c2 := addMulVVW(T[i:4+i], p2[:], Y) 152 T[4+i], carry = bits.Add64(uint64(0), c2, carry) 153 } 154 155 *res = gfP{T[4], T[5], T[6], T[7]} 156 gfpCarry(res, carry) 157 } 158 159 func gfpMarshal(out *[32]byte, in *gfP) { 160 for w := uint(0); w < 4; w++ { 161 for b := uint(0); b < 8; b++ { 162 out[8*w+b] = byte(in[3-w] >> (56 - 8*b)) 163 } 164 } 165 } 166 167 func gfpUnmarshal(out *gfP, in *[32]byte) { 168 for w := uint(0); w < 4; w++ { 169 out[3-w] = 0 170 for b := uint(0); b < 8; b++ { 171 out[3-w] += uint64(in[8*w+b]) << (56 - 8*b) 172 } 173 } 174 }