github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2_bls381.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package amd64 16 17 import ( 18 "github.com/consensys/bavard/amd64" 19 ) 20 21 func (fq2 *Fq2Amd64) generateMulByNonResidueE2BLS381() { 22 // // MulByNonResidue multiplies a E2 by (1,1) 23 // func (z *E2) MulByNonResidue(x *E2) *E2 { 24 // var a fp.Element 25 // a.Sub(&x.A0, &x.A1) 26 // z.A1.Add(&x.A0, &x.A1) 27 // z.A0.Set(&a) 28 // return z 29 // } 30 registers := fq2.FnHeader("mulNonResE2", 0, 16) 31 32 a := registers.PopN(fq2.NbWords) 33 b := registers.PopN(fq2.NbWords) 34 x := registers.Pop() 35 tr := registers.Pop() // zero or r 36 fq2.XORQ(tr, tr) // set to zero 37 38 fq2.MOVQ("x+8(FP)", x) 39 fq2.Mov(x, a) // a = a0 40 41 // a = x.A0 - x.A1 42 fq2.Sub(x, a, fq2.NbWords) 43 fq2.modReduceAfterSubScratch(tr, a, b) 44 // b = x.A0 + x.A1 45 fq2.Mov(x, b, fq2.NbWords) // b = a1 46 fq2.Add(x, b) 47 48 fq2.MOVQ("res+0(FP)", tr) 49 fq2.Mov(a, tr) 50 fq2.ReduceElement(b, a) 51 fq2.Mov(b, tr, 0, fq2.NbWords) 52 53 fq2.RET() 54 } 55 56 func (fq2 *Fq2Amd64) generateSquareE2BLS381(forceCheck bool) { 57 // // Square sets z to the E2-product of x,x returns z 58 // func (z *E2) Square(x *E2) *E2 { 59 // // adapted from algo 22 https://eprint.iacr.org/2010/354.pdf 60 // var a, b fp.Element 61 // a.Add(&x.A0, &x.A1) 62 // b.Sub(&x.A0, &x.A1) 63 // a.Mul(&a, &b) 64 // b.Mul(&x.A0, &x.A1).Double(&b) 65 // z.A0.Set(&a) 66 // z.A1.Set(&b) 67 // return z 68 // } 69 const argSize = 16 70 minStackSize := 0 71 if forceCheck { 72 minStackSize = argSize 73 } 74 stackSize := fq2.StackSize(fq2.NbWords*3, 2, minStackSize) 75 registers := fq2.FnHeader("squareAdxE2", stackSize, argSize, amd64.DX, amd64.AX) 76 defer fq2.AssertCleanStack(stackSize, minStackSize) 77 fq2.WriteLn("NO_LOCAL_POINTERS") 78 79 fq2.WriteLn(` 80 // z.A0 = (x.A0 + x.A1) * (x.A0 - x.A1) 81 // z.A1 = 2 * x.A0 * x.A1 82 `) 83 84 noAdx := fq2.NewLabel() 85 if forceCheck { 86 // check ADX instruction support 87 fq2.CMPB("·supportAdx(SB)", 1) 88 fq2.JNE(noAdx) 89 } 90 91 // used in the mul operation 92 op1 := registers.PopN(fq2.NbWords) 93 res := registers.PopN(fq2.NbWords) 94 95 xat := func(i int) string { 96 return string(op1[i]) 97 } 98 99 ax := amd64.AX 100 dx := amd64.DX 101 102 // b = a0 * a1 * 2 103 104 fq2.Comment("2 * x.A0 * x.A1") 105 fq2.MOVQ("x+8(FP)", ax) 106 107 fq2.LabelRegisters("2 * x.A1", op1...) 108 fq2.Mov(ax, op1, fq2.NbWords) 109 fq2.Add(op1, op1) // op1, no reduce 110 111 fq2.MulADX(®isters, xat, func(i int) string { 112 fq2.MOVQ("x+8(FP)", dx) 113 return dx.At(i) 114 }, res) 115 fq2.ReduceElement(res, op1) 116 117 fq2.MOVQ("x+8(FP)", ax) 118 119 fq2.LabelRegisters("x.A1", op1...) 120 fq2.Mov(ax, op1, fq2.NbWords) 121 122 fq2.MOVQ("res+0(FP)", dx) 123 fq2.Mov(res, dx, 0, fq2.NbWords) 124 fq2.Mov(op1, res) 125 126 // op1 and res both contains x.A1 at this point 127 // res+0(FP) (z.A1) must not be referenced. 128 129 // a = a0 + a1 130 fq2.Comment("Add(&x.A0, &x.A1)") 131 fq2.Add(ax, op1) 132 //--> must save on stack 133 a0a1 := fq2.PopN(®isters, true) 134 fq2.Mov(op1, a0a1) 135 136 zero := amd64.BP 137 fq2.XORQ(zero, zero) 138 139 // b = a0 - a1 140 fq2.Comment("Sub(&x.A0, &x.A1)") 141 fq2.Mov(ax, op1) 142 fq2.Sub(res, op1) 143 fq2.modReduceAfterSubScratch(zero, op1, res) // using res as scratch registers 144 145 // a = a * b 146 fq2.MulADX(®isters, xat, func(i int) string { return string(a0a1[i]) }, res) 147 fq2.ReduceElement(res, op1) 148 149 fq2.MOVQ("res+0(FP)", ax) 150 fq2.Mov(res, ax) 151 152 // result.a0 = a 153 fq2.RET() 154 155 // No adx 156 if forceCheck { 157 fq2.LABEL(noAdx) 158 fq2.MOVQ("res+0(FP)", amd64.AX) 159 fq2.MOVQ(amd64.AX, "(SP)") 160 fq2.MOVQ("x+8(FP)", amd64.AX) 161 fq2.MOVQ(amd64.AX, "8(SP)") 162 fq2.WriteLn("CALL ·squareGenericE2(SB)") 163 fq2.RET() 164 } 165 166 fq2.Push(®isters, a0a1...) 167 } 168 169 func (fq2 *Fq2Amd64) generateMulE2BLS381(forceCheck bool) { 170 // var a, b, c fp.Element 171 // a.Add(&x.A0, &x.A1) 172 // b.Add(&y.A0, &y.A1) 173 // a.Mul(&a, &b) 174 // b.Mul(&x.A0, &y.A0) 175 // c.Mul(&x.A1, &y.A1) 176 // z.A1.Sub(&a, &b).Sub(&z.A1, &c) 177 // z.A0.Sub(&b, &c) 178 179 // we need a bit of stack space to store the results of the xA0yA0 and xA1yA1 multiplications 180 const argSize = 24 181 minStackSize := 0 182 if forceCheck { 183 minStackSize = argSize 184 } 185 stackSize := fq2.StackSize(fq2.NbWords*4, 2, minStackSize) 186 registers := fq2.FnHeader("mulAdxE2", stackSize, argSize, amd64.DX, amd64.AX) 187 defer fq2.AssertCleanStack(stackSize, minStackSize) 188 189 fq2.WriteLn("NO_LOCAL_POINTERS") 190 191 fq2.WriteLn(` 192 // var a, b, c fp.Element 193 // a.Add(&x.A0, &x.A1) 194 // b.Add(&y.A0, &y.A1) 195 // a.Mul(&a, &b) 196 // b.Mul(&x.A0, &y.A0) 197 // c.Mul(&x.A1, &y.A1) 198 // z.A1.Sub(&a, &b).Sub(&z.A1, &c) 199 // z.A0.Sub(&b, &c) 200 `) 201 202 lblNoAdx := fq2.NewLabel() 203 // check ADX instruction support 204 if forceCheck { 205 fq2.CMPB("·supportAdx(SB)", 1) 206 fq2.JNE(lblNoAdx) 207 } 208 209 // used in the mul operation 210 op1 := registers.PopN(fq2.NbWords) 211 res := registers.PopN(fq2.NbWords) 212 213 xat := func(i int) string { 214 return string(op1[i]) 215 } 216 217 ax := amd64.AX 218 dx := amd64.DX 219 220 aStack := fq2.PopN(®isters, true) 221 cStack := fq2.PopN(®isters, true) 222 223 fq2.MOVQ("x+8(FP)", ax) 224 225 // c = x.A1 * y.A1 226 fq2.Mov(ax, op1, fq2.NbWords) 227 fq2.MulADX(®isters, xat, func(i int) string { 228 fq2.MOVQ("y+16(FP)", dx) 229 return dx.At(i + fq2.NbWords) 230 }, res) 231 fq2.ReduceElement(res, op1) 232 // res = x.A1 * y.A1 233 // pushing on stack for later use. 234 fq2.Mov(res, cStack) 235 236 fq2.MOVQ("x+8(FP)", ax) 237 fq2.MOVQ("y+16(FP)", dx) 238 239 // a = x.a0 + x.a1 240 fq2.Mov(ax, op1, fq2.NbWords) 241 fq2.Add(ax, op1) 242 fq2.Mov(op1, aStack) 243 244 // b = y.a0 + y.a1 245 fq2.Mov(dx, op1) 246 fq2.Add(dx, op1, fq2.NbWords) 247 // --> note, we don't reduce, as this is used as input to the mul which accept input of size D-1/2 -1 248 249 // a = a * b = (x.a0 + x.a1) * (y.a0 + y.a1) 250 fq2.MulADX(®isters, xat, func(i int) string { 251 return string(aStack[i]) 252 }, res) 253 fq2.ReduceElement(res, op1) 254 255 // moving result to the stack. 256 fq2.Mov(res, aStack) 257 258 // b = x.A0 * y.AO 259 fq2.MOVQ("x+8(FP)", ax) 260 261 fq2.Mov(ax, op1) 262 263 fq2.MulADX(®isters, xat, func(i int) string { 264 fq2.MOVQ("y+16(FP)", dx) 265 return dx.At(i) 266 }, res) 267 fq2.ReduceElement(res, op1) 268 269 zero := dx 270 fq2.XORQ(zero, zero) 271 272 // a = a - b -c 273 fq2.Mov(aStack, op1) 274 fq2.Sub(res, op1) // a -= b 275 fq2.Mov(res, aStack) 276 fq2.modReduceAfterSubScratch(zero, op1, res) 277 278 fq2.Sub(cStack, op1) // a -= c 279 fq2.modReduceAfterSubScratch(zero, op1, res) 280 281 fq2.MOVQ("z+0(FP)", ax) 282 fq2.Mov(op1, ax, 0, fq2.NbWords) 283 284 // b = b - c 285 fq2.Mov(aStack, res) 286 fq2.Sub(cStack, res) // b -= c 287 fq2.modReduceAfterSubScratch(zero, res, op1) 288 289 fq2.Mov(res, ax) 290 291 fq2.RET() 292 293 // No adx 294 if forceCheck { 295 fq2.LABEL(lblNoAdx) 296 fq2.MOVQ("z+0(FP)", amd64.AX) 297 fq2.MOVQ(amd64.AX, "(SP)") 298 fq2.MOVQ("x+8(FP)", amd64.AX) 299 fq2.MOVQ(amd64.AX, "8(SP)") 300 fq2.MOVQ("y+16(FP)", amd64.AX) 301 fq2.MOVQ(amd64.AX, "16(SP)") 302 fq2.WriteLn("CALL ·mulGenericE2(SB)") 303 fq2.RET() 304 305 } 306 307 fq2.Push(®isters, aStack...) 308 fq2.Push(®isters, cStack...) 309 310 }