github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2_bn254.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package amd64 16 17 import ( 18 "bytes" 19 "html/template" 20 "io" 21 "strings" 22 23 "github.com/consensys/bavard/amd64" 24 gamd64 "github.com/consensys/gnark-crypto/field/generator/asm/amd64" 25 ) 26 27 func (fq2 *Fq2Amd64) generateMulByNonResidueE2BN254() { 28 // var a, b fp.Element 29 // a.Double(&x.A0).Double(&a).Double(&a).fq2.Add(&a, &x.A0).fq2.Sub(&a, &x.A1) 30 // b.Double(&x.A1).Double(&b).Double(&b).fq2.Add(&b, &x.A1).fq2.Add(&b, &x.A0) 31 // z.A0.Set(&a) 32 // z.A1.Set(&b) 33 registers := fq2.FnHeader("mulNonResE2", 0, 16) 34 35 a := registers.PopN(fq2.NbWords) 36 b := registers.PopN(fq2.NbWords) 37 x := registers.Pop() 38 39 fq2.MOVQ("x+8(FP)", x) 40 fq2.Mov(x, a) // a = a0 41 42 fq2.Add(a, a) 43 fq2.Reduce(®isters, a) 44 45 fq2.Add(a, a) 46 fq2.Reduce(®isters, a) 47 48 fq2.Add(a, a) 49 fq2.Reduce(®isters, a) 50 51 fq2.Add(x, a) 52 fq2.Reduce(®isters, a) 53 54 fq2.Mov(x, b, fq2.NbWords) // b = a1 55 zero := registers.Pop() 56 fq2.XORQ(zero, zero) 57 fq2.Sub(b, a) 58 fq2.modReduceAfterSub(®isters, zero, a) 59 registers.Push(zero) 60 61 fq2.Add(b, b) 62 fq2.Reduce(®isters, b) 63 64 fq2.Add(b, b) 65 fq2.Reduce(®isters, b) 66 67 fq2.Add(b, b) 68 fq2.Reduce(®isters, b) 69 70 fq2.Add(x, b, fq2.NbWords) 71 fq2.Reduce(®isters, b) 72 fq2.Add(x, b) 73 fq2.Reduce(®isters, b) 74 75 fq2.MOVQ("res+0(FP)", x) 76 fq2.Mov(a, x) 77 fq2.Mov(b, x, 0, fq2.NbWords) 78 79 fq2.RET() 80 } 81 82 func (fq2 *Fq2Amd64) generateSquareE2BN254(forceCheck bool) { 83 84 const argSize = 16 85 minStackSize := 0 86 if forceCheck { 87 minStackSize = argSize 88 } 89 90 stackSize := fq2.StackSize(fq2.NbWords*3, 2, minStackSize) 91 registers := fq2.FnHeader("squareAdxE2", stackSize, argSize, amd64.DX, amd64.AX) 92 defer fq2.AssertCleanStack(stackSize, minStackSize) 93 fq2.WriteLn("NO_LOCAL_POINTERS") 94 95 fq2.WriteLn(` 96 // z.A0 = (x.A0 + x.A1) * (x.A0 - x.A1) 97 // z.A1 = 2 * x.A0 * x.A1 98 `) 99 100 // check ADX instruction support 101 lblNoAdx := fq2.NewLabel() 102 if forceCheck { 103 fq2.CMPB("·supportAdx(SB)", 1) 104 fq2.JNE(lblNoAdx) 105 } 106 107 // used in the mul operation 108 op1 := registers.PopN(fq2.NbWords) 109 op2 := registers.PopN(fq2.NbWords) 110 res := registers.PopN(fq2.NbWords) 111 112 ax := amd64.AX 113 dx := amd64.DX 114 // b = a0 * a1 * 2 115 116 fq2.Comment("2 * x.A0 * x.A1") 117 fq2.MOVQ("x+8(FP)", ax) 118 119 fq2.LabelRegisters("x.A0", op2...) 120 fq2.Mov(ax, op2) 121 122 fq2.LabelRegisters("2 * x.A1", op1...) 123 fq2.Mov(ax, op1, fq2.NbWords) 124 fq2.Add(op1, op1) // op1, no reduce 125 126 fq2.mulElement() 127 fq2.ReduceElement(res, op1) 128 129 fq2.MOVQ("x+8(FP)", ax) 130 131 fq2.LabelRegisters("x.A1", op1...) 132 fq2.Mov(ax, op1, fq2.NbWords) 133 134 fq2.MOVQ("res+0(FP)", dx) 135 fq2.Mov(res, dx, 0, fq2.NbWords) 136 fq2.Mov(op1, res) 137 138 // a = a0 + a1 139 fq2.Comment("Add(&x.A0, &x.A1)") 140 fq2.Add(op2, op1) 141 142 zero := amd64.BP 143 fq2.XORQ(zero, zero) 144 145 // b = a0 - a1 146 fq2.Comment("Sub(&x.A0, &x.A1)") 147 fq2.Sub(res, op2) 148 fq2.modReduceAfterSubScratch(zero, op2, res) // using res as scratch registers 149 150 // a = a * b 151 fq2.mulElement() 152 fq2.ReduceElement(res, op1) 153 154 fq2.MOVQ("res+0(FP)", ax) 155 fq2.Mov(res, ax) 156 157 // result.a0 = a 158 fq2.RET() 159 160 // No adx 161 if forceCheck { 162 fq2.LABEL(lblNoAdx) 163 fq2.MOVQ("res+0(FP)", amd64.AX) 164 fq2.MOVQ(amd64.AX, "(SP)") 165 fq2.MOVQ("x+8(FP)", amd64.AX) 166 fq2.MOVQ(amd64.AX, "8(SP)") 167 fq2.WriteLn("CALL ·squareGenericE2(SB)") 168 fq2.RET() 169 } 170 171 } 172 173 func (fq2 *Fq2Amd64) generateMulE2BN254(forceCheck bool) { 174 const argSize = 24 175 minStackSize := 0 176 if forceCheck { 177 minStackSize = argSize 178 } 179 stackSize := fq2.StackSize(fq2.NbWords*5, 2, minStackSize) 180 registers := fq2.FnHeader("mulAdxE2", stackSize, argSize, amd64.DX, amd64.AX) 181 defer fq2.AssertCleanStack(stackSize, minStackSize) 182 183 fq2.WriteLn("NO_LOCAL_POINTERS") 184 fq2.WriteLn(` 185 // var a, b, c fp.Element 186 // a.Add(&x.A0, &x.A1) 187 // b.Add(&y.A0, &y.A1) 188 // a.Mul(&a, &b) 189 // b.Mul(&x.A0, &y.A0) 190 // c.Mul(&x.A1, &y.A1) 191 // z.A1.Sub(&a, &b).Sub(&z.A1, &c) 192 // z.A0.Sub(&b, &c) 193 `) 194 lblNoAdx := fq2.NewLabel() 195 196 if forceCheck { 197 fq2.CMPB("·supportAdx(SB)", 1) 198 fq2.JNE(lblNoAdx) 199 } 200 201 // used in the mul operation 202 op1 := registers.PopN(fq2.NbWords) 203 op2 := registers.PopN(fq2.NbWords) 204 res := registers.PopN(fq2.NbWords) 205 206 ax := amd64.AX 207 dx := amd64.DX 208 209 aStack := fq2.PopN(®isters, true) 210 cStack := fq2.PopN(®isters, true) 211 212 fq2.MOVQ("x+8(FP)", ax) 213 fq2.MOVQ("y+16(FP)", dx) 214 215 // c = x.A1 * y.A1 216 fq2.Mov(ax, op1, fq2.NbWords) 217 fq2.Mov(dx, op2, fq2.NbWords) 218 219 fq2.mulElement() 220 fq2.ReduceElement(res, op2) 221 // res = x.A1 * y.A1 222 // pushing on stack for later use. 223 fq2.Mov(res, cStack) 224 225 fq2.MOVQ("x+8(FP)", ax) 226 fq2.MOVQ("y+16(FP)", dx) 227 228 // a = x.a0 + x.a1 229 fq2.Add(ax, op1) 230 231 // b = y.a0 + y.a1 232 fq2.Mov(dx, op2) 233 fq2.Add(dx, op2, fq2.NbWords) 234 // --> note, we don't reduce, as this is used as input to the mul which accept input of size D-1/2 -1 235 // TODO @gbotrel prove the upper bound / lower bound case for the no_carry mul 236 237 // a = a * b = (x.a0 + x.a1) * (y.a0 + y.a1) 238 fq2.mulElement() 239 fq2.ReduceElement(res, op2) 240 241 // moving result to the stack. 242 fq2.Mov(res, aStack) 243 244 // b = x.A0 * y.AO 245 fq2.MOVQ("x+8(FP)", ax) 246 fq2.MOVQ("y+16(FP)", dx) 247 248 fq2.Mov(ax, op1) 249 fq2.Mov(dx, op2) 250 251 fq2.mulElement() 252 fq2.ReduceElement(res, op2) 253 254 zero := dx 255 fq2.XORQ(zero, zero) 256 257 // a = a - b -c 258 fq2.Mov(aStack, op1) 259 fq2.Sub(res, op1) // a -= b 260 fq2.modReduceAfterSubScratch(zero, op1, op2) 261 262 fq2.Sub(cStack, op1) // a -= c 263 fq2.modReduceAfterSubScratch(zero, op1, op2) 264 265 fq2.MOVQ("res+0(FP)", ax) 266 fq2.Mov(op1, ax, 0, fq2.NbWords) 267 268 // b = b - c 269 fq2.Mov(cStack, op2) 270 fq2.Sub(op2, res) // b -= c 271 fq2.modReduceAfterSubScratch(zero, res, op1) 272 273 fq2.Mov(res, ax) 274 275 fq2.RET() 276 277 // No adx 278 if forceCheck { 279 fq2.LABEL(lblNoAdx) 280 fq2.MOVQ("res+0(FP)", amd64.AX) 281 fq2.MOVQ(amd64.AX, "(SP)") 282 fq2.MOVQ("x+8(FP)", amd64.AX) 283 fq2.MOVQ(amd64.AX, "8(SP)") 284 fq2.MOVQ("y+16(FP)", amd64.AX) 285 fq2.MOVQ(amd64.AX, "16(SP)") 286 fq2.WriteLn("CALL ·mulGenericE2(SB)") 287 fq2.RET() 288 } 289 fq2.Push(®isters, aStack...) 290 fq2.Push(®isters, cStack...) 291 292 } 293 294 func (fq2 *Fq2Amd64) generateMulDefine() { 295 r := amd64.NewRegisters() 296 r.Remove(amd64.DX) 297 r.Remove(amd64.AX) 298 op1 := r.PopN(fq2.NbWords) 299 op2 := r.PopN(fq2.NbWords) 300 res := r.PopN(fq2.NbWords) 301 xat := func(i int) string { 302 return string(op1[i]) 303 } 304 yat := func(i int) string { 305 return string(op2[i]) 306 } 307 308 wd := writerDefine{fq2.w} 309 tw := gamd64.NewFFAmd64(&wd, fq2.F) 310 311 _, _ = io.WriteString(fq2.w, "// this code is generated and identical to fp.Mul(...)\n") 312 _, _ = io.WriteString(fq2.w, "#define MUL() \\ \n") 313 tw.MulADX(&r, xat, yat, res) 314 } 315 316 func (fq2 *Fq2Amd64) mulElement() { 317 r := amd64.NewRegisters() 318 r.Remove(amd64.DX) 319 r.Remove(amd64.AX) 320 op1 := r.PopN(fq2.NbWords) 321 op2 := r.PopN(fq2.NbWords) 322 res := r.PopN(fq2.NbWords) 323 const tmplMul = `// mul ({{- range $i, $a := .A}}{{$a}}{{- if ne $.Last $i}},{{ end}}{{- end}}) with ({{- range $i, $b := .B}}{{$b}}{{- if ne $.Last $i}},{{ end}}{{- end}}) into ({{- range $i, $c := .C}}{{$c}}{{- if ne $.Last $i}},{{ end}}{{- end}}) 324 MUL()` 325 326 var buf bytes.Buffer 327 err := template.Must(template.New(""). 328 Parse(tmplMul)).Execute(&buf, struct { 329 A, B, C []amd64.Register 330 Last int 331 }{op1, op2, res, len(op1) - 1}) 332 333 if err != nil { 334 panic(err) 335 } 336 337 fq2.WriteLn(buf.String()) 338 fq2.WriteLn("") 339 } 340 341 type writerDefine struct { 342 w io.Writer 343 } 344 345 func (w *writerDefine) Write(p []byte) (n int, err error) { 346 line := string(p) 347 line = strings.TrimSpace(line) 348 if strings.HasPrefix(line, "//") { 349 return // drop comments 350 } 351 line = string(p) 352 line = strings.ReplaceAll(line, "\n", "; \\ \n") 353 return io.WriteString(w.w, line) 354 }