github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2_bn254.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  import (
    18  	"bytes"
    19  	"html/template"
    20  	"io"
    21  	"strings"
    22  
    23  	"github.com/consensys/bavard/amd64"
    24  	gamd64 "github.com/consensys/gnark-crypto/field/generator/asm/amd64"
    25  )
    26  
    27  func (fq2 *Fq2Amd64) generateMulByNonResidueE2BN254() {
    28  	// 	var a, b fp.Element
    29  	// 	a.Double(&x.A0).Double(&a).Double(&a).fq2.Add(&a, &x.A0).fq2.Sub(&a, &x.A1)
    30  	// 	b.Double(&x.A1).Double(&b).Double(&b).fq2.Add(&b, &x.A1).fq2.Add(&b, &x.A0)
    31  	// 	z.A0.Set(&a)
    32  	// 	z.A1.Set(&b)
    33  	registers := fq2.FnHeader("mulNonResE2", 0, 16)
    34  
    35  	a := registers.PopN(fq2.NbWords)
    36  	b := registers.PopN(fq2.NbWords)
    37  	x := registers.Pop()
    38  
    39  	fq2.MOVQ("x+8(FP)", x)
    40  	fq2.Mov(x, a) // a = a0
    41  
    42  	fq2.Add(a, a)
    43  	fq2.Reduce(&registers, a)
    44  
    45  	fq2.Add(a, a)
    46  	fq2.Reduce(&registers, a)
    47  
    48  	fq2.Add(a, a)
    49  	fq2.Reduce(&registers, a)
    50  
    51  	fq2.Add(x, a)
    52  	fq2.Reduce(&registers, a)
    53  
    54  	fq2.Mov(x, b, fq2.NbWords) // b = a1
    55  	zero := registers.Pop()
    56  	fq2.XORQ(zero, zero)
    57  	fq2.Sub(b, a)
    58  	fq2.modReduceAfterSub(&registers, zero, a)
    59  	registers.Push(zero)
    60  
    61  	fq2.Add(b, b)
    62  	fq2.Reduce(&registers, b)
    63  
    64  	fq2.Add(b, b)
    65  	fq2.Reduce(&registers, b)
    66  
    67  	fq2.Add(b, b)
    68  	fq2.Reduce(&registers, b)
    69  
    70  	fq2.Add(x, b, fq2.NbWords)
    71  	fq2.Reduce(&registers, b)
    72  	fq2.Add(x, b)
    73  	fq2.Reduce(&registers, b)
    74  
    75  	fq2.MOVQ("res+0(FP)", x)
    76  	fq2.Mov(a, x)
    77  	fq2.Mov(b, x, 0, fq2.NbWords)
    78  
    79  	fq2.RET()
    80  }
    81  
    82  func (fq2 *Fq2Amd64) generateSquareE2BN254(forceCheck bool) {
    83  
    84  	const argSize = 16
    85  	minStackSize := 0
    86  	if forceCheck {
    87  		minStackSize = argSize
    88  	}
    89  
    90  	stackSize := fq2.StackSize(fq2.NbWords*3, 2, minStackSize)
    91  	registers := fq2.FnHeader("squareAdxE2", stackSize, argSize, amd64.DX, amd64.AX)
    92  	defer fq2.AssertCleanStack(stackSize, minStackSize)
    93  	fq2.WriteLn("NO_LOCAL_POINTERS")
    94  
    95  	fq2.WriteLn(`
    96  	// z.A0 = (x.A0 + x.A1) * (x.A0 - x.A1)
    97  	// z.A1 = 2 * x.A0 * x.A1
    98  	`)
    99  
   100  	// check ADX instruction support
   101  	lblNoAdx := fq2.NewLabel()
   102  	if forceCheck {
   103  		fq2.CMPB("·supportAdx(SB)", 1)
   104  		fq2.JNE(lblNoAdx)
   105  	}
   106  
   107  	// used in the mul operation
   108  	op1 := registers.PopN(fq2.NbWords)
   109  	op2 := registers.PopN(fq2.NbWords)
   110  	res := registers.PopN(fq2.NbWords)
   111  
   112  	ax := amd64.AX
   113  	dx := amd64.DX
   114  	// b = a0 * a1 * 2
   115  
   116  	fq2.Comment("2 * x.A0 * x.A1")
   117  	fq2.MOVQ("x+8(FP)", ax)
   118  
   119  	fq2.LabelRegisters("x.A0", op2...)
   120  	fq2.Mov(ax, op2)
   121  
   122  	fq2.LabelRegisters("2 * x.A1", op1...)
   123  	fq2.Mov(ax, op1, fq2.NbWords)
   124  	fq2.Add(op1, op1) // op1, no reduce
   125  
   126  	fq2.mulElement()
   127  	fq2.ReduceElement(res, op1)
   128  
   129  	fq2.MOVQ("x+8(FP)", ax)
   130  
   131  	fq2.LabelRegisters("x.A1", op1...)
   132  	fq2.Mov(ax, op1, fq2.NbWords)
   133  
   134  	fq2.MOVQ("res+0(FP)", dx)
   135  	fq2.Mov(res, dx, 0, fq2.NbWords)
   136  	fq2.Mov(op1, res)
   137  
   138  	// a = a0 + a1
   139  	fq2.Comment("Add(&x.A0, &x.A1)")
   140  	fq2.Add(op2, op1)
   141  
   142  	zero := amd64.BP
   143  	fq2.XORQ(zero, zero)
   144  
   145  	// b = a0 - a1
   146  	fq2.Comment("Sub(&x.A0, &x.A1)")
   147  	fq2.Sub(res, op2)
   148  	fq2.modReduceAfterSubScratch(zero, op2, res) // using res as scratch registers
   149  
   150  	// a = a * b
   151  	fq2.mulElement()
   152  	fq2.ReduceElement(res, op1)
   153  
   154  	fq2.MOVQ("res+0(FP)", ax)
   155  	fq2.Mov(res, ax)
   156  
   157  	// result.a0 = a
   158  	fq2.RET()
   159  
   160  	// No adx
   161  	if forceCheck {
   162  		fq2.LABEL(lblNoAdx)
   163  		fq2.MOVQ("res+0(FP)", amd64.AX)
   164  		fq2.MOVQ(amd64.AX, "(SP)")
   165  		fq2.MOVQ("x+8(FP)", amd64.AX)
   166  		fq2.MOVQ(amd64.AX, "8(SP)")
   167  		fq2.WriteLn("CALL ·squareGenericE2(SB)")
   168  		fq2.RET()
   169  	}
   170  
   171  }
   172  
   173  func (fq2 *Fq2Amd64) generateMulE2BN254(forceCheck bool) {
   174  	const argSize = 24
   175  	minStackSize := 0
   176  	if forceCheck {
   177  		minStackSize = argSize
   178  	}
   179  	stackSize := fq2.StackSize(fq2.NbWords*5, 2, minStackSize)
   180  	registers := fq2.FnHeader("mulAdxE2", stackSize, argSize, amd64.DX, amd64.AX)
   181  	defer fq2.AssertCleanStack(stackSize, minStackSize)
   182  
   183  	fq2.WriteLn("NO_LOCAL_POINTERS")
   184  	fq2.WriteLn(`
   185  	// var a, b, c fp.Element
   186  	// a.Add(&x.A0, &x.A1)
   187  	// b.Add(&y.A0, &y.A1)
   188  	// a.Mul(&a, &b)
   189  	// b.Mul(&x.A0, &y.A0)
   190  	// c.Mul(&x.A1, &y.A1)
   191  	// z.A1.Sub(&a, &b).Sub(&z.A1, &c)
   192  	// z.A0.Sub(&b, &c)
   193  	`)
   194  	lblNoAdx := fq2.NewLabel()
   195  
   196  	if forceCheck {
   197  		fq2.CMPB("·supportAdx(SB)", 1)
   198  		fq2.JNE(lblNoAdx)
   199  	}
   200  
   201  	// used in the mul operation
   202  	op1 := registers.PopN(fq2.NbWords)
   203  	op2 := registers.PopN(fq2.NbWords)
   204  	res := registers.PopN(fq2.NbWords)
   205  
   206  	ax := amd64.AX
   207  	dx := amd64.DX
   208  
   209  	aStack := fq2.PopN(&registers, true)
   210  	cStack := fq2.PopN(&registers, true)
   211  
   212  	fq2.MOVQ("x+8(FP)", ax)
   213  	fq2.MOVQ("y+16(FP)", dx)
   214  
   215  	// c = x.A1 * y.A1
   216  	fq2.Mov(ax, op1, fq2.NbWords)
   217  	fq2.Mov(dx, op2, fq2.NbWords)
   218  
   219  	fq2.mulElement()
   220  	fq2.ReduceElement(res, op2)
   221  	// res = x.A1 * y.A1
   222  	// pushing on stack for later use.
   223  	fq2.Mov(res, cStack)
   224  
   225  	fq2.MOVQ("x+8(FP)", ax)
   226  	fq2.MOVQ("y+16(FP)", dx)
   227  
   228  	// a = x.a0 + x.a1
   229  	fq2.Add(ax, op1)
   230  
   231  	// b = y.a0 + y.a1
   232  	fq2.Mov(dx, op2)
   233  	fq2.Add(dx, op2, fq2.NbWords)
   234  	// --> note, we don't reduce, as this is used as input to the mul which accept input of size D-1/2 -1
   235  	// TODO @gbotrel prove the upper bound / lower bound case for the no_carry mul
   236  
   237  	// a = 	a * b = (x.a0 + x.a1) *  (y.a0 + y.a1)
   238  	fq2.mulElement()
   239  	fq2.ReduceElement(res, op2)
   240  
   241  	// moving result to the stack.
   242  	fq2.Mov(res, aStack)
   243  
   244  	// b = x.A0 * y.AO
   245  	fq2.MOVQ("x+8(FP)", ax)
   246  	fq2.MOVQ("y+16(FP)", dx)
   247  
   248  	fq2.Mov(ax, op1)
   249  	fq2.Mov(dx, op2)
   250  
   251  	fq2.mulElement()
   252  	fq2.ReduceElement(res, op2)
   253  
   254  	zero := dx
   255  	fq2.XORQ(zero, zero)
   256  
   257  	// a = a - b -c
   258  	fq2.Mov(aStack, op1)
   259  	fq2.Sub(res, op1) // a -= b
   260  	fq2.modReduceAfterSubScratch(zero, op1, op2)
   261  
   262  	fq2.Sub(cStack, op1) // a -= c
   263  	fq2.modReduceAfterSubScratch(zero, op1, op2)
   264  
   265  	fq2.MOVQ("res+0(FP)", ax)
   266  	fq2.Mov(op1, ax, 0, fq2.NbWords)
   267  
   268  	// b = b - c
   269  	fq2.Mov(cStack, op2)
   270  	fq2.Sub(op2, res) // b -= c
   271  	fq2.modReduceAfterSubScratch(zero, res, op1)
   272  
   273  	fq2.Mov(res, ax)
   274  
   275  	fq2.RET()
   276  
   277  	// No adx
   278  	if forceCheck {
   279  		fq2.LABEL(lblNoAdx)
   280  		fq2.MOVQ("res+0(FP)", amd64.AX)
   281  		fq2.MOVQ(amd64.AX, "(SP)")
   282  		fq2.MOVQ("x+8(FP)", amd64.AX)
   283  		fq2.MOVQ(amd64.AX, "8(SP)")
   284  		fq2.MOVQ("y+16(FP)", amd64.AX)
   285  		fq2.MOVQ(amd64.AX, "16(SP)")
   286  		fq2.WriteLn("CALL ·mulGenericE2(SB)")
   287  		fq2.RET()
   288  	}
   289  	fq2.Push(&registers, aStack...)
   290  	fq2.Push(&registers, cStack...)
   291  
   292  }
   293  
   294  func (fq2 *Fq2Amd64) generateMulDefine() {
   295  	r := amd64.NewRegisters()
   296  	r.Remove(amd64.DX)
   297  	r.Remove(amd64.AX)
   298  	op1 := r.PopN(fq2.NbWords)
   299  	op2 := r.PopN(fq2.NbWords)
   300  	res := r.PopN(fq2.NbWords)
   301  	xat := func(i int) string {
   302  		return string(op1[i])
   303  	}
   304  	yat := func(i int) string {
   305  		return string(op2[i])
   306  	}
   307  
   308  	wd := writerDefine{fq2.w}
   309  	tw := gamd64.NewFFAmd64(&wd, fq2.F)
   310  
   311  	_, _ = io.WriteString(fq2.w, "// this code is generated and identical to fp.Mul(...)\n")
   312  	_, _ = io.WriteString(fq2.w, "#define MUL() \\ \n")
   313  	tw.MulADX(&r, xat, yat, res)
   314  }
   315  
   316  func (fq2 *Fq2Amd64) mulElement() {
   317  	r := amd64.NewRegisters()
   318  	r.Remove(amd64.DX)
   319  	r.Remove(amd64.AX)
   320  	op1 := r.PopN(fq2.NbWords)
   321  	op2 := r.PopN(fq2.NbWords)
   322  	res := r.PopN(fq2.NbWords)
   323  	const tmplMul = `// mul ({{- range $i, $a := .A}}{{$a}}{{- if ne $.Last $i}},{{ end}}{{- end}}) with ({{- range $i, $b := .B}}{{$b}}{{- if ne $.Last $i}},{{ end}}{{- end}}) into ({{- range $i, $c := .C}}{{$c}}{{- if ne $.Last $i}},{{ end}}{{- end}})
   324  	MUL()`
   325  
   326  	var buf bytes.Buffer
   327  	err := template.Must(template.New("").
   328  		Parse(tmplMul)).Execute(&buf, struct {
   329  		A, B, C []amd64.Register
   330  		Last    int
   331  	}{op1, op2, res, len(op1) - 1})
   332  
   333  	if err != nil {
   334  		panic(err)
   335  	}
   336  
   337  	fq2.WriteLn(buf.String())
   338  	fq2.WriteLn("")
   339  }
   340  
   341  type writerDefine struct {
   342  	w io.Writer
   343  }
   344  
   345  func (w *writerDefine) Write(p []byte) (n int, err error) {
   346  	line := string(p)
   347  	line = strings.TrimSpace(line)
   348  	if strings.HasPrefix(line, "//") {
   349  		return // drop comments
   350  	}
   351  	line = string(p)
   352  	line = strings.ReplaceAll(line, "\n", "; \\ \n")
   353  	return io.WriteString(w.w, line)
   354  }