github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2_bls381.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  import (
    18  	"github.com/consensys/bavard/amd64"
    19  )
    20  
    21  func (fq2 *Fq2Amd64) generateMulByNonResidueE2BLS381() {
    22  	// // MulByNonResidue multiplies a E2 by (1,1)
    23  	// func (z *E2) MulByNonResidue(x *E2) *E2 {
    24  	// 	var a fp.Element
    25  	// 	a.Sub(&x.A0, &x.A1)
    26  	// 	z.A1.Add(&x.A0, &x.A1)
    27  	// 	z.A0.Set(&a)
    28  	// 	return z
    29  	// }
    30  	registers := fq2.FnHeader("mulNonResE2", 0, 16)
    31  
    32  	a := registers.PopN(fq2.NbWords)
    33  	b := registers.PopN(fq2.NbWords)
    34  	x := registers.Pop()
    35  	tr := registers.Pop() // zero or r
    36  	fq2.XORQ(tr, tr)      // set to zero
    37  
    38  	fq2.MOVQ("x+8(FP)", x)
    39  	fq2.Mov(x, a) // a = a0
    40  
    41  	// a = x.A0 - x.A1
    42  	fq2.Sub(x, a, fq2.NbWords)
    43  	fq2.modReduceAfterSubScratch(tr, a, b)
    44  	// b = x.A0 + x.A1
    45  	fq2.Mov(x, b, fq2.NbWords) // b = a1
    46  	fq2.Add(x, b)
    47  
    48  	fq2.MOVQ("res+0(FP)", tr)
    49  	fq2.Mov(a, tr)
    50  	fq2.ReduceElement(b, a)
    51  	fq2.Mov(b, tr, 0, fq2.NbWords)
    52  
    53  	fq2.RET()
    54  }
    55  
// generateSquareE2BLS381 emits the assembly routine "squareAdxE2", which
// squares an E2 element read through x+8(FP) and writes the result through
// res+0(FP).
//
// When forceCheck is true the emitted code first tests ·supportAdx(SB) and,
// if it is not 1, jumps to a fallback that copies both pointer arguments to
// the stack and calls ·squareGenericE2(SB). In that case the minimum stack
// size is raised to argSize so there is room for those outgoing arguments.
func (fq2 *Fq2Amd64) generateSquareE2BLS381(forceCheck bool) {
	// Go reference implementation that the emitted assembly mirrors:
	//
	// // Square sets z to the E2-product of x,x returns z
	// func (z *E2) Square(x *E2) *E2 {
	// 	// adapted from algo 22 https://eprint.iacr.org/2010/354.pdf
	// 	var a, b fp.Element
	// 	a.Add(&x.A0, &x.A1)
	// 	b.Sub(&x.A0, &x.A1)
	// 	a.Mul(&a, &b)
	// 	b.Mul(&x.A0, &x.A1).Double(&b)
	// 	z.A0.Set(&a)
	// 	z.A1.Set(&b)
	// 	return z
	// }

	// two pointer arguments (res at 0(FP), x at 8(FP)), 8 bytes each
	const argSize = 16
	minStackSize := 0
	if forceCheck {
		// the fallback path stores both arguments at (SP) and 8(SP)
		minStackSize = argSize
	}
	stackSize := fq2.StackSize(fq2.NbWords*3, 2, minStackSize)
	// DX and AX are passed to FnHeader and then used directly below
	// (presumably this keeps them out of the returned register pool — confirm).
	registers := fq2.FnHeader("squareAdxE2", stackSize, argSize, amd64.DX, amd64.AX)
	// runs when this generator returns, i.e. after the final Push below
	defer fq2.AssertCleanStack(stackSize, minStackSize)
	fq2.WriteLn("NO_LOCAL_POINTERS")

	fq2.WriteLn(`
	// z.A0 = (x.A0 + x.A1) * (x.A0 - x.A1)
	// z.A1 = 2 * x.A0 * x.A1
	`)

	noAdx := fq2.NewLabel()
	if forceCheck {
		// check ADX instruction support
		fq2.CMPB("·supportAdx(SB)", 1)
		fq2.JNE(noAdx)
	}

	// used in the mul operation
	op1 := registers.PopN(fq2.NbWords)
	res := registers.PopN(fq2.NbWords)

	// xat names the i-th word of the first multiplication operand, which
	// always lives in the op1 registers.
	xat := func(i int) string {
		return string(op1[i])
	}

	ax := amd64.AX
	dx := amd64.DX

	// z.A1 = 2 * x.A0 * x.A1

	fq2.Comment("2 * x.A0 * x.A1")
	fq2.MOVQ("x+8(FP)", ax)

	fq2.LabelRegisters("2 * x.A1", op1...)
	fq2.Mov(ax, op1, fq2.NbWords) // op1 = x.A1
	fq2.Add(op1, op1) // op1, no reduce

	// res = op1 * x.A0. The second-operand callback emits a reload of the x
	// pointer into DX every time it is invoked (presumably because DX is
	// overwritten during the multiplication — confirm against MulADX).
	fq2.MulADX(&registers, xat, func(i int) string {
		fq2.MOVQ("x+8(FP)", dx)
		return dx.At(i)
	}, res)
	fq2.ReduceElement(res, op1)

	fq2.MOVQ("x+8(FP)", ax)

	fq2.LabelRegisters("x.A1", op1...)
	fq2.Mov(ax, op1, fq2.NbWords) // op1 = x.A1

	// store z.A1 (second half of the result), then copy x.A1 into res
	fq2.MOVQ("res+0(FP)", dx)
	fq2.Mov(res, dx, 0, fq2.NbWords)
	fq2.Mov(op1, res)

	// op1 and res both contain x.A1 at this point.
	// z.A1 has already been written through res+0(FP) and must not be read
	// back from there.

	// a = a0 + a1
	fq2.Comment("Add(&x.A0, &x.A1)")
	fq2.Add(ax, op1)
	//--> must save on stack
	a0a1 := fq2.PopN(&registers, true)
	fq2.Mov(op1, a0a1)

	// BP serves as the zero value handed to modReduceAfterSubScratch
	zero := amd64.BP
	fq2.XORQ(zero, zero)

	// b = a0 - a1
	fq2.Comment("Sub(&x.A0, &x.A1)")
	fq2.Mov(ax, op1)  // op1 = x.A0
	fq2.Sub(res, op1) // op1 -= x.A1 (held in res)
	fq2.modReduceAfterSubScratch(zero, op1, res) // using res as scratch registers

	// a = a * b
	fq2.MulADX(&registers, xat, func(i int) string { return string(a0a1[i]) }, res)
	fq2.ReduceElement(res, op1)

	// result.a0 = a
	fq2.MOVQ("res+0(FP)", ax)
	fq2.Mov(res, ax)

	fq2.RET()

	// No adx
	if forceCheck {
		// fallback: forward (res, x) to the generic implementation
		fq2.LABEL(noAdx)
		fq2.MOVQ("res+0(FP)", amd64.AX)
		fq2.MOVQ(amd64.AX, "(SP)")
		fq2.MOVQ("x+8(FP)", amd64.AX)
		fq2.MOVQ(amd64.AX, "8(SP)")
		fq2.WriteLn("CALL ·squareGenericE2(SB)")
		fq2.RET()
	}

	// hand the a0a1 slots back to the pool (presumably required for the
	// deferred AssertCleanStack to pass — confirm)
	fq2.Push(&registers, a0a1...)
}
   168  
// generateMulE2BLS381 emits the assembly routine "mulAdxE2", which multiplies
// the E2 elements read through x+8(FP) and y+16(FP) and writes the product
// through z+0(FP).
//
// When forceCheck is true the emitted code first tests ·supportAdx(SB) and,
// if it is not 1, jumps to a fallback that copies the three pointer arguments
// to the stack and calls ·mulGenericE2(SB). In that case the minimum stack
// size is raised to argSize so there is room for those outgoing arguments.
func (fq2 *Fq2Amd64) generateMulE2BLS381(forceCheck bool) {
	// Go reference implementation that the emitted assembly mirrors:
	//
	// var a, b, c fp.Element
	// a.Add(&x.A0, &x.A1)
	// b.Add(&y.A0, &y.A1)
	// a.Mul(&a, &b)
	// b.Mul(&x.A0, &y.A0)
	// c.Mul(&x.A1, &y.A1)
	// z.A1.Sub(&a, &b).Sub(&z.A1, &c)
	// z.A0.Sub(&b, &c)

	// we need a bit of stack space to store the results of the xA0yA0 and xA1yA1 multiplications
	// three pointer arguments (z at 0(FP), x at 8(FP), y at 16(FP)), 8 bytes each
	const argSize = 24
	minStackSize := 0
	if forceCheck {
		// the fallback path stores the arguments at (SP), 8(SP) and 16(SP)
		minStackSize = argSize
	}
	stackSize := fq2.StackSize(fq2.NbWords*4, 2, minStackSize)
	// DX and AX are passed to FnHeader and then used directly below
	// (presumably this keeps them out of the returned register pool — confirm).
	registers := fq2.FnHeader("mulAdxE2", stackSize, argSize, amd64.DX, amd64.AX)
	// runs when this generator returns, i.e. after the final Push calls below
	defer fq2.AssertCleanStack(stackSize, minStackSize)

	fq2.WriteLn("NO_LOCAL_POINTERS")

	fq2.WriteLn(`
	// var a, b, c fp.Element
	// a.Add(&x.A0, &x.A1)
	// b.Add(&y.A0, &y.A1)
	// a.Mul(&a, &b)
	// b.Mul(&x.A0, &y.A0)
	// c.Mul(&x.A1, &y.A1)
	// z.A1.Sub(&a, &b).Sub(&z.A1, &c)
	// z.A0.Sub(&b, &c)
	`)

	lblNoAdx := fq2.NewLabel()
	// check ADX instruction support
	if forceCheck {
		fq2.CMPB("·supportAdx(SB)", 1)
		fq2.JNE(lblNoAdx)
	}

	// used in the mul operation
	op1 := registers.PopN(fq2.NbWords)
	res := registers.PopN(fq2.NbWords)

	// xat names the i-th word of the first multiplication operand, which
	// always lives in the op1 registers.
	xat := func(i int) string {
		return string(op1[i])
	}

	ax := amd64.AX
	dx := amd64.DX

	// aStack holds, in turn: x.A0 + x.A1, then the product a, then b.
	// cStack holds c = x.A1 * y.A1.
	aStack := fq2.PopN(&registers, true)
	cStack := fq2.PopN(&registers, true)

	fq2.MOVQ("x+8(FP)", ax)

	// c = x.A1 * y.A1
	fq2.Mov(ax, op1, fq2.NbWords) // op1 = x.A1
	// The second-operand callback emits a reload of the y pointer into DX
	// every time it is invoked (presumably because DX is overwritten during
	// the multiplication — confirm against MulADX).
	fq2.MulADX(&registers, xat, func(i int) string {
		fq2.MOVQ("y+16(FP)", dx)
		return dx.At(i + fq2.NbWords)
	}, res)
	fq2.ReduceElement(res, op1)
	// res = x.A1 * y.A1
	// pushing on stack for later use.
	fq2.Mov(res, cStack)

	fq2.MOVQ("x+8(FP)", ax)
	fq2.MOVQ("y+16(FP)", dx)

	// a = x.a0 + x.a1
	fq2.Mov(ax, op1, fq2.NbWords)
	fq2.Add(ax, op1)
	fq2.Mov(op1, aStack)

	// b = y.a0 + y.a1
	fq2.Mov(dx, op1)
	fq2.Add(dx, op1, fq2.NbWords)
	// --> note: neither sum is reduced here; per the original author's note,
	// the mul accepts unreduced inputs up to a bound (originally written as
	// "D-1/2 -1" — TODO confirm the exact bound).

	// a = 	a * b = (x.a0 + x.a1) *  (y.a0 + y.a1)
	fq2.MulADX(&registers, xat, func(i int) string {
		return string(aStack[i])
	}, res)
	fq2.ReduceElement(res, op1)

	// moving result to the stack.
	fq2.Mov(res, aStack)

	// b = x.A0 * y.A0
	fq2.MOVQ("x+8(FP)", ax)

	fq2.Mov(ax, op1)

	fq2.MulADX(&registers, xat, func(i int) string {
		fq2.MOVQ("y+16(FP)", dx)
		return dx.At(i)
	}, res)
	fq2.ReduceElement(res, op1)

	// DX serves as the zero value handed to modReduceAfterSubScratch
	zero := dx
	fq2.XORQ(zero, zero)

	// a = a - b -c
	fq2.Mov(aStack, op1)
	fq2.Sub(res, op1) // a -= b
	// b is saved in aStack (a now lives in op1) because res is passed as the
	// third argument of the reduction that follows
	fq2.Mov(res, aStack)
	fq2.modReduceAfterSubScratch(zero, op1, res)

	fq2.Sub(cStack, op1) // a -= c
	fq2.modReduceAfterSubScratch(zero, op1, res)

	// z.A1 = a - b - c
	fq2.MOVQ("z+0(FP)", ax)
	fq2.Mov(op1, ax, 0, fq2.NbWords)

	// b = b - c
	fq2.Mov(aStack, res) // reload b
	fq2.Sub(cStack, res) // b -= c
	fq2.modReduceAfterSubScratch(zero, res, op1)

	// z.A0 = b - c
	fq2.Mov(res, ax)

	fq2.RET()

	// No adx
	if forceCheck {
		// fallback: forward (z, x, y) to the generic implementation
		fq2.LABEL(lblNoAdx)
		fq2.MOVQ("z+0(FP)", amd64.AX)
		fq2.MOVQ(amd64.AX, "(SP)")
		fq2.MOVQ("x+8(FP)", amd64.AX)
		fq2.MOVQ(amd64.AX, "8(SP)")
		fq2.MOVQ("y+16(FP)", amd64.AX)
		fq2.MOVQ(amd64.AX, "16(SP)")
		fq2.WriteLn("CALL ·mulGenericE2(SB)")
		fq2.RET()

	}

	// hand both groups of slots back to the pool (presumably required for
	// the deferred AssertCleanStack to pass — confirm)
	fq2.Push(&registers, aStack...)
	fq2.Push(&registers, cStack...)

}