github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  import (
    18  	"io"
    19  
    20  	"github.com/consensys/bavard"
    21  	ramd64 "github.com/consensys/bavard/amd64"
    22  	"github.com/consensys/gnark-crypto/field/generator/asm/amd64"
    23  	field "github.com/consensys/gnark-crypto/field/generator/config"
    24  	"github.com/consensys/gnark-crypto/internal/generator/config"
    25  )
    26  
// Fq2Amd64 is the generator for the amd64 assembly of the E2 routines
// (addE2, doubleE2, subE2, negE2 and the curve-specific mul / square /
// mulByNonResidue variants). It embeds *amd64.FFAmd64, so the emitting helpers
// used by the generate* methods (MOVQ, Mov, Add, Reduce, FnHeader, ...) are
// reached through that embedded value.
//
// The fields are initialised positionally by NewFq2Amd64, so their order must
// stay in sync with that constructor.
type Fq2Amd64 struct {
	*amd64.FFAmd64
	config config.Curve       // curve selector; Generate compares it with config.BN254 and config.BLS12_381
	w      io.Writer          // writer given to NewFq2Amd64; the same value is passed to amd64.NewFFAmd64
	F      *field.FieldConfig // field config given to NewFq2Amd64; the same value is passed to amd64.NewFFAmd64
}
    34  
    35  // NewFq2Amd64 ...
    36  func NewFq2Amd64(w io.Writer, F *field.FieldConfig, config config.Curve) *Fq2Amd64 {
    37  	return &Fq2Amd64{
    38  		amd64.NewFFAmd64(w, F),
    39  		config,
    40  		w,
    41  		F,
    42  	}
    43  }
    44  
    45  // Generate ...
    46  func (fq2 *Fq2Amd64) Generate(forceADXCheck bool) error {
    47  	fq2.WriteLn(bavard.Apache2Header("ConsenSys Software Inc.", 2020))
    48  
    49  	fq2.WriteLn("#include \"textflag.h\"")
    50  	fq2.WriteLn("#include \"funcdata.h\"")
    51  
    52  	fq2.GenerateDefines()
    53  	if fq2.config.Equal(config.BN254) {
    54  		fq2.generateMulDefine()
    55  	}
    56  
    57  	fq2.generateAddE2()
    58  	fq2.generateDoubleE2()
    59  	fq2.generateSubE2()
    60  	fq2.generateNegE2()
    61  
    62  	if fq2.config.Equal(config.BN254) {
    63  		fq2.generateMulByNonResidueE2BN254()
    64  		fq2.generateMulE2BN254(forceADXCheck)
    65  		fq2.generateSquareE2BN254(forceADXCheck)
    66  	} else if fq2.config.Equal(config.BLS12_381) {
    67  		fq2.generateMulByNonResidueE2BLS381()
    68  		fq2.generateSquareE2BLS381(forceADXCheck)
    69  		fq2.generateMulE2BLS381(forceADXCheck)
    70  	}
    71  
    72  	return nil
    73  }
    74  
    75  func (fq2 *Fq2Amd64) generateAddE2() {
    76  	registers := fq2.FnHeader("addE2", 0, 24)
    77  
    78  	// registers
    79  	x := registers.Pop()
    80  	y := registers.Pop()
    81  	r := registers.Pop()
    82  	t := registers.PopN(fq2.NbWords)
    83  
    84  	fq2.MOVQ("x+8(FP)", x)
    85  
    86  	// move t = x
    87  	fq2.Mov(x, t)
    88  
    89  	fq2.MOVQ("y+16(FP)", y)
    90  
    91  	// t = t + y = x + y
    92  	fq2.Add(y, t)
    93  
    94  	// reduce
    95  	fq2.Reduce(&registers, t)
    96  	fq2.MOVQ("res+0(FP)", r)
    97  	fq2.Mov(t, r)
    98  
    99  	// move x+offset(fq2.NbWords) into t
   100  	fq2.Mov(x, t, fq2.NbWords)
   101  
   102  	// add y+offset(fq2.NbWords) into t
   103  	fq2.Add(y, t, fq2.NbWords)
   104  
   105  	// reduce t into r with offset fq2.NbWords
   106  	fq2.Reduce(&registers, t)
   107  	fq2.Mov(t, r, 0, fq2.NbWords)
   108  
   109  	fq2.RET()
   110  
   111  }
   112  
   113  func (fq2 *Fq2Amd64) generateDoubleE2() {
   114  	// func header
   115  	registers := fq2.FnHeader("doubleE2", 0, 16)
   116  
   117  	// registers
   118  	x := registers.Pop()
   119  	r := registers.Pop()
   120  	t := registers.PopN(fq2.NbWords)
   121  
   122  	fq2.MOVQ("res+0(FP)", r)
   123  	fq2.MOVQ("x+8(FP)", x)
   124  
   125  	fq2.Mov(x, t)
   126  	fq2.Add(t, t)
   127  	fq2.Reduce(&registers, t)
   128  	fq2.Mov(t, r)
   129  	fq2.Mov(x, t, fq2.NbWords)
   130  	fq2.Add(t, t)
   131  	fq2.Reduce(&registers, t)
   132  	fq2.Mov(t, r, 0, fq2.NbWords)
   133  
   134  	fq2.RET()
   135  }
   136  
   137  func (fq2 *Fq2Amd64) generateNegE2() {
   138  	registers := fq2.FnHeader("negE2", 0, 16)
   139  
   140  	nonZeroA := fq2.NewLabel()
   141  	nonZeroB := fq2.NewLabel()
   142  	B := fq2.NewLabel()
   143  
   144  	// registers
   145  	x := registers.Pop()
   146  	r := registers.Pop()
   147  	q := registers.Pop()
   148  	t := registers.PopN(fq2.NbWords)
   149  
   150  	fq2.MOVQ("res+0(FP)", r)
   151  	fq2.MOVQ("x+8(FP)", x)
   152  
   153  	// t = x
   154  	fq2.Mov(x, t)
   155  
   156  	// x = t[0] | ... | t[n]
   157  	fq2.MOVQ(t[0], x)
   158  	for i := 1; i < fq2.NbWords; i++ {
   159  		fq2.ORQ(t[i], x)
   160  	}
   161  
   162  	fq2.TESTQ(x, x)
   163  
   164  	// if x != 0, we jump to nonzero label
   165  	fq2.JNE(nonZeroA)
   166  
   167  	// if x == 0, we set the result to zero and continue
   168  	for i := 0; i < fq2.NbWords; i++ {
   169  		fq2.MOVQ(x, r.At(i))
   170  	}
   171  	fq2.JMP(B)
   172  
   173  	fq2.LABEL(nonZeroA)
   174  
   175  	// z = x - q
   176  	for i := 0; i < fq2.NbWords; i++ {
   177  		fq2.MOVQ(fq2.Q[i], q)
   178  		if i == 0 {
   179  			fq2.SUBQ(t[i], q)
   180  		} else {
   181  			fq2.SBBQ(t[i], q)
   182  		}
   183  		fq2.MOVQ(q, r.At(i))
   184  	}
   185  
   186  	fq2.LABEL(B)
   187  	fq2.MOVQ("x+8(FP)", x)
   188  	fq2.Mov(x, t, fq2.NbWords)
   189  
   190  	// x = t[0] | ... | t[n]
   191  	fq2.MOVQ(t[0], x)
   192  	for i := 1; i < fq2.NbWords; i++ {
   193  		fq2.ORQ(t[i], x)
   194  	}
   195  
   196  	fq2.TESTQ(x, x)
   197  
   198  	// if x != 0, we jump to nonzero label
   199  	fq2.JNE(nonZeroB)
   200  
   201  	// if x == 0, we set the result to zero and return
   202  	for i := 0; i < fq2.NbWords; i++ {
   203  		fq2.MOVQ(x, r.At(i+fq2.NbWords))
   204  	}
   205  	fq2.RET()
   206  
   207  	fq2.LABEL(nonZeroB)
   208  
   209  	// z = x - q
   210  	for i := 0; i < fq2.NbWords; i++ {
   211  		fq2.MOVQ(fq2.Q[i], q)
   212  		if i == 0 {
   213  			fq2.SUBQ(t[i], q)
   214  		} else {
   215  			fq2.SBBQ(t[i], q)
   216  		}
   217  		fq2.MOVQ(q, r.At(i+fq2.NbWords))
   218  	}
   219  
   220  	fq2.RET()
   221  
   222  }
   223  
   224  func (fq2 *Fq2Amd64) generateSubE2() {
   225  	registers := fq2.FnHeader("subE2", 0, 24)
   226  
   227  	// registers
   228  	t := registers.PopN(fq2.NbWords)
   229  	xy := registers.Pop()
   230  
   231  	zero := registers.Pop()
   232  	fq2.XORQ(zero, zero)
   233  
   234  	fq2.MOVQ("x+8(FP)", xy)
   235  	fq2.Mov(xy, t)
   236  
   237  	// z = x - y mod q
   238  	// move t = x
   239  	fq2.MOVQ("y+16(FP)", xy)
   240  	fq2.Sub(xy, t)
   241  	fq2.MOVQ("x+8(FP)", xy)
   242  
   243  	fq2.modReduceAfterSub(&registers, zero, t)
   244  
   245  	r := registers.Pop()
   246  	fq2.MOVQ("res+0(FP)", r)
   247  	fq2.Mov(t, r)
   248  	registers.Push(r)
   249  
   250  	fq2.Mov(xy, t, fq2.NbWords)
   251  
   252  	// z = x - y mod q
   253  	// move t = x
   254  	fq2.MOVQ("y+16(FP)", xy)
   255  	fq2.Sub(xy, t, fq2.NbWords)
   256  
   257  	fq2.modReduceAfterSub(&registers, zero, t)
   258  
   259  	r = xy
   260  	fq2.MOVQ("res+0(FP)", r)
   261  
   262  	fq2.Mov(t, r, 0, fq2.NbWords)
   263  
   264  	fq2.RET()
   265  
   266  }
   267  
   268  func (fq2 *Fq2Amd64) modReduceAfterSub(registers *ramd64.Registers, zero ramd64.Register, t []ramd64.Register) {
   269  	q := registers.PopN(fq2.NbWords)
   270  	fq2.modReduceAfterSubScratch(zero, t, q)
   271  	registers.Push(q...)
   272  }
   273  
   274  func (fq2 *Fq2Amd64) modReduceAfterSubScratch(zero ramd64.Register, t, scratch []ramd64.Register) {
   275  	fq2.Mov(fq2.Q, scratch)
   276  	for i := 0; i < fq2.NbWords; i++ {
   277  		fq2.CMOVQCC(zero, scratch[i])
   278  	}
   279  	// add registers (q or 0) to t, and set to result
   280  	fq2.Add(scratch, t)
   281  }