github.com/consensys/gnark-crypto@v0.14.0/field/generator/asm/amd64/element_butterfly.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  // Butterfly sets
    18  // a = a + b
    19  // b = a - b
    20  //
    21  //	func Butterfly(a, b *{{.ElementName}}) {
    22  //		t := *a
    23  //		a.Add(a, b)
    24  //		b.Sub(&t, b)
    25  //	}
    26  func (f *FFAmd64) generateButterfly() {
    27  	f.Comment("Butterfly(a, b *Element) sets a = a + b; b = a - b")
    28  
    29  	nbRegisters := f.NbWords*3 + 2
    30  	if f.NbWords > 6 {
    31  		nbRegisters = 2*f.NbWords + 1
    32  	}
    33  	stackSize := f.StackSize(nbRegisters, 0, 0)
    34  	registers := f.FnHeader("Butterfly", stackSize, 16)
    35  	defer f.AssertCleanStack(stackSize, 0)
    36  
    37  	if f.NbWords <= 6 {
    38  		// registers
    39  		a := f.Pop(&registers)
    40  		b := f.Pop(&registers)
    41  		t0 := f.PopN(&registers)
    42  		t1 := f.PopN(&registers)
    43  		q := f.PopN(&registers)
    44  
    45  		// t = a
    46  		f.MOVQ("a+0(FP)", a)
    47  		f.Mov(a, t0)
    48  		f.Mov(t0, t1)
    49  		f.XORQ(a, a) // set a to zero for later reduction
    50  
    51  		f.MOVQ("b+8(FP)", b)
    52  		f.Add(b, t0) // t0 = a + b
    53  		f.Sub(b, t1) // t1 = a - b
    54  
    55  		// reduce t1
    56  		if f.NbWords >= 5 {
    57  			// q is on the stack, can't use for CMOVQCC
    58  			f.Mov(t0, q) // save t0
    59  			f.Mov(f.Q, t0)
    60  			for i := 0; i < f.NbWords; i++ {
    61  				f.CMOVQCC(a, t0[i])
    62  			}
    63  			// add registers (q or 0) to t, and set to result
    64  			f.Add(t0, t1)
    65  			f.Mov(q, t0) // restore t0
    66  		} else {
    67  			f.Mov(f.Q, q)
    68  			for i := 0; i < f.NbWords; i++ {
    69  				f.CMOVQCC(a, q[i])
    70  			}
    71  			// add registers (q or 0) to t, and set to result
    72  			f.Add(q, t1)
    73  		}
    74  
    75  		f.Push(&registers, q...)
    76  
    77  		// save t1
    78  		f.Mov(t1, b)
    79  
    80  		// reduce t0
    81  		f.ReduceElement(t0, t1)
    82  
    83  		// save t0
    84  		f.MOVQ("a+0(FP)", a)
    85  		f.Mov(t0, a)
    86  
    87  		f.RET()
    88  
    89  		f.Push(&registers, t0...)
    90  		f.Push(&registers, t1...)
    91  		f.Push(&registers, a, b)
    92  	} else {
    93  		// registers
    94  		r := f.Pop(&registers)
    95  		t0 := f.PopN(&registers)
    96  		t1 := f.PopN(&registers)
    97  
    98  		// t = a
    99  		f.MOVQ("b+8(FP)", r)
   100  		f.Mov(r, t0)
   101  
   102  		f.MOVQ("a+0(FP)", r)
   103  		f.Add(r, t0)  // t0 = a + b
   104  		f.Mov(t0, t1) // save t1 = t0
   105  		f.Mov(r, t0)  // t0 = a
   106  		f.MOVQ("b+8(FP)", r)
   107  		f.Sub(r, t0) // t0 = a - b
   108  
   109  		// reduce t0
   110  		noReduce := f.NewLabel()
   111  		f.JCC(noReduce)
   112  		q := r
   113  		f.MOVQ(f.Q[0], q)
   114  		f.ADDQ(q, t0[0])
   115  		for i := 1; i < f.NbWords; i++ {
   116  			f.MOVQ(f.Q[i], q)
   117  			f.ADCQ(q, t0[i])
   118  		}
   119  		f.LABEL(noReduce)
   120  
   121  		// save t1
   122  		f.MOVQ("b+8(FP)", r)
   123  		f.Mov(t0, r)
   124  
   125  		// reduce t0
   126  		f.Mov(t1, t0)
   127  		f.ReduceElement(t0, t1)
   128  
   129  		// save t0
   130  		f.MOVQ("a+0(FP)", r)
   131  		f.Mov(t0, r)
   132  
   133  		f.RET()
   134  
   135  		f.Push(&registers, t0...)
   136  		f.Push(&registers, t1...)
   137  		f.Push(&registers, r)
   138  	}
   139  
   140  }