github.com/consensys/gnark-crypto@v0.14.0/field/generator/asm/amd64/element_frommont.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/consensys/bavard/amd64"
    21  )
    22  
    23  func (f *FFAmd64) generateFromMont(forceADX bool) {
    24  	const argSize = 8
    25  	minStackSize := argSize
    26  	if forceADX {
    27  		minStackSize = 0
    28  	}
    29  	stackSize := f.StackSize(f.NbWords*2, 2, minStackSize)
    30  
    31  	reserved := []amd64.Register{amd64.DX, amd64.AX}
    32  	if f.NbWords <= 5 {
    33  		// when dynamic linking, R15 is clobbered by a global variable access
    34  		// this is a temporary workaround --> don't use R15 when we can avoid it.
    35  		// see https://github.com/ConsenSys/gnark-crypto/issues/113
    36  		reserved = append(reserved, amd64.R15)
    37  	}
    38  	registers := f.FnHeader("fromMont", stackSize, argSize, reserved...)
    39  	defer f.AssertCleanStack(stackSize, minStackSize)
    40  
    41  	if stackSize > 0 {
    42  		f.WriteLn("NO_LOCAL_POINTERS")
    43  	}
    44  	f.WriteLn(`
    45  	// the algorithm is described here
    46  	// https://hackmd.io/@gnark/modular_multiplication
    47  	// when y = 1 we have: 
    48  	// for i=0 to N-1
    49  	// 		t[i] = x[i]
    50  	// for i=0 to N-1
    51  	// 		m := t[0]*q'[0] mod W
    52  	// 		C,_ := t[0] + m*q[0]
    53  	// 		for j=1 to N-1
    54  	// 		    (C,t[j-1]) := t[j] + m*q[j] + C
    55  	// 		t[N-1] = C`)
    56  
    57  	noAdx := f.NewLabel()
    58  	if !forceADX {
    59  		// check ADX instruction support
    60  		f.CMPB("·supportAdx(SB)", 1)
    61  		f.JNE(noAdx)
    62  	}
    63  
    64  	// registers
    65  	t := registers.PopN(f.NbWords)
    66  
    67  	f.MOVQ("res+0(FP)", amd64.DX)
    68  
    69  	// 	for i=0 to N-1
    70  	//     t[i] = a[i]
    71  	f.Mov(amd64.DX, t)
    72  
    73  	for i := 0; i < f.NbWords; i++ {
    74  
    75  		f.XORQ(amd64.DX, amd64.DX)
    76  
    77  		// m := t[0]*q'[0] mod W
    78  		f.Comment("m := t[0]*q'[0] mod W")
    79  		m := amd64.DX
    80  		f.MOVQ(f.qInv0(), m)
    81  		f.IMULQ(t[0], m)
    82  
    83  		// clear the carry flags
    84  		f.XORQ(amd64.AX, amd64.AX)
    85  
    86  		// C,_ := t[0] + m*q[0]
    87  		f.Comment("C,_ := t[0] + m*q[0]")
    88  
    89  		f.MULXQ(f.qAt(0), amd64.AX, amd64.BP)
    90  		f.ADCXQ(t[0], amd64.AX)
    91  		f.MOVQ(amd64.BP, t[0])
    92  
    93  		// for j=1 to N-1
    94  		//    (C,t[j-1]) := t[j] + m*q[j] + C
    95  		for j := 1; j < f.NbWords; j++ {
    96  			f.Comment(fmt.Sprintf("(C,t[%[1]d]) := t[%[2]d] + m*q[%[2]d] + C", j-1, j))
    97  			f.ADCXQ(t[j], t[j-1])
    98  			f.MULXQ(f.qAt(j), amd64.AX, t[j])
    99  			f.ADOXQ(amd64.AX, t[j-1])
   100  		}
   101  		f.MOVQ(0, amd64.AX)
   102  		f.ADCXQ(amd64.AX, t[f.NbWordsLastIndex])
   103  		f.ADOXQ(amd64.AX, t[f.NbWordsLastIndex])
   104  
   105  	}
   106  
   107  	// ---------------------------------------------------------------------------------------------
   108  	// reduce
   109  	f.Reduce(&registers, t)
   110  	f.MOVQ("res+0(FP)", amd64.AX)
   111  	f.Mov(t, amd64.AX)
   112  	f.RET()
   113  
   114  	// No adx
   115  	if !forceADX {
   116  		f.LABEL(noAdx)
   117  		f.MOVQ("res+0(FP)", amd64.AX)
   118  		f.MOVQ(amd64.AX, "(SP)")
   119  		f.WriteLn("CALL ·_fromMontGeneric(SB)")
   120  		f.RET()
   121  	}
   122  
   123  }