github.com/consensys/gnark-crypto@v0.14.0/field/generator/asm/amd64/element_frommont.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package amd64 16 17 import ( 18 "fmt" 19 20 "github.com/consensys/bavard/amd64" 21 ) 22 23 func (f *FFAmd64) generateFromMont(forceADX bool) { 24 const argSize = 8 25 minStackSize := argSize 26 if forceADX { 27 minStackSize = 0 28 } 29 stackSize := f.StackSize(f.NbWords*2, 2, minStackSize) 30 31 reserved := []amd64.Register{amd64.DX, amd64.AX} 32 if f.NbWords <= 5 { 33 // when dynamic linking, R15 is clobbered by a global variable access 34 // this is a temporary workaround --> don't use R15 when we can avoid it. 35 // see https://github.com/ConsenSys/gnark-crypto/issues/113 36 reserved = append(reserved, amd64.R15) 37 } 38 registers := f.FnHeader("fromMont", stackSize, argSize, reserved...) 39 defer f.AssertCleanStack(stackSize, minStackSize) 40 41 if stackSize > 0 { 42 f.WriteLn("NO_LOCAL_POINTERS") 43 } 44 f.WriteLn(` 45 // the algorithm is described here 46 // https://hackmd.io/@gnark/modular_multiplication 47 // when y = 1 we have: 48 // for i=0 to N-1 49 // t[i] = x[i] 50 // for i=0 to N-1 51 // m := t[0]*q'[0] mod W 52 // C,_ := t[0] + m*q[0] 53 // for j=1 to N-1 54 // (C,t[j-1]) := t[j] + m*q[j] + C 55 // t[N-1] = C`) 56 57 noAdx := f.NewLabel() 58 if !forceADX { 59 // check ADX instruction support 60 f.CMPB("·supportAdx(SB)", 1) 61 f.JNE(noAdx) 62 } 63 64 // registers 65 t := registers.PopN(f.NbWords) 66 67 f.MOVQ("res+0(FP)", amd64.DX) 68 69 // for i=0 to N-1 70 // t[i] = a[i] 71 f.Mov(amd64.DX, t) 72 73 for i := 0; i < f.NbWords; i++ { 74 75 f.XORQ(amd64.DX, amd64.DX) 76 77 // m := t[0]*q'[0] mod W 78 f.Comment("m := t[0]*q'[0] mod W") 79 m := amd64.DX 80 f.MOVQ(f.qInv0(), m) 81 f.IMULQ(t[0], m) 82 83 // clear the carry flags 84 f.XORQ(amd64.AX, amd64.AX) 85 86 // C,_ := t[0] + m*q[0] 87 f.Comment("C,_ := t[0] + m*q[0]") 88 89 f.MULXQ(f.qAt(0), amd64.AX, amd64.BP) 90 f.ADCXQ(t[0], amd64.AX) 91 f.MOVQ(amd64.BP, t[0]) 92 93 // for j=1 to N-1 94 // (C,t[j-1]) := t[j] + m*q[j] + C 95 for j := 1; j < f.NbWords; j++ { 96 f.Comment(fmt.Sprintf("(C,t[%[1]d]) := t[%[2]d] + m*q[%[2]d] + C", j-1, j)) 97 f.ADCXQ(t[j], t[j-1]) 98 f.MULXQ(f.qAt(j), amd64.AX, t[j]) 99 f.ADOXQ(amd64.AX, t[j-1]) 100 } 101 f.MOVQ(0, amd64.AX) 102 f.ADCXQ(amd64.AX, t[f.NbWordsLastIndex]) 103 f.ADOXQ(amd64.AX, t[f.NbWordsLastIndex]) 104 105 } 106 107 // --------------------------------------------------------------------------------------------- 108 // reduce 109 f.Reduce(®isters, t) 110 f.MOVQ("res+0(FP)", amd64.AX) 111 f.Mov(t, amd64.AX) 112 f.RET() 113 114 // No adx 115 if !forceADX { 116 f.LABEL(noAdx) 117 f.MOVQ("res+0(FP)", amd64.AX) 118 f.MOVQ(amd64.AX, "(SP)") 119 f.WriteLn("CALL ·_fromMontGeneric(SB)") 120 f.RET() 121 } 122 123 }