github.com/consensys/gnark-crypto@v0.14.0/internal/generator/tower/asm/amd64/e2.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package amd64 16 17 import ( 18 "io" 19 20 "github.com/consensys/bavard" 21 ramd64 "github.com/consensys/bavard/amd64" 22 "github.com/consensys/gnark-crypto/field/generator/asm/amd64" 23 field "github.com/consensys/gnark-crypto/field/generator/config" 24 "github.com/consensys/gnark-crypto/internal/generator/config" 25 ) 26 27 // Fq2Amd64 ... 28 type Fq2Amd64 struct { 29 *amd64.FFAmd64 30 config config.Curve 31 w io.Writer 32 F *field.FieldConfig 33 } 34 35 // NewFq2Amd64 ... 36 func NewFq2Amd64(w io.Writer, F *field.FieldConfig, config config.Curve) *Fq2Amd64 { 37 return &Fq2Amd64{ 38 amd64.NewFFAmd64(w, F), 39 config, 40 w, 41 F, 42 } 43 } 44 45 // Generate ... 46 func (fq2 *Fq2Amd64) Generate(forceADXCheck bool) error { 47 fq2.WriteLn(bavard.Apache2Header("ConsenSys Software Inc.", 2020)) 48 49 fq2.WriteLn("#include \"textflag.h\"") 50 fq2.WriteLn("#include \"funcdata.h\"") 51 52 fq2.GenerateDefines() 53 if fq2.config.Equal(config.BN254) { 54 fq2.generateMulDefine() 55 } 56 57 fq2.generateAddE2() 58 fq2.generateDoubleE2() 59 fq2.generateSubE2() 60 fq2.generateNegE2() 61 62 if fq2.config.Equal(config.BN254) { 63 fq2.generateMulByNonResidueE2BN254() 64 fq2.generateMulE2BN254(forceADXCheck) 65 fq2.generateSquareE2BN254(forceADXCheck) 66 } else if fq2.config.Equal(config.BLS12_381) { 67 fq2.generateMulByNonResidueE2BLS381() 68 fq2.generateSquareE2BLS381(forceADXCheck) 69 fq2.generateMulE2BLS381(forceADXCheck) 70 } 71 72 return nil 73 } 74 75 func (fq2 *Fq2Amd64) generateAddE2() { 76 registers := fq2.FnHeader("addE2", 0, 24) 77 78 // registers 79 x := registers.Pop() 80 y := registers.Pop() 81 r := registers.Pop() 82 t := registers.PopN(fq2.NbWords) 83 84 fq2.MOVQ("x+8(FP)", x) 85 86 // move t = x 87 fq2.Mov(x, t) 88 89 fq2.MOVQ("y+16(FP)", y) 90 91 // t = t + y = x + y 92 fq2.Add(y, t) 93 94 // reduce 95 fq2.Reduce(®isters, t) 96 fq2.MOVQ("res+0(FP)", r) 97 fq2.Mov(t, r) 98 99 // move x+offset(fq2.NbWords) into t 100 fq2.Mov(x, t, fq2.NbWords) 101 102 // add y+offset(fq2.NbWords) into t 103 fq2.Add(y, t, fq2.NbWords) 104 105 // reduce t into r with offset fq2.NbWords 106 fq2.Reduce(®isters, t) 107 fq2.Mov(t, r, 0, fq2.NbWords) 108 109 fq2.RET() 110 111 } 112 113 func (fq2 *Fq2Amd64) generateDoubleE2() { 114 // func header 115 registers := fq2.FnHeader("doubleE2", 0, 16) 116 117 // registers 118 x := registers.Pop() 119 r := registers.Pop() 120 t := registers.PopN(fq2.NbWords) 121 122 fq2.MOVQ("res+0(FP)", r) 123 fq2.MOVQ("x+8(FP)", x) 124 125 fq2.Mov(x, t) 126 fq2.Add(t, t) 127 fq2.Reduce(®isters, t) 128 fq2.Mov(t, r) 129 fq2.Mov(x, t, fq2.NbWords) 130 fq2.Add(t, t) 131 fq2.Reduce(®isters, t) 132 fq2.Mov(t, r, 0, fq2.NbWords) 133 134 fq2.RET() 135 } 136 137 func (fq2 *Fq2Amd64) generateNegE2() { 138 registers := fq2.FnHeader("negE2", 0, 16) 139 140 nonZeroA := fq2.NewLabel() 141 nonZeroB := fq2.NewLabel() 142 B := fq2.NewLabel() 143 144 // registers 145 x := registers.Pop() 146 r := registers.Pop() 147 q := registers.Pop() 148 t := registers.PopN(fq2.NbWords) 149 150 fq2.MOVQ("res+0(FP)", r) 151 fq2.MOVQ("x+8(FP)", x) 152 153 // t = x 154 fq2.Mov(x, t) 155 156 // x = t[0] | ... | t[n] 157 fq2.MOVQ(t[0], x) 158 for i := 1; i < fq2.NbWords; i++ { 159 fq2.ORQ(t[i], x) 160 } 161 162 fq2.TESTQ(x, x) 163 164 // if x != 0, we jump to nonzero label 165 fq2.JNE(nonZeroA) 166 167 // if x == 0, we set the result to zero and continue 168 for i := 0; i < fq2.NbWords; i++ { 169 fq2.MOVQ(x, r.At(i)) 170 } 171 fq2.JMP(B) 172 173 fq2.LABEL(nonZeroA) 174 175 // z = x - q 176 for i := 0; i < fq2.NbWords; i++ { 177 fq2.MOVQ(fq2.Q[i], q) 178 if i == 0 { 179 fq2.SUBQ(t[i], q) 180 } else { 181 fq2.SBBQ(t[i], q) 182 } 183 fq2.MOVQ(q, r.At(i)) 184 } 185 186 fq2.LABEL(B) 187 fq2.MOVQ("x+8(FP)", x) 188 fq2.Mov(x, t, fq2.NbWords) 189 190 // x = t[0] | ... | t[n] 191 fq2.MOVQ(t[0], x) 192 for i := 1; i < fq2.NbWords; i++ { 193 fq2.ORQ(t[i], x) 194 } 195 196 fq2.TESTQ(x, x) 197 198 // if x != 0, we jump to nonzero label 199 fq2.JNE(nonZeroB) 200 201 // if x == 0, we set the result to zero and return 202 for i := 0; i < fq2.NbWords; i++ { 203 fq2.MOVQ(x, r.At(i+fq2.NbWords)) 204 } 205 fq2.RET() 206 207 fq2.LABEL(nonZeroB) 208 209 // z = x - q 210 for i := 0; i < fq2.NbWords; i++ { 211 fq2.MOVQ(fq2.Q[i], q) 212 if i == 0 { 213 fq2.SUBQ(t[i], q) 214 } else { 215 fq2.SBBQ(t[i], q) 216 } 217 fq2.MOVQ(q, r.At(i+fq2.NbWords)) 218 } 219 220 fq2.RET() 221 222 } 223 224 func (fq2 *Fq2Amd64) generateSubE2() { 225 registers := fq2.FnHeader("subE2", 0, 24) 226 227 // registers 228 t := registers.PopN(fq2.NbWords) 229 xy := registers.Pop() 230 231 zero := registers.Pop() 232 fq2.XORQ(zero, zero) 233 234 fq2.MOVQ("x+8(FP)", xy) 235 fq2.Mov(xy, t) 236 237 // z = x - y mod q 238 // move t = x 239 fq2.MOVQ("y+16(FP)", xy) 240 fq2.Sub(xy, t) 241 fq2.MOVQ("x+8(FP)", xy) 242 243 fq2.modReduceAfterSub(®isters, zero, t) 244 245 r := registers.Pop() 246 fq2.MOVQ("res+0(FP)", r) 247 fq2.Mov(t, r) 248 registers.Push(r) 249 250 fq2.Mov(xy, t, fq2.NbWords) 251 252 // z = x - y mod q 253 // move t = x 254 fq2.MOVQ("y+16(FP)", xy) 255 fq2.Sub(xy, t, fq2.NbWords) 256 257 fq2.modReduceAfterSub(®isters, zero, t) 258 259 r = xy 260 fq2.MOVQ("res+0(FP)", r) 261 262 fq2.Mov(t, r, 0, fq2.NbWords) 263 264 fq2.RET() 265 266 } 267 268 func (fq2 *Fq2Amd64) modReduceAfterSub(registers *ramd64.Registers, zero ramd64.Register, t []ramd64.Register) { 269 q := registers.PopN(fq2.NbWords) 270 fq2.modReduceAfterSubScratch(zero, t, q) 271 registers.Push(q...) 272 } 273 274 func (fq2 *Fq2Amd64) modReduceAfterSubScratch(zero ramd64.Register, t, scratch []ramd64.Register) { 275 fq2.Mov(fq2.Q, scratch) 276 for i := 0; i < fq2.NbWords; i++ { 277 fq2.CMOVQCC(zero, scratch[i]) 278 } 279 // add registers (q or 0) to t, and set to result 280 fq2.Add(scratch, t) 281 }