// +build !purego

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Origin: github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-756/fr/element_ops_amd64.s
//
// Go assembler (Plan 9 syntax: operands are written "source, destination")
// routines operating on an Element through a pointer. Every routine reads and
// writes six consecutive 64-bit words at offsets 0..40 from that pointer; the
// carry/borrow chains run from offset 0 upward, so offset 0 is the least
// significant limb.
//
// All arithmetic here is expressed as ADDQ/ADCQ or SUBQ/SBBQ chains. The
// instructions placed between the links of a chain (MOVQ, CMOVQcc) do not
// modify the carry flag; do not insert flag-writing instructions into a chain.

#include "textflag.h"
#include "funcdata.h"

// modulus q
// Six 64-bit limbs, least significant at offset 0, read-only, file-local
// (the <> suffix keeps the symbol private to this file).
DATA q<>+0(SB)/8, $0x9948a20000000001
DATA q<>+8(SB)/8, $0xce97f76a822c0000
DATA q<>+16(SB)/8, $0x980dc360d0a49d7f
DATA q<>+24(SB)/8, $0x84059eb647102326
DATA q<>+32(SB)/8, $0x53cb5d240ed107a2
DATA q<>+40(SB)/8, $0x03eeb0416684d190
GLOBL q<>(SB), (RODATA+NOPTR), $48

// qInv0 q'[0]
// This constant satisfies q[0] * qInv0 == -1 (mod 2^64). It is not referenced
// by any routine in this part of the file; presumably it is the Montgomery
// constant used by multiplication code elsewhere -- TODO confirm.
DATA qInv0<>(SB)/8, $0x9948a1ffffffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE(ra0..ra5, rb0..rb5) conditionally subtracts q from the 6-limb value
// held in ra0 (least significant) .. ra5 (most significant):
//   1. each limb is copied to the matching rb operand, then q is subtracted
//      limb by limb with SUBQ/SBBQ so the borrow ripples upward;
//   2. if the last SBBQ leaves the carry flag set (the subtraction borrowed,
//      i.e. the value was below q), CMOVQCS puts the saved limbs back.
// Net effect: ra = ra - q when ra >= q, otherwise ra is unchanged.
//
// The MOVQ copies are interleaved with the subtraction chain; this relies on
// MOVQ leaving the flags untouched.
// Overwrites rb0..rb5 and the flags. Only one subtraction is performed, so the
// result is fully reduced only for inputs below 2q.
// The rb operands may be registers or memory operands (MulBy13 passes frame
// slots for five of them).
// The last body line still ends in a continuation backslash, so the blank line
// that follows the macro is what terminates it -- keep it.
#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
	MOVQ ra0, rb0; \
	SUBQ q<>(SB), ra0; \
	MOVQ ra1, rb1; \
	SBBQ q<>+8(SB), ra1; \
	MOVQ ra2, rb2; \
	SBBQ q<>+16(SB), ra2; \
	MOVQ ra3, rb3; \
	SBBQ q<>+24(SB), ra3; \
	MOVQ ra4, rb4; \
	SBBQ q<>+32(SB), ra4; \
	MOVQ ra5, rb5; \
	SBBQ q<>+40(SB), ra5; \
	CMOVQCS rb0, ra0; \
	CMOVQCS rb1, ra1; \
	CMOVQCS rb2, ra2; \
	CMOVQCS rb3, ra3; \
	CMOVQCS rb4, ra4; \
	CMOVQCS rb5, ra5; \

// reduce(res *Element)
// Applies one conditional subtraction of q to *res, in place.
// Frame: no locals, 8 bytes of arguments ($0-8); NOSPLIT.
// Registers: AX = res, (DX,CX,BX,SI,DI,R8) = limbs 0..5, R9..R14 = scratch.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy3(x *Element)
// Replaces *x with 3*x, computed as (2x reduced) + x, reduced again.
// Frame: no locals, 8 bytes of arguments ($0-8); NOSPLIT.
// Registers: AX = x, (DX,CX,BX,SI,DI,R8) = running value, R9..R15 = scratch.
// Each step at most doubles the value before REDUCE subtracts q once, which
// assumes *x is below q on entry -- TODO confirm with callers.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8
	// value = 2x (each limb added to itself, carry rippling upward)
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// value = 2x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy5(x *Element)
// Replaces *x with 5*x, computed as 2x -> 4x -> 4x + x with a REDUCE after
// every step.
// Frame: no locals, 8 bytes of arguments ($0-8); NOSPLIT.
// Registers: AX = x, (DX,CX,BX,SI,DI,R8) = running value, R9..R15 = scratch.
// Assumes *x is below q on entry (see REDUCE) -- TODO confirm with callers.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8
	// value = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// value = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)

	// value = 4x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy13(x *Element)
// Replaces *x with 13*x, computed as 2x -> 4x -> 8x -> 8x + 4x -> 12x + x with
// a REDUCE after every step.
// Frame: 40 bytes of locals, 8 bytes of arguments ($40-8); declared without
// NOSPLIT. The locals are five 8-byte slots s0..s4 which, together with R15,
// hold the reduced 4x while the running value is doubled to 8x.
// Registers: AX = x, (DX,CX,BX,SI,DI,R8) = running value, R9..R15 = scratch.
// Assumes *x is below q on entry (see REDUCE) -- TODO confirm with callers.
TEXT ·MulBy13(SB), $40-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8
	// value = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// value = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP))
	REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP))

	// Save the reduced 4x: limb 0 in R15, limbs 1..5 in s0..s4. This overwrites
	// the scratch copies REDUCE just left in the same places.
	MOVQ DX, R15
	MOVQ CX, s0-8(SP)
	MOVQ BX, s1-16(SP)
	MOVQ SI, s2-24(SP)
	MOVQ DI, s3-32(SP)
	MOVQ R8, s4-40(SP)
	// value = 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// value = 8x + 4x = 12x (adds the copy saved above)
	ADDQ R15, DX
	ADCQ s0-8(SP), CX
	ADCQ s1-16(SP), BX
	ADCQ s2-24(SP), SI
	ADCQ s3-32(SP), DI
	ADCQ s4-40(SP), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// value = 12x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// Butterfly(a, b *Element) sets a = a + b; b = a - b
// Both results are computed from the values a and b hold on entry.
//   - The sum gets one conditional subtraction of q (REDUCE).
//   - The difference gets q added back when the subtraction borrowed.
// Frame: 48 bytes of locals, 16 bytes of arguments ($48-16); declared without
// NOSPLIT. The locals are six 8-byte slots s0..s5 used to park the sum while
// its registers temporarily hold the modulus.
// Registers: (CX,BX,SI,DI,R8,R9) = sum, (R10..R15) = difference, DX = b,
// AX = a, then zero, then a again.
// Assumes both inputs are below q on entry -- TODO confirm with callers.
TEXT ·Butterfly(SB), $48-16
	MOVQ a+0(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI
	MOVQ 32(AX), R8
	MOVQ 40(AX), R9
	// second copy of a, which will become a - b
	MOVQ CX, R10
	MOVQ BX, R11
	MOVQ SI, R12
	MOVQ DI, R13
	MOVQ R8, R14
	MOVQ R9, R15
	// AX = 0: the "add nothing" source for the CMOVQCC group below. The pointer
	// to a is reloaded from the argument frame before the final stores.
	XORQ AX, AX
	MOVQ b+8(FP), DX
	// sum = a + b
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI
	ADCQ 32(DX), R8
	ADCQ 40(DX), R9
	// diff = a - b; afterwards the carry flag is set iff the subtraction
	// borrowed (a < b). Only MOVQ instructions sit between here and the CMOVQCC
	// group that consumes that flag.
	SUBQ 0(DX), R10
	SBBQ 8(DX), R11
	SBBQ 16(DX), R12
	SBBQ 24(DX), R13
	SBBQ 32(DX), R14
	SBBQ 40(DX), R15
	// park the sum in the frame so CX..R9 can hold the modulus
	MOVQ CX, s0-8(SP)
	MOVQ BX, s1-16(SP)
	MOVQ SI, s2-24(SP)
	MOVQ DI, s3-32(SP)
	MOVQ R8, s4-40(SP)
	MOVQ R9, s5-48(SP)
	// the six limbs of q as immediates (same values as the q<> data above)
	MOVQ $0x9948a20000000001, CX
	MOVQ $0xce97f76a822c0000, BX
	MOVQ $0x980dc360d0a49d7f, SI
	MOVQ $0x84059eb647102326, DI
	MOVQ $0x53cb5d240ed107a2, R8
	MOVQ $0x03eeb0416684d190, R9
	// no borrow (carry clear): replace q with 0 so the addition below is a no-op
	CMOVQCC AX, CX
	CMOVQCC AX, BX
	CMOVQCC AX, SI
	CMOVQCC AX, DI
	CMOVQCC AX, R8
	CMOVQCC AX, R9
	// diff += q if the subtraction borrowed, diff += 0 otherwise
	ADDQ CX, R10
	ADCQ BX, R11
	ADCQ SI, R12
	ADCQ DI, R13
	ADCQ R8, R14
	ADCQ R9, R15
	// bring the sum back from the frame
	MOVQ s0-8(SP), CX
	MOVQ s1-16(SP), BX
	MOVQ s2-24(SP), SI
	MOVQ s3-32(SP), DI
	MOVQ s4-40(SP), R8
	MOVQ s5-48(SP), R9
	// *b = diff; after these stores R10..R15 are free to serve as REDUCE scratch
	MOVQ R10, 0(DX)
	MOVQ R11, 8(DX)
	MOVQ R12, 16(DX)
	MOVQ R13, 24(DX)
	MOVQ R14, 32(DX)
	MOVQ R15, 40(DX)

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	// *a = reduced sum (AX was zeroed above, so the pointer is reloaded first)
	MOVQ a+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	MOVQ R8, 32(AX)
	MOVQ R9, 40(AX)
	RET