// +build !purego

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Origin: github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fp/element_ops_amd64.s
//
// Syntax: Go assembler (Plan 9 operand order: source first, destination last).
// An element is handled here as ten 64-bit limbs (80 bytes), with the
// least-significant limb at offset 0: every carry/borrow chain below starts
// at offset 0 and propagates towards offset 72.
//
// Register convention shared by all routines in this file:
//   accumulator : DX, CX, BX, SI, DI, R8, R9, R10, R11, R12  (limb 0 .. limb 9)
//   scratch     : R13, R14, R15 and the frame slots s0-8(SP) .. s6-56(SP)
//   AX          : pointer to the element being read or written

#include "textflag.h"
#include "funcdata.h"

// Limbs of the modulus q, least-significant first. They are named once here
// so that the read-only table and the immediates used by Butterfly share the
// same definition.
#define Q_LIMB0 0xd74916ea4570000d
#define Q_LIMB1 0x3d369bd31147f73c
#define Q_LIMB2 0xd7b5ce7ab839c225
#define Q_LIMB3 0x7e0e8850edbda407
#define Q_LIMB4 0xb8da9f5e83f57c49
#define Q_LIMB5 0x8152a6c0fadea490
#define Q_LIMB6 0x4e59769ad9bbda2f
#define Q_LIMB7 0xa8fcd8c75d79d2c7
#define Q_LIMB8 0xfc1a174f01d72ab5
#define Q_LIMB9 0x0126633cc0f35f63

// modulus q
DATA q<>+0(SB)/8, $Q_LIMB0
DATA q<>+8(SB)/8, $Q_LIMB1
DATA q<>+16(SB)/8, $Q_LIMB2
DATA q<>+24(SB)/8, $Q_LIMB3
DATA q<>+32(SB)/8, $Q_LIMB4
DATA q<>+40(SB)/8, $Q_LIMB5
DATA q<>+48(SB)/8, $Q_LIMB6
DATA q<>+56(SB)/8, $Q_LIMB7
DATA q<>+64(SB)/8, $Q_LIMB8
DATA q<>+72(SB)/8, $Q_LIMB9
GLOBL q<>(SB), (RODATA+NOPTR), $80

// qInv0 q'[0]
// NOTE(review): not referenced by any routine in this file; presumably
// consumed by a multiplication routine defined elsewhere - confirm.
DATA qInv0<>(SB)/8, $0xb50f29ab0b03b13b
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE(v0..v9, t0..t9) performs one conditional subtraction of q:
//   each limb vi is first copied to ti, then q is subtracted from v with a
//   borrow chain; if the final SBBQ leaves the carry flag set (the
//   subtraction borrowed, i.e. v < q) every limb is restored from its copy.
// The t operands may be registers or frame slots. Clobbers t0..t9 and flags.
#define REDUCE(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
	MOVQ    v0, t0;        \
	SUBQ    q<>(SB), v0;   \
	MOVQ    v1, t1;        \
	SBBQ    q<>+8(SB), v1; \
	MOVQ    v2, t2;        \
	SBBQ    q<>+16(SB), v2; \
	MOVQ    v3, t3;        \
	SBBQ    q<>+24(SB), v3; \
	MOVQ    v4, t4;        \
	SBBQ    q<>+32(SB), v4; \
	MOVQ    v5, t5;        \
	SBBQ    q<>+40(SB), v5; \
	MOVQ    v6, t6;        \
	SBBQ    q<>+48(SB), v6; \
	MOVQ    v7, t7;        \
	SBBQ    q<>+56(SB), v7; \
	MOVQ    v8, t8;        \
	SBBQ    q<>+64(SB), v8; \
	MOVQ    v9, t9;        \
	SBBQ    q<>+72(SB), v9; \
	CMOVQCS t0, v0;        \
	CMOVQCS t1, v1;        \
	CMOVQCS t2, v2;        \
	CMOVQCS t3, v3;        \
	CMOVQCS t4, v4;        \
	CMOVQCS t5, v5;        \
	CMOVQCS t6, v6;        \
	CMOVQCS t7, v7;        \
	CMOVQCS t8, v8;        \
	CMOVQCS t9, v9

// REDUCE_ACC conditionally subtracts q from the accumulator, using
// R13, R14, R15 and frame slots s0..s6 as the backup copy.
// The enclosing function must reserve at least 56 bytes of frame.
#define REDUCE_ACC REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))

// LOAD_ACC(base): accumulator = the ten limbs stored at base.
#define LOAD_ACC(base) \
	MOVQ 0(base), DX;   \
	MOVQ 8(base), CX;   \
	MOVQ 16(base), BX;  \
	MOVQ 24(base), SI;  \
	MOVQ 32(base), DI;  \
	MOVQ 40(base), R8;  \
	MOVQ 48(base), R9;  \
	MOVQ 56(base), R10; \
	MOVQ 64(base), R11; \
	MOVQ 72(base), R12

// STORE_ACC(base): the ten limbs at base = accumulator.
#define STORE_ACC(base) \
	MOVQ DX, 0(base);   \
	MOVQ CX, 8(base);   \
	MOVQ BX, 16(base);  \
	MOVQ SI, 24(base);  \
	MOVQ DI, 32(base);  \
	MOVQ R8, 40(base);  \
	MOVQ R9, 48(base);  \
	MOVQ R10, 56(base); \
	MOVQ R11, 64(base); \
	MOVQ R12, 72(base)

// ADD_ACC(base): accumulator += the ten limbs stored at base (carry chain).
#define ADD_ACC(base) \
	ADDQ 0(base), DX;   \
	ADCQ 8(base), CX;   \
	ADCQ 16(base), BX;  \
	ADCQ 24(base), SI;  \
	ADCQ 32(base), DI;  \
	ADCQ 40(base), R8;  \
	ADCQ 48(base), R9;  \
	ADCQ 56(base), R10; \
	ADCQ 64(base), R11; \
	ADCQ 72(base), R12

// DOUBLE_ACC: accumulator += accumulator (carry chain).
#define DOUBLE_ACC \
	ADDQ DX, DX;   \
	ADCQ CX, CX;   \
	ADCQ BX, BX;   \
	ADCQ SI, SI;   \
	ADCQ DI, DI;   \
	ADCQ R8, R8;   \
	ADCQ R9, R9;   \
	ADCQ R10, R10; \
	ADCQ R11, R11; \
	ADCQ R12, R12

// reduce(res *Element)
// Applies a single conditional subtraction of q to *res, in place.
// Frame: 56 bytes (backup slots s0..s6); args: 8 bytes (res).
TEXT ·reduce(SB), $56-8
	MOVQ res+0(FP), AX
	LOAD_ACC(AX)

	REDUCE_ACC

	STORE_ACC(AX)
	RET

// MulBy3(x *Element)
// Overwrites *x with 2*x + x, applying one conditional subtraction of q
// after each step.
// NOTE(review): each step removes at most one multiple of q, so the result
// is below q only if *x is below q on entry - confirm callers guarantee it.
TEXT ·MulBy3(SB), $56-8
	MOVQ x+0(FP), AX
	LOAD_ACC(AX)

	// acc = 2x
	DOUBLE_ACC
	REDUCE_ACC

	// acc = 2x + x
	ADD_ACC(AX)
	REDUCE_ACC

	STORE_ACC(AX)
	RET

// MulBy5(x *Element)
// Overwrites *x with 2*(2*x) + x, applying one conditional subtraction of q
// after each step.
// NOTE(review): same input-range assumption as MulBy3 - confirm.
TEXT ·MulBy5(SB), $56-8
	MOVQ x+0(FP), AX
	LOAD_ACC(AX)

	// acc = 2x
	DOUBLE_ACC
	REDUCE_ACC

	// acc = 4x
	DOUBLE_ACC
	REDUCE_ACC

	// acc = 4x + x
	ADD_ACC(AX)
	REDUCE_ACC

	STORE_ACC(AX)
	RET

// MulBy13(x *Element)
// Overwrites *x with 8x + 4x + x, applying one conditional subtraction of q
// after each step. The intermediate 4x is parked in frame slots s7..s16,
// which is why this routine needs a 136-byte frame (17 slots).
// NOTE(review): same input-range assumption as MulBy3 - confirm.
TEXT ·MulBy13(SB), $136-8
	MOVQ x+0(FP), AX
	LOAD_ACC(AX)

	// acc = 2x
	DOUBLE_ACC
	REDUCE_ACC

	// acc = 4x; this reduction uses slots s7..s16 as its backup copy,
	// and the same slots then receive the reduced 4x just below.
	DOUBLE_ACC
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP))

	// keep 4x for later
	MOVQ DX, s7-64(SP)
	MOVQ CX, s8-72(SP)
	MOVQ BX, s9-80(SP)
	MOVQ SI, s10-88(SP)
	MOVQ DI, s11-96(SP)
	MOVQ R8, s12-104(SP)
	MOVQ R9, s13-112(SP)
	MOVQ R10, s14-120(SP)
	MOVQ R11, s15-128(SP)
	MOVQ R12, s16-136(SP)

	// acc = 8x
	DOUBLE_ACC
	REDUCE_ACC

	// acc = 8x + 4x
	ADDQ s7-64(SP), DX
	ADCQ s8-72(SP), CX
	ADCQ s9-80(SP), BX
	ADCQ s10-88(SP), SI
	ADCQ s11-96(SP), DI
	ADCQ s12-104(SP), R8
	ADCQ s13-112(SP), R9
	ADCQ s14-120(SP), R10
	ADCQ s15-128(SP), R11
	ADCQ s16-136(SP), R12
	REDUCE_ACC

	// acc = 12x + x
	ADD_ACC(AX)
	REDUCE_ACC

	STORE_ACC(AX)
	RET

// Butterfly(a, b *Element) sets a = a + b; b = a - b
// Both results are computed from the values of *a and *b on entry:
//   1. a + b is formed and parked (unreduced) in R13, R14, R15, s0..s6;
//   2. a - b is formed; if the subtraction borrows, q is added back;
//      the result is written to *b;
//   3. the parked sum gets one conditional subtraction of q and is
//      written to *a.
// Frame: 56 bytes (slots s0..s6); args: 16 bytes (a, b).
TEXT ·Butterfly(SB), $56-16
	// acc = b
	MOVQ b+8(FP), AX
	LOAD_ACC(AX)

	// acc = b + a
	MOVQ a+0(FP), AX
	ADD_ACC(AX)

	// park the sum
	MOVQ DX, R13
	MOVQ CX, R14
	MOVQ BX, R15
	MOVQ SI, s0-8(SP)
	MOVQ DI, s1-16(SP)
	MOVQ R8, s2-24(SP)
	MOVQ R9, s3-32(SP)
	MOVQ R10, s4-40(SP)
	MOVQ R11, s5-48(SP)
	MOVQ R12, s6-56(SP)

	// acc = a (AX still points at a)
	LOAD_ACC(AX)

	// acc = a - b
	MOVQ b+8(FP), AX
	SUBQ 0(AX), DX
	SBBQ 8(AX), CX
	SBBQ 16(AX), BX
	SBBQ 24(AX), SI
	SBBQ 32(AX), DI
	SBBQ 40(AX), R8
	SBBQ 48(AX), R9
	SBBQ 56(AX), R10
	SBBQ 64(AX), R11
	SBBQ 72(AX), R12

	// no borrow: the difference is already non-negative
	JCC  diffReady

	// borrow: add q back. MOVQ leaves the flags untouched, so the carry
	// chain survives the interleaved immediate loads into AX.
	MOVQ $Q_LIMB0, AX
	ADDQ AX, DX
	MOVQ $Q_LIMB1, AX
	ADCQ AX, CX
	MOVQ $Q_LIMB2, AX
	ADCQ AX, BX
	MOVQ $Q_LIMB3, AX
	ADCQ AX, SI
	MOVQ $Q_LIMB4, AX
	ADCQ AX, DI
	MOVQ $Q_LIMB5, AX
	ADCQ AX, R8
	MOVQ $Q_LIMB6, AX
	ADCQ AX, R9
	MOVQ $Q_LIMB7, AX
	ADCQ AX, R10
	MOVQ $Q_LIMB8, AX
	ADCQ AX, R11
	MOVQ $Q_LIMB9, AX
	ADCQ AX, R12

diffReady:
	// *b = a - b (AX was used as scratch above, so reload the pointer)
	MOVQ b+8(FP), AX
	STORE_ACC(AX)

	// acc = parked a + b
	MOVQ R13, DX
	MOVQ R14, CX
	MOVQ R15, BX
	MOVQ s0-8(SP), SI
	MOVQ s1-16(SP), DI
	MOVQ s2-24(SP), R8
	MOVQ s3-32(SP), R9
	MOVQ s4-40(SP), R10
	MOVQ s5-48(SP), R11
	MOVQ s6-56(SP), R12

	REDUCE_ACC

	// *a = a + b
	MOVQ a+0(FP), AX
	STORE_ACC(AX)
	RET