// Source: github.com/consensys/gnark-crypto@v0.14.0/ecc/bls12-378/internal/fptower/e2_amd64.s
//
// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "textflag.h"
#include "funcdata.h"

// modulus q
// Stored as six 64-bit limbs; offset 0 is the least significant limb
// (REDUCE starts its borrow chain at q<>+0).
DATA q<>+0(SB)/8, $0x9948a20000000001
DATA q<>+8(SB)/8, $0xce97f76a822c0000
DATA q<>+16(SB)/8, $0x980dc360d0a49d7f
DATA q<>+24(SB)/8, $0x84059eb647102326
DATA q<>+32(SB)/8, $0x53cb5d240ed107a2
DATA q<>+40(SB)/8, $0x03eeb0416684d190
GLOBL q<>(SB), (RODATA+NOPTR), $48

// qInv0 q'[0]
// Satisfies q[0] * qInv0 == -1 (mod 2^64). It is not referenced by any
// routine visible in this file.
DATA qInv0<>(SB)/8, $0x9948a1ffffffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE conditionally subtracts q from the 6-limb value (ra0..ra5).
//
// Each limb is first saved into the matching scratch location rb0..rb5, then
// q is subtracted with a SUBQ/SBBQ borrow chain. If the subtraction borrows
// (carry set), the value was already below q and the saved limbs are moved
// back with CMOVQCS; otherwise the difference is kept.
//
// The value is treated as a plain 384-bit integer: a carry out of the top
// limb produced before the macro runs is not taken into account. The top limb
// of q is 0x03ee..., so the sum of two operands below q cannot carry out.
//
// MOVQ does not modify the flags, which is what lets the saves be interleaved
// with the borrow chain.
#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	MOVQ    ra4, rb4;        \
	SBBQ    q<>+32(SB), ra4; \
	MOVQ    ra5, rb5;        \
	SBBQ    q<>+40(SB), ra5; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;        \
	CMOVQCS rb4, ra4;        \
	CMOVQCS rb5, ra5;        \

// addE2 stores x + y into res, coefficient by coefficient.
//
// Arguments (24 bytes): res+0(FP), x+8(FP), y+16(FP) are pointers. Each
// pointee is read/written as two 6-limb values, one at byte offset 0 and one
// at byte offset 48. Each sum is passed through REDUCE before being stored.
// NOTE(review): the matching Go declaration is not in this file; presumably
// func addE2(res, x, y *E2) - confirm against the Go stub.
//
// The function declares a zero-byte frame, so it must not touch any
// pseudo-SP local: the assembler guide restricts such references to offsets
// in [-framesize, 0). The sixth REDUCE scratch location is therefore a
// register that is free at that point rather than a stack slot.
TEXT ·addE2(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), AX
	MOVQ 0(AX), BX
	MOVQ 8(AX), SI
	MOVQ 16(AX), DI
	MOVQ 24(AX), R8
	MOVQ 32(AX), R9
	MOVQ 40(AX), R10
	MOVQ y+16(FP), DX
	ADDQ 0(DX), BX
	ADCQ 8(DX), SI
	ADCQ 16(DX), DI
	ADCQ 24(DX), R8
	ADCQ 32(DX), R9
	ADCQ 40(DX), R10

	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,CX)
	// CX does not hold anything yet: res is only loaded after this reduction.
	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,CX)

	MOVQ res+0(FP), CX
	MOVQ BX, 0(CX)
	MOVQ SI, 8(CX)
	MOVQ DI, 16(CX)
	MOVQ R8, 24(CX)
	MOVQ R9, 32(CX)
	MOVQ R10, 40(CX)
	MOVQ 48(AX), BX
	MOVQ 56(AX), SI
	MOVQ 64(AX), DI
	MOVQ 72(AX), R8
	MOVQ 80(AX), R9
	MOVQ 88(AX), R10
	ADDQ 48(DX), BX
	ADCQ 56(DX), SI
	ADCQ 64(DX), DI
	ADCQ 72(DX), R8
	ADCQ 80(DX), R9
	ADCQ 88(DX), R10

	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,AX)
	// AX (pointer to x) is dead here: every limb of x has already been loaded.
	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX)

	MOVQ BX, 48(CX)
	MOVQ SI, 56(CX)
	MOVQ DI, 64(CX)
	MOVQ R8, 72(CX)
	MOVQ R9, 80(CX)
	MOVQ R10, 88(CX)
	RET

// doubleE2 stores 2*x into res, coefficient by coefficient.
//
// Arguments (16 bytes): res+0(FP), x+8(FP) are pointers. The 6-limb values at
// byte offsets 0 and 48 of x are each added to themselves, passed through
// REDUCE, and written to the same offsets of res.
TEXT ·doubleE2(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DX
	MOVQ x+8(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI
	MOVQ 32(AX), R8
	MOVQ 40(AX), R9
	ADDQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8
	ADCQ R9, R9

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	MOVQ CX, 0(DX)
	MOVQ BX, 8(DX)
	MOVQ SI, 16(DX)
	MOVQ DI, 24(DX)
	MOVQ R8, 32(DX)
	MOVQ R9, 40(DX)
	MOVQ 48(AX), CX
	MOVQ 56(AX), BX
	MOVQ 64(AX), SI
	MOVQ 72(AX), DI
	MOVQ 80(AX), R8
	MOVQ 88(AX), R9
	ADDQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8
	ADCQ R9, R9

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	MOVQ CX, 48(DX)
	MOVQ BX, 56(DX)
	MOVQ SI, 64(DX)
	MOVQ DI, 72(DX)
	MOVQ R8, 80(DX)
	MOVQ R9, 88(DX)
	RET

// subE2 stores x - y into res, coefficient by coefficient.
//
// Arguments (24 bytes): res+0(FP), x+8(FP), y+16(FP) are pointers. For each of
// the two 6-limb values (byte offsets 0 and 48) the routine computes x - y
// with a borrow chain, then adds back either q (when the subtraction
// borrowed) or zero (when it did not). The choice is made with conditional
// moves rather than a branch.
TEXT ·subE2(SB), NOSPLIT, $0-24
	// R9 stays zero for the whole routine; it is the "add nothing" operand.
	// It is cleared first because XORQ would clobber the carry flag later on.
	XORQ R9, R9
	MOVQ x+8(FP), R8
	MOVQ 0(R8), AX
	MOVQ 8(R8), DX
	MOVQ 16(R8), CX
	MOVQ 24(R8), BX
	MOVQ 32(R8), SI
	MOVQ 40(R8), DI
	MOVQ y+16(FP), R8
	SUBQ 0(R8), AX
	SBBQ 8(R8), DX
	SBBQ 16(R8), CX
	SBBQ 24(R8), BX
	SBBQ 32(R8), SI
	SBBQ 40(R8), DI

	// R8 was reused for y above; reload x for the second coefficient.
	// The MOVQs below leave the borrow flag from the SBBQ chain intact.
	MOVQ x+8(FP), R8
	MOVQ $0x9948a20000000001, R10
	MOVQ $0xce97f76a822c0000, R11
	MOVQ $0x980dc360d0a49d7f, R12
	MOVQ $0x84059eb647102326, R13
	MOVQ $0x53cb5d240ed107a2, R14
	MOVQ $0x03eeb0416684d190, R15

	// No borrow (carry clear): replace the limbs of q with zero.
	CMOVQCC R9, R10
	CMOVQCC R9, R11
	CMOVQCC R9, R12
	CMOVQCC R9, R13
	CMOVQCC R9, R14
	CMOVQCC R9, R15
	ADDQ    R10, AX
	ADCQ    R11, DX
	ADCQ    R12, CX
	ADCQ    R13, BX
	ADCQ    R14, SI
	ADCQ    R15, DI
	MOVQ    res+0(FP), R10
	MOVQ    AX, 0(R10)
	MOVQ    DX, 8(R10)
	MOVQ    CX, 16(R10)
	MOVQ    BX, 24(R10)
	MOVQ    SI, 32(R10)
	MOVQ    DI, 40(R10)

	// Second coefficient (byte offset 48), same scheme.
	MOVQ 48(R8), AX
	MOVQ 56(R8), DX
	MOVQ 64(R8), CX
	MOVQ 72(R8), BX
	MOVQ 80(R8), SI
	MOVQ 88(R8), DI
	MOVQ y+16(FP), R8
	SUBQ 48(R8), AX
	SBBQ 56(R8), DX
	SBBQ 64(R8), CX
	SBBQ 72(R8), BX
	SBBQ 80(R8), SI
	SBBQ 88(R8), DI
	MOVQ $0x9948a20000000001, R11
	MOVQ $0xce97f76a822c0000, R12
	MOVQ $0x980dc360d0a49d7f, R13
	MOVQ $0x84059eb647102326, R14
	MOVQ $0x53cb5d240ed107a2, R15
	MOVQ $0x03eeb0416684d190, R10

	// No borrow (carry clear): replace the limbs of q with zero.
	CMOVQCC R9, R11
	CMOVQCC R9, R12
	CMOVQCC R9, R13
	CMOVQCC R9, R14
	CMOVQCC R9, R15
	CMOVQCC R9, R10
	ADDQ    R11, AX
	ADCQ    R12, DX
	ADCQ    R13, CX
	ADCQ    R14, BX
	ADCQ    R15, SI
	ADCQ    R10, DI
	MOVQ    res+0(FP), R8
	MOVQ    AX, 48(R8)
	MOVQ    DX, 56(R8)
	MOVQ    CX, 64(R8)
	MOVQ    BX, 72(R8)
	MOVQ    SI, 80(R8)
	MOVQ    DI, 88(R8)
	RET

// negE2 stores -x into res, coefficient by coefficient.
//
// Arguments (16 bytes): res+0(FP), x+8(FP) are pointers. For each of the two
// 6-limb values (byte offsets 0 and 48): if every limb is zero, zero is
// written; otherwise q - x is written. No reduction is applied afterwards.
// NOTE(review): this presumably relies on the input being below q - confirm
// against the callers.
TEXT ·negE2(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DX
	MOVQ x+8(FP), AX
	MOVQ 0(AX), BX
	MOVQ 8(AX), SI
	MOVQ 16(AX), DI
	MOVQ 24(AX), R8
	MOVQ 32(AX), R9
	MOVQ 40(AX), R10

	// OR all limbs together: the result is zero only if every limb is zero.
	MOVQ  BX, AX
	ORQ   SI, AX
	ORQ   DI, AX
	ORQ   R8, AX
	ORQ   R9, AX
	ORQ   R10, AX
	TESTQ AX, AX
	JNE   l1

	// First coefficient is zero: AX is zero here, store it in every limb.
	MOVQ AX, 0(DX)
	MOVQ AX, 8(DX)
	MOVQ AX, 16(DX)
	MOVQ AX, 24(DX)
	MOVQ AX, 32(DX)
	MOVQ AX, 40(DX)
	JMP  l3

l1:
	// First coefficient is non-zero: res = q - x, limb by limb. The MOVQs of
	// the immediates and the stores do not disturb the borrow chain.
	MOVQ $0x9948a20000000001, CX
	SUBQ BX, CX
	MOVQ CX, 0(DX)
	MOVQ $0xce97f76a822c0000, CX
	SBBQ SI, CX
	MOVQ CX, 8(DX)
	MOVQ $0x980dc360d0a49d7f, CX
	SBBQ DI, CX
	MOVQ CX, 16(DX)
	MOVQ $0x84059eb647102326, CX
	SBBQ R8, CX
	MOVQ CX, 24(DX)
	MOVQ $0x53cb5d240ed107a2, CX
	SBBQ R9, CX
	MOVQ CX, 32(DX)
	MOVQ $0x03eeb0416684d190, CX
	SBBQ R10, CX
	MOVQ CX, 40(DX)

l3:
	// Second coefficient (byte offset 48). AX was used as an accumulator
	// above, so the pointer to x is reloaded.
	MOVQ  x+8(FP), AX
	MOVQ  48(AX), BX
	MOVQ  56(AX), SI
	MOVQ  64(AX), DI
	MOVQ  72(AX), R8
	MOVQ  80(AX), R9
	MOVQ  88(AX), R10
	MOVQ  BX, AX
	ORQ   SI, AX
	ORQ   DI, AX
	ORQ   R8, AX
	ORQ   R9, AX
	ORQ   R10, AX
	TESTQ AX, AX
	JNE   l2

	// Second coefficient is zero: store zero in every limb.
	MOVQ AX, 48(DX)
	MOVQ AX, 56(DX)
	MOVQ AX, 64(DX)
	MOVQ AX, 72(DX)
	MOVQ AX, 80(DX)
	MOVQ AX, 88(DX)
	RET

l2:
	// Second coefficient is non-zero: res = q - x, limb by limb.
	MOVQ $0x9948a20000000001, CX
	SUBQ BX, CX
	MOVQ CX, 48(DX)
	MOVQ $0xce97f76a822c0000, CX
	SBBQ SI, CX
	MOVQ CX, 56(DX)
	MOVQ $0x980dc360d0a49d7f, CX
	SBBQ DI, CX
	MOVQ CX, 64(DX)
	MOVQ $0x84059eb647102326, CX
	SBBQ R8, CX
	MOVQ CX, 72(DX)
	MOVQ $0x53cb5d240ed107a2, CX
	SBBQ R9, CX
	MOVQ CX, 80(DX)
	MOVQ $0x03eeb0416684d190, CX
	SBBQ R10, CX
	MOVQ CX, 88(DX)
	RET