//go:build !purego
// +build !purego

// Origin: github.com/consensys/gnark-crypto@v0.14.0/ecc/bls24-317/fp/element_ops_amd64.s

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Syntax and conventions used throughout this file:
//   - Go assembler (Plan 9 style) for amd64; operand order is "src, dst".
//   - Arguments are read through the FP pseudo-register (name+offset(FP)).
//   - An element is handled as five 64-bit limbs stored at byte offsets
//     0, 8, 16, 24 and 32 from its pointer, least significant limb first
//     (the carry chains below start at offset 0 and end at offset 32).
//   - The routines assume their inputs are already below q; nothing here
//     checks that. NOTE(review): confirm against the Go-side callers.

#include "textflag.h"
#include "funcdata.h"

// modulus q
// Five limbs, least significant first. The top limb is 0x1058..., so the sum
// of two values below q still fits in 320 bits without a carry out of the
// last ADCQ.
DATA q<>+0(SB)/8, $0x8d512e565dab2aab
DATA q<>+8(SB)/8, $0xd6f339e43424bf7e
DATA q<>+16(SB)/8, $0x169a61e684c73446
DATA q<>+24(SB)/8, $0xf28fc5a0b7f9d039
DATA q<>+32(SB)/8, $0x1058ca226f60892c
GLOBL q<>(SB), (RODATA+NOPTR), $40

// qInv0 q'[0]
// Not referenced by the routines below.
DATA qInv0<>(SB)/8, $0x55b5e0028b047ffd
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE(ra0..ra4, rb0..rb4) performs one conditional subtraction of q:
//
//     ra = (ra >= q) ? ra - q : ra
//
// Each limb of ra is first copied to the matching rb operand, then q is
// subtracted from ra with a borrow chain (SUBQ + 4x SBBQ). If the final SBBQ
// borrows (carry set, i.e. ra < q) the CMOVQCS instructions restore the saved
// copy, otherwise the difference is kept.
//
// In:    ra0..ra4  value to reduce (registers)
// Out:   ra0..ra4  reduced value
// Clobb: rb0..rb4 (registers or stack slots), flags
//
// Only a single subtraction is done, so the result is below q only when the
// input is below 2q.
#define REDUCE(ra0, ra1, ra2, ra3, ra4, rb0, rb1, rb2, rb3, rb4) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	MOVQ    ra4, rb4;        \
	SBBQ    q<>+32(SB), ra4; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;        \
	CMOVQCS rb4, ra4;        \

// reduce(res *Element)
// Loads the five limbs at res, applies one conditional subtraction of q and
// stores the result back in place.
//
// In:    res+0(FP) = pointer to the element
// Clobb: AX, DX, CX, BX, SI, DI, R8-R12, flags
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	RET

// MulBy3(x *Element)
// Replaces *x with 3*x mod q, computed as (2x mod q) + x followed by a final
// conditional subtraction.
//
// In:    x+0(FP) = pointer to the element (updated in place)
// Clobb: AX, DX, CX, BX, SI, DI, R8-R15, flags
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI

	// (DX,CX,BX,SI,DI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	// (DX,CX,BX,SI,DI) = 2x + x, x re-read from memory
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,R8,R9)
	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,R8,R9)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	RET

// MulBy5(x *Element)
// Replaces *x with 5*x mod q, computed as 2x -> 4x -> 4x + x with a
// conditional subtraction after every step.
//
// In:    x+0(FP) = pointer to the element (updated in place)
// Clobb: AX, DX, CX, BX, SI, DI, R8-R15, flags
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI

	// (DX,CX,BX,SI,DI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	// (DX,CX,BX,SI,DI) = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,R8,R9)
	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,R8,R9)

	// (DX,CX,BX,SI,DI) = 4x + x, x re-read from memory
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R10,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	RET

// MulBy13(x *Element)
// Replaces *x with 13*x mod q, computed as 2x -> 4x (saved) -> 8x ->
// 8x + 4x -> 12x + x with a conditional subtraction after every step.
//
// The 16-byte frame provides two spill slots, s0-8(SP) and s1-16(SP), which
// together with R13,R14,R15 hold first the REDUCE temporaries and then the
// saved copy of 4x.
//
// In:    x+0(FP) = pointer to the element (updated in place)
// Clobb: AX, DX, CX, BX, SI, DI, R8-R15, s0, s1, flags
TEXT ·MulBy13(SB), $16-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI

	// (DX,CX,BX,SI,DI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	// (DX,CX,BX,SI,DI) = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP))
	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,s0-8(SP),s1-16(SP))

	// keep the reduced 4x in (R13,R14,R15,s0,s1) for the addition below
	MOVQ DX, R13
	MOVQ CX, R14
	MOVQ BX, R15
	MOVQ SI, s0-8(SP)
	MOVQ DI, s1-16(SP)

	// (DX,CX,BX,SI,DI) = 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	// (DX,CX,BX,SI,DI) = 8x + 4x
	ADDQ R13, DX
	ADCQ R14, CX
	ADCQ R15, BX
	ADCQ s0-8(SP), SI
	ADCQ s1-16(SP), DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	// (DX,CX,BX,SI,DI) = 12x + x, x re-read from memory
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI

	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	RET

// Butterfly(a, b *Element) sets a = a + b; b = a - b
// Both results are computed from the values of a and b loaded at entry.
//
// In:    a+0(FP) = pointer to a, b+8(FP) = pointer to b (both updated)
// Clobb: AX, CX, BX, SI, DI, DX, R8-R15, s0, s1, s2, flags
//
// The 24-byte frame provides three spill slots (s0, s1, s2) for the upper
// limbs of the sum while CX,BX,SI,DI,R8 are reused to hold the modulus.
// The difference is stored to b before the sum is stored to a.
TEXT ·Butterfly(SB), $24-16
	// (CX,BX,SI,DI,R8) = a, with a second copy in (R9,R10,R11,R12,R13)
	MOVQ a+0(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI
	MOVQ 32(AX), R8
	MOVQ CX, R9
	MOVQ BX, R10
	MOVQ SI, R11
	MOVQ DI, R12
	MOVQ R8, R13

	// AX = 0; it is the zero source for the CMOVQCC block below, so the
	// pointer to a is reloaded before the final store.
	XORQ AX, AX
	MOVQ b+8(FP), DX

	// (CX,BX,SI,DI,R8) = a + b
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI
	ADCQ 32(DX), R8

	// (R9,R10,R11,R12,R13) = a - b; the borrow out of the last SBBQ must
	// survive until the CMOVQCC block, so only MOVQ is used in between.
	SUBQ 0(DX), R9
	SBBQ 8(DX), R10
	SBBQ 16(DX), R11
	SBBQ 24(DX), R12
	SBBQ 32(DX), R13

	// park the sum in (R14,R15,s0,s1,s2)
	MOVQ CX, R14
	MOVQ BX, R15
	MOVQ SI, s0-8(SP)
	MOVQ DI, s1-16(SP)
	MOVQ R8, s2-24(SP)

	// (CX,BX,SI,DI,R8) = q as immediates (same limbs as q<> above),
	// replaced by 0 when the subtraction did not borrow.
	MOVQ    $0x8d512e565dab2aab, CX
	MOVQ    $0xd6f339e43424bf7e, BX
	MOVQ    $0x169a61e684c73446, SI
	MOVQ    $0xf28fc5a0b7f9d039, DI
	MOVQ    $0x1058ca226f60892c, R8
	CMOVQCC AX, CX
	CMOVQCC AX, BX
	CMOVQCC AX, SI
	CMOVQCC AX, DI
	CMOVQCC AX, R8

	// (R9,R10,R11,R12,R13) = (a - b) + (q or 0)
	ADDQ CX, R9
	ADCQ BX, R10
	ADCQ SI, R11
	ADCQ DI, R12
	ADCQ R8, R13

	// restore the sum into (CX,BX,SI,DI,R8)
	MOVQ R14, CX
	MOVQ R15, BX
	MOVQ s0-8(SP), SI
	MOVQ s1-16(SP), DI
	MOVQ s2-24(SP), R8

	// b = a - b
	MOVQ R9, 0(DX)
	MOVQ R10, 8(DX)
	MOVQ R11, 16(DX)
	MOVQ R12, 24(DX)
	MOVQ R13, 32(DX)

	// reduce element(CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13)

	// a = a + b
	MOVQ a+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	MOVQ R8, 32(AX)
	RET