//go:build !purego
// +build !purego

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Syntax: Go assembler (Plan 9 style) for amd64; operand order is "src, dst".
// Arguments are read from the caller's frame through the FP pseudo-register.
//
// Every routine below works on a field element addressed through a pointer
// and laid out as four 64-bit limbs, least-significant limb first
// (offsets 0, 8, 16, 24).

#include "textflag.h"
#include "funcdata.h"

// modulus q
// Stored least-significant limb first:
// q = 0x0800000000000011_0000000000000000_0000000000000000_0000000000000001
DATA q<>+0(SB)/8, $1
DATA q<>+8(SB)/8, $0
DATA q<>+16(SB)/8, $0
DATA q<>+24(SB)/8, $0x0800000000000011
GLOBL q<>(SB), (RODATA+NOPTR), $32

// qInv0 q'[0]
// Satisfies q[0] * qInv0 = -1 (mod 2^64). It is not referenced by any routine
// in this section; presumably it is consumed by Montgomery multiplication
// code elsewhere in the package (TODO confirm).
DATA qInv0<>(SB)/8, $0xffffffffffffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE(ra0..ra3, rb0..rb3) performs one conditional subtraction of q.
//
//   in:   (ra3:ra2:ra1:ra0) = v, a 256-bit value, ra0 being the low limb
//   out:  (ra3:ra2:ra1:ra0) = v - q  if v >= q
//                           = v      otherwise
//   clobbers: rb0..rb3 (left holding the original v) and the flags
//
// The subtraction is done in place while a copy of each limb is kept in rb*.
// If the final SBBQ borrows (CF set, i.e. v < q) the CMOVQCS instructions
// restore the saved limbs. The sequence is branch-free. Because q is
// subtracted only once, the result lies in [0, q) only when v < 2q.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;

// reduce(res *Element)
//
// Replaces *res by *res - q when *res >= q and leaves it unchanged otherwise
// (a single conditional subtraction, see REDUCE).
//
// Frame: no locals, 8 bytes of arguments.
// Registers: AX = res, (DX,CX,BX,SI) = limbs 0..3, (DI,R8,R9,R10) = scratch.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET

// MulBy3(x *Element)
//
// Sets *x = 3 * (*x) mod q, computed as (2x mod q) + x followed by a final
// conditional subtraction. Each intermediate stays below 2q as long as the
// input is below q, which is what a single REDUCE per step requires
// (assumes callers pass a reduced element -- TODO confirm).
//
// Frame: no locals, 8 bytes of arguments.
// Registers: AX = x, (DX,CX,BX,SI) = accumulator limbs 0..3,
//            DI, R8-R14 = scratch.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// accumulator = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// accumulator = (2x mod q) + x; x is re-read from memory, which has not
	// been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI

	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET

// MulBy5(x *Element)
//
// Sets *x = 5 * (*x) mod q, computed as 2*(2x mod q) mod q, plus x, with a
// conditional subtraction of q after every step (assumes the input is
// already below q -- TODO confirm against callers).
//
// Frame: no locals, 8 bytes of arguments.
// Registers: AX = x, (DX,CX,BX,SI) = accumulator limbs 0..3,
//            DI, R8-R15 = scratch.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// accumulator = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// accumulator = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	// accumulator = 4x + x; x is re-read from memory, which has not been
	// written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI

	// reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9)
	REDUCE(DX,CX,BX,SI,R15,DI,R8,R9)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET

// MulBy13(x *Element)
//
// Sets *x = 13 * (*x) mod q using the addition chain
//     2x -> 4x -> (keep 4x) -> 8x -> 8x + 4x = 12x -> 12x + x = 13x
// with a conditional subtraction of q after every step (assumes the input is
// already below q -- TODO confirm against callers).
//
// Frame: no locals, 8 bytes of arguments.
// Registers: AX = x, (DX,CX,BX,SI) = accumulator limbs 0..3,
//            (R11,R12,R13,R14) = saved 4x, (DI,R8,R9,R10) = scratch.
TEXT ·MulBy13(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// accumulator = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// accumulator = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	// keep 4x in (R11,R12,R13,R14); the scratch contents left there by the
	// REDUCE above are no longer needed
	MOVQ DX, R11
	MOVQ CX, R12
	MOVQ BX, R13
	MOVQ SI, R14

	// accumulator = 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// accumulator = 8x + 4x = 12x
	ADDQ R11, DX
	ADCQ R12, CX
	ADCQ R13, BX
	ADCQ R14, SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// accumulator = 12x + x; x is re-read from memory, which has not been
	// written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI

	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET

// Butterfly(a, b *Element) sets a = a + b; b = a - b
//
// Both results are computed from the original value of a:
//   *a = (a + b) reduced by one conditional subtraction of q
//   *b = (a - b), with q added back when the subtraction borrows
//
// Frame: no locals, 16 bytes of arguments.
// Registers: (CX,BX,SI,DI)     = sum limbs 0..3
//            (R8,R9,R10,R11)   = difference limbs 0..3, later REDUCE scratch
//            (R12,R13,R14,R15) = q or 0, the correction added to the difference
//            AX = a, then a zero source for CMOVQCC, then a again
//            DX = b
TEXT ·Butterfly(SB), NOSPLIT, $0-16
	MOVQ a+0(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI

	// second copy of a for the difference
	MOVQ CX, R8
	MOVQ BX, R9
	MOVQ SI, R10
	MOVQ DI, R11

	// AX = 0: the a pointer is no longer needed until the final store, and a
	// zero register is required as the CMOVQCC source below
	XORQ AX, AX
	MOVQ b+8(FP), DX

	// sum = a + b
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI

	// difference = a - b; CF is set when a < b
	SUBQ 0(DX), R8
	SBBQ 8(DX), R9
	SBBQ 16(DX), R10
	SBBQ 24(DX), R11

	// Load the limbs of q as immediates (same values as q<> above). MOVQ does
	// not modify the flags, so CF from the SBBQ chain is still live here.
	MOVQ $1, R12
	MOVQ $0, R13
	MOVQ $0, R14
	MOVQ $0x0800000000000011, R15

	// no borrow => correction = 0, otherwise correction = q
	CMOVQCC AX, R12
	CMOVQCC AX, R13
	CMOVQCC AX, R14
	CMOVQCC AX, R15

	// difference += correction
	ADDQ R12, R8
	ADCQ R13, R9
	ADCQ R14, R10
	ADCQ R15, R11

	// *b = difference; b is written only after all reads of it are done
	MOVQ R8, 0(DX)
	MOVQ R9, 8(DX)
	MOVQ R10, 16(DX)
	MOVQ R11, 24(DX)

	// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)

	// *a = sum; reload the pointer because AX was zeroed above
	MOVQ a+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	RET