// Source: github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/math/big/arith_386.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used in the comments below:
//   - a Word is 32 bits wide here (all accesses are MOVL with a *4 scale);
//   - n is the length of z, read from z_len+4(FP);
//   - ŝ denotes 32 - s, the complementary shift count of a double shift.

// func mulWW(x, y Word) (z1, z0 Word)
// Unsigned multiply of two words; z1 receives the high word of the
// product and z0 the low word.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)		// DX:AX = x * y
	MOVL AX, z0+12(FP)	// low word
	MOVL DX, z1+8(FP)	// high word
	RET


// func divWW(x1, x0, y Word) (q, r Word)
// Divides the two-word value x1:x0 by y, returning quotient and remainder.
// DIVL is issued directly: nothing here checks that y is non-zero or that
// the quotient fits in a single word.
TEXT ·divWW(SB),NOSPLIT,$0
	MOVL x0+4(FP), AX	// low word of the dividend
	MOVL x1+0(FP), DX	// high word of the dividend
	DIVL y+8(FP)		// AX = quotient, DX = remainder
	MOVL DX, r+16(FP)
	MOVL AX, q+12(FP)
	RET


// func addVV(z, x, y []Word) (c Word)
// z[i] = x[i] + y[i] with carry propagation for 0 <= i < n; c is the
// final carry (0 or 1). Only the length of z is consulted.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	XORL BX, BX		// i = 0
	XORL DX, DX		// saved carry = 0
	JMP addvv_test

addvv_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// DX is 0 or -1: doubling it reloads CF
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	SBBL DX, DX		// DX = -CF, stashing the carry across the loop test
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	INCL BX			// i++

addvv_test:
	CMPL BX, BP
	JL addvv_loop		// keep going while i < n

	NEGL DX			// turn the stashed -CF into 0 or 1
	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// z[i] = x[i] - y[i] with borrow propagation for 0 <= i < n; c is the
// final borrow (0 or 1). Structurally identical to addVV, with SBBL
// taking the place of ADCL.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	XORL BX, BX		// i = 0
	XORL DX, DX		// saved borrow = 0
	JMP subvv_test

subvv_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// DX is 0 or -1: doubling it reloads CF
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	SBBL DX, DX		// DX = -CF, stashing the borrow across the loop test
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	INCL BX			// i++

subvv_test:
	CMPL BX, BP
	JL subvv_loop		// keep going while i < n

	NEGL DX			// turn the stashed -CF into 0 or 1
	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
// z = x + y, where y is a single word that is fed in as the initial
// carry. When n <= 0 the loop is skipped and c is simply y.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	XORL BX, BX		// i = 0
	JMP addvw_test

addvw_loop:
	ADDL (SI)(BX*4), AX	// AX = x[i] + c
	MOVL AX, (DI)(BX*4)	// z[i] = AX (the store leaves CF untouched)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// c = CF
	INCL BX			// i++

addvw_test:
	CMPL BX, BP
	JL addvw_loop		// keep going while i < n

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
// z = x - y, where y is a single word that is fed in as the initial
// borrow. When n <= 0 the loop is skipped and c is simply y.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	XORL BX, BX		// i = 0
	JMP subvw_test

subvw_loop:
	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX = x[i] - c
	MOVL DX, (DI)(BX*4)	// z[i] = DX (the store leaves CF untouched)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// c = CF
	INCL BX			// i++

subvw_test:
	CMPL BX, BP
	JL subvw_loop		// keep going while i < n

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
// Shifts the vector x left by s bits into z, walking from the most
// significant word down. c receives the bits shifted out of x[n-1];
// for n <= 0 nothing is written and c = 0.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = n
	DECL BX			// i = n-1
	JL shl_empty		// n <= 0: nothing to shift

	// n > 0
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	XORL DX, DX
	SHLL CX, DX:AX		// DX = w1>>ŝ, the bits leaving the top word
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE shl_last		// single word: only z[0] remains

	// i > 0
shl_loop:
	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// DX = w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
	DECL BX			// i--
	JG shl_loop		// keep going while i > 0

	// i <= 0
shl_last:
	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

shl_empty:
	MOVL $0, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
// Shifts the vector x right by s bits into z, walking from the least
// significant word up. c receives the bits shifted out of x[0];
// for n <= 0 nothing is written and c = 0.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n
	DECL BP			// BP = n-1
	JL shr_empty		// n <= 0: nothing to shift

	// n > 0
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI), AX		// w1 = x[0]
	XORL DX, DX
	SHRL CX, DX:AX		// DX = w1<<ŝ, the bits leaving the bottom word
	MOVL DX, c+28(FP)

	XORL BX, BX		// i = 0
	JMP shr_test

	// i < n-1
shr_loop:
	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// DX = w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
	INCL BX			// i++

shr_test:
	CMPL BX, BP
	JL shr_loop		// keep going while i < n-1

	// i >= n-1
shr_last:
	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

shr_empty:
	MOVL $0, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// z = x*y + r for a single-word multiplier y and addend r; c is the
// word carried out of the top. Both base pointers are advanced to the
// end of their vectors so that one negative index counts up to zero.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP muladdvww_test

muladdvww_loop:
	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX
	ADCL $0, DX		// DX:AX += c
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	INCL BX			// i++

muladdvww_test:
	CMPL BX, $0
	JL muladdvww_loop	// keep going while i < 0

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
// z += x*y for a single-word multiplier y; c is the word carried out of
// the top. Uses the same negative-index scheme as mulAddVWW.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	XORL CX, CX		// c = 0
	JMP addmulvvw_test

addmulvvw_loop:
	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX
	ADCL $0, DX		// DX:AX += c
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// fold that addition's carry into the high word
	MOVL DX, CX		// c = high word
	INCL BX			// i++

addmulvvw_test:
	CMPL BX, $0
	JL addmulvvw_loop	// keep going while i < 0

	MOVL CX, c+28(FP)
	RET


// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// Divides the vector x, extended at the top by the word xn, by the single
// word y. Works from the most significant word down, writing quotient
// words to z and threading the remainder through DX; r is the final
// remainder. The iteration count is taken from the length of z.
TEXT ·divWVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = n
	MOVL z+0(FP), DI
	MOVL x+16(FP), SI
	MOVL y+28(FP), CX
	MOVL xn+12(FP), DX	// r = xn
	JMP divwvw_next

divwvw_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]; DX still holds the running remainder
	DIVL CX			// AX = quotient, DX = remainder of DX:AX / y
	MOVL AX, (DI)(BX*4)	// z[i] = quotient

divwvw_next:
	DECL BX			// i--
	JGE divwvw_loop		// keep going while i >= 0

	MOVL DX, r+32(FP)
	RET

// func bitLen(x Word) (n int)
// Returns one more than the index of the highest set bit of x,
// or 0 when x is zero.
TEXT ·bitLen(SB),NOSPLIT,$0
	BSRL x+0(FP), AX	// AX = index of the highest set bit; ZF set if x == 0
	JZ bitlen_zero
	INCL AX			// index -> length
	MOVL AX, n+4(FP)
	RET

bitlen_zero:
	MOVL $0, n+4(FP)
	RET