// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// File: github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/math/big/arith_386.s

#include "../../../cmd/ld/textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions visible in the code below:
//   - A Word is handled as a 32-bit quantity (MOVL, 4-byte scaling).
//   - Each slice argument occupies 12 bytes of the argument frame; the
//     data pointer is at +0 and the length at +4 (e.g. z+0, z_len+4,
//     next argument at +12).
//   - Vector routines take their element count n from len(z) only; the
//     lengths of the other slice arguments are never loaded.
//   - In the shift routines, ŝ denotes the complementary shift count
//     (32 - s).

// func mulWW(x, y Word) (z1, z0 Word)
//
// mulWW returns the double-word product x*y: z1 is the high word,
// z0 the low word.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)		// DX:AX = AX * y (unsigned)
	MOVL DX, z1+8(FP)	// high word
	MOVL AX, z0+12(FP)	// low word
	RET


// func divWW(x1, x0, y Word) (q, r Word)
//
// divWW divides the double-word value x1:x0 by y and returns the
// quotient q and the remainder r.
// No operand checks are made here: DIVL raises a divide error when y is 0
// or when the quotient does not fit in one word.
// NOTE(review): presumably callers guarantee x1 < y - confirm.
TEXT ·divWW(SB),NOSPLIT,$0
	MOVL x1+0(FP), DX	// high word of dividend
	MOVL x0+4(FP), AX	// low word of dividend
	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
	MOVL AX, q+12(FP)
	MOVL DX, r+16(FP)
	RET


// func addVV(z, x, y []Word) (c Word)
//
// addVV sets z[i] = x[i] + y[i] + carry for 0 <= i < n, n = len(z),
// and returns the final carry (0 or 1) in c. For n <= 0 nothing is
// stored and c = 0.
// The carry is parked in bit 0 of DX between iterations because the
// loop-control instructions (ADDL, CMPL) overwrite the carry flag.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E1

L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c (bit 0 of DX)
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	RCLL $1, DX		// c = CF (back into bit 0 of DX)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E1:	CMPL BX, BP		// i < n
	JL L1

	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
//
// subVV sets z[i] = x[i] - y[i] - borrow for 0 <= i < n, n = len(z),
// and returns the final borrow (0 or 1) in c. For n <= 0 nothing is
// stored and c = 0.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E2

L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c (bit 0 of DX)
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	RCLL $1, DX		// c = CF (back into bit 0 of DX)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E2:	CMPL BX, BP		// i < n
	JL L2

	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x: with c starting as y,
// each step sets z[i] = x[i] + c and replaces c by the carry out
// (0 or 1), for 0 <= i < n, n = len(z). The final c is returned; for
// n <= 0 nothing is stored and c = y.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX	// AX = c + x[i], CF = carry out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
	RCLL $1, AX		// rotate CF into bit 0 of AX
	ANDL $1, AX		// c = carry (0 or 1)
	ADDL $1, BX		// i++

E3:	CMPL BX, BP		// i < n
	JL L3

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x: with c starting
// as y, each step sets z[i] = x[i] - c and replaces c by the borrow out
// (0 or 1), for 0 <= i < n, n = len(z). The final c is returned; for
// n <= 0 nothing is stored and c = y.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX	// TODO(gri) is there a reverse SUBL?
	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
	RCLL $1, AX		// rotate CF into bit 0 of AX
	ANDL $1, AX		// c = borrow (0 or 1)
	ADDL $1, BX		// i++

E4:	CMPL BX, BP		// i < n
	JL L4

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// shlVU stores x shifted left by s bits into z, working from the most
// significant word (index n-1) down to index 0, n = len(z), and returns
// in c the bits shifted out of the top word (x[n-1]>>ŝ). For n <= 0
// nothing is stored and c = 0.
// NOTE(review): s is used unchecked as the shift count in CX;
// presumably callers guarantee 0 <= s < 32 - confirm.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = n (len(z))
	SUBL $1, BX		// i--
	JL X8b			// i < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, DX:AX		// w1>>ŝ (double shift: DX receives the top s bits of AX)
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE X8a			// i <= 0

	// i > 0
L8:	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
	SUBL $1, BX		// i--
	JG L8			// i > 0

	// i <= 0
X8a:	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// shrVU stores x shifted right by s bits into z, working from index 0
// up to index n-1, n = len(z), and returns in c the bits shifted out of
// the bottom word (x[0]<<ŝ). For n <= 0 nothing is stored and c = 0.
// NOTE(review): s is used unchecked as the shift count in CX;
// presumably callers guarantee 0 <= s < 32 - confirm.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP
	SUBL $1, BP		// n--
	JL X9b			// n < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI), AX		// w1 = x[0]
	MOVL $0, DX
	SHRL CX, DX:AX		// w1<<ŝ (double shift: DX receives the low s bits of AX at its top)
	MOVL DX, c+28(FP)

	MOVL $0, BX		// i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
	ADDL $1, BX		// i++

E9:	CMPL BX, BP		// BP holds n-1 here
	JL L9			// i < n-1

	// i >= n-1
X9a:	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW computes, for 0 <= i < n, n = len(z), the double-word value
// x[i]*y + c with c starting as r; the low word is stored in z[i] and
// the high word becomes the next c. The final c is returned; for n <= 0
// nothing is stored and c = r.
// DI and SI are advanced to just past the last element so that the
// negative index in BX can count up towards zero.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP E5

L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// add c to the low word
	ADCL $0, DX		// propagate carry into the high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E5:	CMPL BX, $0		// i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW computes, for 0 <= i < n, n = len(z), the double-word value
// z[i] + x[i]*y + c with c starting at 0; the low word is stored back
// into z[i] and the high word becomes the next c. The final c is
// returned; for n <= 0 nothing is stored and c = 0.
// DI and SI are advanced to just past the last element so that the
// negative index in BX can count up towards zero.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL $0, CX		// c = 0
	JMP E6

L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// add c to the low word
	ADCL $0, DX		// propagate carry into the high word
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// propagate carry into the high word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E6:	CMPL BX, $0		// i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET


// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// divWVW divides by the single word y, working from index n-1 down to
// index 0, n = len(z): with r starting as xn, each step divides the
// double-word value r:x[i] by y, stores the quotient in z[i] and keeps
// the remainder as the next r. The final r is returned; for n <= 0
// nothing is stored and r = xn.
// (z is a slice: its pointer is read from z+0 and its length from
// z_len+4, and the next argument starts at +12.)
// No operand checks are made here: DIVL raises a divide error when y is 0
// or when a quotient does not fit in one word.
// NOTE(review): presumably callers guarantee xn < y - confirm.
TEXT ·divWVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL xn+12(FP), DX	// r = xn
	MOVL x+16(FP), SI
	MOVL y+28(FP), CX
	MOVL z_len+4(FP), BX	// i = n (len(z))
	JMP E7

L7:	MOVL (SI)(BX*4), AX	// AX = x[i]
	DIVL CX			// AX = r:x[i] / y, DX = r:x[i] % y
	MOVL AX, (DI)(BX*4)	// z[i] = quotient

E7:	SUBL $1, BX		// i--
	JGE L7			// i >= 0

	MOVL DX, r+32(FP)
	RET

// func bitLen(x Word) (n int)
//
// bitLen returns 0 when x is 0; otherwise it returns the index of the
// highest set bit of x plus one.
TEXT ·bitLen(SB),NOSPLIT,$0
	BSRL x+0(FP), AX	// AX = index of highest set bit; ZF is set when x == 0
	JZ Z1
	INCL AX
	MOVL AX, n+4(FP)
	RET

Z1:	MOVL $0, n+4(FP)
	RET