// Copyright 2009 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - A Word is 32 bits wide here: every element access uses MOVL and a
//     scale factor of 4.
//   - A []Word argument occupies 12 bytes of the argument frame
//     (pointer, length, capacity), which is why consecutive slice
//     arguments sit at offsets 0, 12, 24 from FP.
//   - Every routine takes its element count n from len(z) (z_len+4(FP));
//     the lengths of the other slices are never read, so callers must
//     supply operands of at least that length.
//   - Every routine is declared with TEXT flag 7 (NOPROF|DUPOK|NOSPLIT in
//     this toolchain's numeric encoding) and a $0 local frame.
//   - In the shift routines, ŝ denotes 32 - s.

// func mulWW(x, y Word) (z1, z0 Word)
//
// Computes the full 64-bit product x*y and returns its high word in z1
// and its low word in z0.
TEXT ·mulWW(SB),7,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)		// DX:AX = x * y
	MOVL DX, z1+8(FP)
	MOVL AX, z0+12(FP)
	RET


// func divWW(x1, x0, y Word) (q, r Word)
//
// Divides the two-word value x1:x0 by y and returns quotient q and
// remainder r. DIVL raises a divide error if y == 0 or if the quotient
// does not fit in one word (x1 >= y); no check is made here, so that is
// presumably a caller precondition.
TEXT ·divWW(SB),7,$0
	MOVL x1+0(FP), DX
	MOVL x0+4(FP), AX
	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
	MOVL AX, q+12(FP)
	MOVL DX, r+16(FP)
	RET


// func addVV(z, x, y []Word) (c Word)
//
// Sets z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and returns the
// final carry (0 or 1) in c.
//
// The carry is kept in bit 0 of DX between iterations because the loop
// bookkeeping (ADDL/CMPL) clobbers CF. RCRL moves it into CF before the
// ADCL, and RCLL moves the new CF back into bit 0 afterwards; the stale
// bit that RCRL parked in bit 31 is shifted out by the RCLL, so DX is
// always 0 or 1 at the bottom of the loop.
TEXT ·addVV(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E1

L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	RCLL $1, DX		// c = CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves flags alone)
	ADDL $1, BX		// i++

E1:	CMPL BX, BP		// i < n
	JL L1

	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
//
// Sets z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and returns the
// final borrow (0 or 1) in c. The borrow is shuttled between bit 0 of DX
// and CF with RCRL/RCLL exactly as the carry is in addVV.
TEXT ·subVV(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E2

L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	RCLL $1, DX		// c = CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E2:	CMPL BX, BP		// i < n
	JL L2

	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// Adds the single word y to the vector x: z[i] = x[i] + c with c starting
// at y and becoming the carry-out (0 or 1) after each step. Returns the
// final c; when len(z) <= 0 the loop does not run and c == y is returned.
TEXT ·addVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	RCLL $1, AX		// bit 0 of AX = CF
	ANDL $1, AX		// c = CF (discard the rotated sum bits)
	ADDL $1, BX		// i++

E3:	CMPL BX, BP		// i < n
	JL L3

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// Subtracts the single word y from the vector x: z[i] = x[i] - c with c
// starting at y and becoming the borrow-out (0 or 1) after each step.
// Returns the final c; when len(z) <= 0 the loop does not run and c == y
// is returned.
TEXT ·subVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX	// TODO(gri) is there a reverse SUBL?
	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	RCLL $1, AX		// bit 0 of AX = CF
	ANDL $1, AX		// c = CF
	ADDL $1, BX		// i++

E4:	CMPL BX, BP		// i < n
	JL L4

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x left by s bits into z, working from the most
// significant word down, and returns in c the bits shifted out of the
// top word (x[n-1]>>ŝ). Returns c = 0 without touching z when
// len(z) <= 0.
//
// "SHLL CX, DX:AX" is the double-precision shift: DX is shifted left by
// CX bits and the vacated low bits are filled from the high bits of AX;
// AX itself is not modified.
// NOTE(review): the hardware uses only the low 5 bits of CX, so s is
// assumed to be < 32 -- confirm against callers.
TEXT ·shlVU(SB),7,$0
	MOVL z_len+4(FP), BX	// i = len(z) = n
	SUBL $1, BX		// i--
	JL X8b			// i < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, DX:AX		// w1>>ŝ
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE X8a			// i <= 0

	// i > 0
L8:	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
	SUBL $1, BX		// i--
	JG L8			// i > 0

	// i <= 0
X8a:	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x right by s bits into z, working from the least
// significant word up, and returns in c the bits shifted out of the
// bottom word (x[0]<<ŝ). Returns c = 0 without touching z when
// len(z) <= 0.
//
// "SHRL CX, DX:AX" is the double-precision shift: DX is shifted right by
// CX bits and the vacated high bits are filled from the low bits of AX;
// AX itself is not modified.
// NOTE(review): the hardware uses only the low 5 bits of CX, so s is
// assumed to be < 32 -- confirm against callers.
TEXT ·shrVU(SB),7,$0
	MOVL z_len+4(FP), BP
	SUBL $1, BP		// n--
	JL X9b			// n < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI), AX		// w1 = x[0]
	MOVL $0, DX
	SHRL CX, DX:AX		// w1<<ŝ
	MOVL DX, c+28(FP)

	MOVL $0, BX		// i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
	ADDL $1, BX		// i++

E9:	CMPL BX, BP
	JL L9			// i < n-1

	// i >= n-1
X9a:	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// Computes z = x*y + r word by word: for each i, the 64-bit value
// x[i]*y + c is formed (c starts at r), its low word is stored in z[i]
// and its high word becomes the next c. Returns the final c.
//
// DI and SI are advanced to one past the last element and BX runs from
// -n up to 0, so the same register serves as index and loop counter.
TEXT ·mulAddVWW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP E5

L5:	MOVL (SI)(BX*4), AX
	MULL BP			// DX:AX = x[i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	MOVL AX, (DI)(BX*4)	// z[i] = low word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E5:	CMPL BX, $0		// i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// Computes z += x*y word by word: for each i, the 64-bit value
// x[i]*y + c is formed (c starts at 0), its low word is added into z[i]
// and its high word plus the carry of that addition becomes the next c.
// Returns the final c.
//
// Uses the same end-pointer / negative-index scheme as mulAddVWW.
TEXT ·addMulVVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL $0, CX		// c = 0
	JMP E6

L6:	MOVL (SI)(BX*4), AX
	MULL BP			// DX:AX = x[i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	ADDL AX, (DI)(BX*4)	// z[i] += low word
	ADCL $0, DX		// propagate carry into high word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E6:	CMPL BX, $0		// i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET


// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// Divides the multi-word value xn:x[n-1]:...:x[0] by the single word y,
// working from the most significant word down. Each step divides r:x[i]
// by y (r starts at xn), stores the quotient word in z[i] and keeps the
// remainder as the next r. Returns the final remainder.
//
// DIVL raises a divide error if y == 0 or a quotient word overflows
// (r >= y); no check is made here, so xn < y is presumably a caller
// precondition.
TEXT ·divWVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL xn+12(FP), DX	// r = xn
	MOVL x+16(FP), SI
	MOVL y+28(FP), CX
	MOVL z_len+4(FP), BX	// i = len(z)
	JMP E7

L7:	MOVL (SI)(BX*4), AX
	DIVL CX			// AX = r:x[i] / y, DX = r:x[i] % y
	MOVL AX, (DI)(BX*4)	// z[i] = quotient word

E7:	SUBL $1, BX		// i--
	JGE L7			// i >= 0

	MOVL DX, r+32(FP)
	RET

// func bitLen(x Word) (n int)
//
// Returns the number of bits needed to represent x: the index of the
// highest set bit plus one, or 0 when x == 0. BSRL sets ZF when its
// source operand is zero, which is what the JZ tests.
TEXT ·bitLen(SB),7,$0
	BSRL x+0(FP), AX
	JZ Z1
	INCL AX
	MOVL AX, n+4(FP)
	RET

Z1:	MOVL $0, n+4(FP)
	RET