// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !math_big_pure_go

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions shared by every routine below:
//   - A Word is handled as a 32-bit quantity: all loads and stores are
//     MOVL and all element indexing is scaled by 4.
//   - A slice argument occupies three 4-byte slots (pointer, length,
//     capacity), which is why z is at 0(FP), z_len at 4(FP) and the
//     next argument starts at 12(FP).
//   - Only len(z) is ever read; it is the element count n for the call.
//     The lengths of x and y are not checked here.
//     NOTE(review): presumably callers guarantee len(x), len(y) >= len(z).
//   - Loop conditions use signed compares (JL/JG/JLE) against n.

// func mulWW(x, y Word) (z1, z0 Word)
//
// mulWW returns the double-word product x*y, with the high word in z1
// and the low word in z0.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)		// DX:AX = AX * y
	MOVL DX, z1+8(FP)	// high word
	MOVL AX, z0+12(FP)	// low word
	RET


// func addVV(z, x, y []Word) (c Word)
//
// addVV sets z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and returns
// the final carry (0 or 1) in c.
//
// The carry cannot stay in CF across iterations because the index
// update (ADDL) and loop test (CMPL) overwrite the flags. It is parked
// in DX as 0 or -1 instead: "SBBL DX, DX" turns CF into 0/-1, and
// "ADDL DX, DX" turns 0/-1 back into CF=0/1.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E1

L1:	MOVL (SI)(BX*4), AX	// AX = x[i] (MOVL leaves flags alone)
	ADDL DX, DX		// restore CF
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	SBBL DX, DX		// save CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E1:	CMPL BX, BP		// i < n
	JL L1

	NEGL DX			// 0/-1 -> 0/1
	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
//
// subVV sets z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and returns
// the final borrow (0 or 1) in c. The borrow is parked in DX as 0 or -1
// between iterations, exactly as the carry is in addVV.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E2

L2:	MOVL (SI)(BX*4), AX	// AX = x[i] (MOVL leaves flags alone)
	ADDL DX, DX		// restore CF
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	SBBL DX, DX		// save CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E2:	CMPL BX, BP		// i < n
	JL L2

	NEGL DX			// 0/-1 -> 0/1
	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x, storing the result in
// z, and returns the carry out of the top word. AX doubles as the
// running carry: it starts as y and after each element becomes 0 or 1.
// When len(z) == 0 the loop is skipped and c is y unchanged.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP
	MOVL $0, BX		// i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF intact)
	SBBL AX, AX		// save CF
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E3:	CMPL BX, BP		// i < n
	JL L3

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x, storing the
// result in z, and returns the borrow out of the top word. AX doubles
// as the running borrow: it starts as y and after each element becomes
// 0 or 1. When len(z) == 0 the loop is skipped and c is y unchanged.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP
	MOVL $0, BX		// i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX = x[i] - c
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF intact)
	SBBL AX, AX		// save CF
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E4:	CMPL BX, BP		// i < n
	JL L4

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// shlVU stores x shifted left by s bits into z and returns in c the
// bits shifted out of the top word, x[n-1]>>ŝ, where ŝ = 32 - s.
// Elements are processed from index n-1 down to 0. If len(z) == 0
// nothing is stored and c is 0.
//
// "SHLL CX, AX, DX" is the double-width shift: DX is shifted left by CX
// bits and the vacated low bits are filled from the top of AX.
// NOTE(review): assumes 0 <= s < 32 — confirm against callers.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = n
	SUBL $1, BX		// i--
	JL X8b			// i < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, AX, DX		// w1>>ŝ
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE X8a			// i <= 0

	// i > 0
L8:	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, AX, DX		// w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
	SUBL $1, BX		// i--
	JG L8			// i > 0

	// i <= 0
X8a:	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// shrVU stores x shifted right by s bits into z and returns in c the
// bits shifted out of the bottom word, x[0]<<ŝ, where ŝ = 32 - s.
// Elements are processed from index 0 up to n-1. If len(z) == 0
// nothing is stored and c is 0.
//
// "SHRL CX, AX, DX" is the double-width shift: DX is shifted right by
// CX bits and the vacated high bits are filled from the bottom of AX.
// NOTE(review): assumes 0 <= s < 32 — confirm against callers.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP
	SUBL $1, BP		// n--
	JL X9b			// n < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI), AX		// w1 = x[0]
	MOVL $0, DX
	SHRL CX, AX, DX		// w1<<ŝ
	MOVL DX, c+28(FP)

	MOVL $0, BX		// i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, AX, DX		// w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
	ADDL $1, BX		// i++

E9:	CMPL BX, BP
	JL L9			// i < n-1

	// i >= n-1
X9a:	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW computes z = x*y + r over len(z) words and returns the word
// carried out of the top element in c. When len(z) == 0 the loop is
// skipped and c is r unchanged.
//
// DI and SI are advanced to one past the last element and the index
// runs from -n up to 0, so the loop test is a compare against zero.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP E5

L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E5:	CMPL BX, $0		// i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW computes z += x*y over len(z) words and returns the word
// carried out of the top element in c. When len(z) == 0 the loop is
// skipped and c is 0.
//
// As in mulAddVWW, DI and SI point one past the last element and the
// index runs from -n up to 0. The two ADCL $0, DX steps cannot wrap the
// high word: (2^32-1)^2 + 2*(2^32-1) = 2^64-1 still fits in DX:AX.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL $0, CX		// c = 0
	JMP E6

L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// propagate carry into high word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E6:	CMPL BX, $0		// i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET