// Listing origin: github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/math/big/arith_386.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !math_big_pure_go
// +build !math_big_pure_go

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used by every routine below:
//   - a Word is handled as one 32-bit value (all accesses are MOVL/ADDL/...
//     with a scale factor of 4);
//   - only len(z) is read from the argument frame; x (and y, where it is a
//     slice) are indexed with the same index and are not length-checked here;
//   - in the shift routines, ŝ is the complementary shift count, i.e. the
//     number of bits that the double-register shift pulls in from the
//     neighbouring word.

// func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] + carry into z[i] for i = 0 .. len(z)-1,
// propagating the carry from word to word, and returns the final
// carry (0 or 1) in c. With len(z) == 0 the loop does not run and c = 0.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL y+24(FP), CX	// CX = &y[0]
	MOVL z_len+4(FP), BP	// BP = n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E1

	// The carry is parked in DX as 0 or -1 between iterations because the
	// loop-control ADDL/CMPL overwrite CF.
L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// restore CF (set exactly when DX == -1)
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	SBBL DX, DX		// save CF as DX = 0 or -1
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E1:	CMPL BX, BP		// i < n
	JL L1

	NEGL DX			// convert 0/-1 into 0/1
	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
//
// subVV stores x[i] - y[i] - borrow into z[i] for i = 0 .. len(z)-1 and
// returns the final borrow (0 or 1) in c. With len(z) == 0, c = 0.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL y+24(FP), CX	// CX = &y[0]
	MOVL z_len+4(FP), BP	// BP = n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E2

	// As in addVV, DX carries the borrow across iterations as 0 or -1.
L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// restore CF (set exactly when DX == -1)
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	SBBL DX, DX		// save CF as DX = 0 or -1
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E2:	CMPL BX, BP		// i < n
	JL L2

	NEGL DX			// convert 0/-1 into 0/1
	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x: the running carry starts
// as y, each step stores x[i] + carry into z[i] and continues with the
// carry-out (0 or 1). The last carry is returned in c. With len(z) == 0
// the loop does not run and y itself is returned.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// BP = n = len(z)
	MOVL $0, BX		// i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX	// AX = c + x[i], CF = carry-out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
	SBBL AX, AX		// save CF as AX = 0 or -1
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E3:	CMPL BX, BP		// i < n
	JL L3

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x: the running borrow
// starts as y, each step stores x[i] - borrow into z[i] and continues with
// the borrow-out (0 or 1). The last borrow is returned in c. With
// len(z) == 0 the loop does not run and y itself is returned.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// BP = n = len(z)
	MOVL $0, BX		// i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX = x[i] - c, CF = borrow-out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
	SBBL AX, AX		// save CF as AX = 0 or -1
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E4:	CMPL BX, BP		// i < n
	JL L4

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// shlVU shifts the vector x left by s bits into z, walking from the
// highest index down to 0, and returns in c the bits shifted out of
// x[len(z)-1]. With len(z) == 0 nothing is stored and c = 0.
// NOTE(review): s is used directly as the hardware shift count and is not
// range-checked here; presumably callers guarantee s < 32 — confirm.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = n = len(z)
	SUBL $1, BX		// i--
	JL X8b			// i < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL s+24(FP), CX	// CX = s (shift count)
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, AX, DX		// w1>>ŝ (DX was 0, so only the bits leaving w1 remain)
	MOVL DX, c+28(FP)	// c is stored before the loop; DX is reused below

	CMPL BX, $0
	JLE X8a			// i <= 0

	// i > 0
L8:	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, AX, DX		// w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
	SUBL $1, BX		// i--
	JG L8			// i > 0 (tests the flags set by SUBL)

	// i <= 0
X8a:	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)	// empty z: nothing shifted out
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// shrVU shifts the vector x right by s bits into z, walking from index 0
// upwards, and returns in c the bits shifted out of x[0] (placed at the
// top of the word). With len(z) == 0 nothing is stored and c = 0.
// NOTE(review): s is used directly as the hardware shift count and is not
// range-checked here; presumably callers guarantee s < 32 — confirm.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// BP = n = len(z)
	SUBL $1, BP		// n--
	JL X9b			// n < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI	// DI = &z[0]
	MOVL x+12(FP), SI	// SI = &x[0]
	MOVL s+24(FP), CX	// CX = s (shift count)
	MOVL (SI), AX		// w1 = x[0]
	MOVL $0, DX
	SHRL CX, AX, DX		// w1<<ŝ (DX was 0, so only the bits leaving w1 remain)
	MOVL DX, c+28(FP)	// c is stored before the loop; DX is reused below

	MOVL $0, BX		// i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, AX, DX		// w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
	ADDL $1, BX		// i++

E9:	CMPL BX, BP		// BP holds n-1 at this point
	JL L9			// i < n-1

	// i >= n-1
X9a:	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)	// empty z: nothing shifted out
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW computes x[i]*y + c for each i, stores the low word of the
// result in z[i] and keeps the high word as the next c. c starts as r and
// its final value is returned. With len(z) == 0 the loop does not run and
// r itself is returned.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP	// BP = y (multiplier)
	MOVL r+28(FP), CX	// c = r
	MOVL z_len+4(FP), BX	// BX = n = len(z)
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP E5

	// i runs from -n up to 0, so (SI)(BX*4) and (DI)(BX*4) address
	// element n+i and the loop test is a comparison against zero.
L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = AX * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// fold the carry into the high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E5:	CMPL BX, $0		// i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW adds x[i]*y + c to z[i] in place for each i, keeping the
// overflow of each step as the next c. c starts at 0 and its final value
// is returned. With len(z) == 0 the loop does not run and c = 0.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP	// BP = y (multiplier)
	MOVL z_len+4(FP), BX	// BX = n = len(z)
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL $0, CX		// c = 0
	JMP E6

	// Same negative-index scheme as mulAddVWW: i counts from -n up to 0.
L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = AX * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// fold the carry into the high word
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// fold the carry of the in-memory add as well
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E6:	CMPL BX, $0		// i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET