github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // This file provides fast assembly versions for the elementary
     8  // arithmetic operations on vectors implemented in arith.go.
     9  
    10  // func mulWW(x, y Word) (z1, z0 Word)
        // mulWW returns the full double-word product of x and y:
        // z1 receives the upper word and z0 the lower word.
    11  TEXT ·mulWW(SB),NOSPLIT,$0
    12  	MOVL y+4(FP), AX	// AX = y
    13  	MULL x+0(FP)		// DX:AX = y * x (unsigned)
    14  	MOVL AX, z0+12(FP)	// z0 = lower word of the product
    15  	MOVL DX, z1+8(FP)	// z1 = upper word of the product
    16  	RET
    17  
    18  
    19  // func divWW(x1, x0, y Word) (q, r Word)
        // divWW divides the double-word value x1:x0 by y and returns the
        // quotient q and the remainder r. DIVL raises a hardware divide
        // error when y is zero or the quotient does not fit in one word.
    20  TEXT ·divWW(SB),NOSPLIT,$0
    21  	MOVL x0+4(FP), AX	// AX = lower word of the dividend
    22  	MOVL x1+0(FP), DX	// DX = upper word of the dividend
    23  	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
    24  	MOVL DX, r+16(FP)	// r = remainder
    25  	MOVL AX, q+12(FP)	// q = quotient
    26  	RET
    27  
    28  
    29  // func addVV(z, x, y []Word) (c Word)
        // addVV stores x[i] + y[i] + carry into z[i] for 0 <= i < len(z),
        // propagating the carry from word to word, and returns the final
        // carry (0 or 1) in c. Only len(z) is read; the lengths of x and y
        // are not checked here.
        //
        // Register use: DI = &z[0], SI = &x[0], CX = &y[0], BP = n = len(z),
        // BX = i, DX = pending carry kept as 0 or -1 between iterations.
    30  TEXT ·addVV(SB),NOSPLIT,$0
    31  	MOVL z+0(FP), DI
    32  	MOVL x+12(FP), SI
    33  	MOVL y+24(FP), CX
    34  	MOVL z_len+4(FP), BP	// n = len(z)
    35  	MOVL $0, BX		// i = 0
    36  	MOVL $0, DX		// c = 0
    37  	JMP E1
    38  
    39  L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    40  	ADDL DX, DX		// restore CF: DX is 0 or -1, doubling -1 sets CF
    41  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    42  	SBBL DX, DX		// save CF: DX = -CF (0 or -1)
    43  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    44  	ADDL $1, BX		// i++ (overwrites CF, hence the copy kept in DX)
    45  
    46  E1:	CMPL BX, BP		// i < n (signed comparison)
    47  	JL L1
    48  
    49  	NEGL DX			// turn 0/-1 into 0/1
    50  	MOVL DX, c+36(FP)
    51  	RET
    52  
    53  
    54  // func subVV(z, x, y []Word) (c Word)
    55  // (same as addVV except for SBBL instead of ADCL and label names)
        // subVV stores x[i] - y[i] - borrow into z[i] for 0 <= i < len(z),
        // propagating the borrow from word to word, and returns the final
        // borrow (0 or 1) in c. Only len(z) is read; the lengths of x and y
        // are not checked here.
        //
        // Register use: DI = &z[0], SI = &x[0], CX = &y[0], BP = n = len(z),
        // BX = i, DX = pending borrow kept as 0 or -1 between iterations.
    56  TEXT ·subVV(SB),NOSPLIT,$0
    57  	MOVL z+0(FP), DI
    58  	MOVL x+12(FP), SI
    59  	MOVL y+24(FP), CX
    60  	MOVL z_len+4(FP), BP	// n = len(z)
    61  	MOVL $0, BX		// i = 0
    62  	MOVL $0, DX		// c = 0
    63  	JMP E2
    64  
    65  L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    66  	ADDL DX, DX		// restore CF: DX is 0 or -1, doubling -1 sets CF
    67  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    68  	SBBL DX, DX		// save CF: DX = -CF (0 or -1)
    69  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    70  	ADDL $1, BX		// i++ (overwrites CF, hence the copy kept in DX)
    71  
    72  E2:	CMPL BX, BP		// i < n (signed comparison)
    73  	JL L2
    74  
    75  	NEGL DX			// turn 0/-1 into 0/1
    76  	MOVL DX, c+36(FP)
    77  	RET
    78  
    79  
    80  // func addVW(z, x []Word, y Word) (c Word)
        // addVW adds the single word y to the vector x, storing the sum in
        // z[0:len(z)], and returns the carry out of the top word in c.
        // The running carry starts out as y; when len(z) <= 0 nothing is
        // stored and c = y.
    81  TEXT ·addVW(SB),NOSPLIT,$0
    82  	MOVL z_len+4(FP), BP	// n = len(z)
    83  	MOVL z+0(FP), DI
    84  	MOVL x+12(FP), SI
    85  	MOVL y+24(FP), AX	// c = y
    86  	TESTL BP, BP
    87  	JLE E3			// n <= 0: no words to process
    88  	XORL BX, BX		// i = 0
    89  
    90  L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
    91  	MOVL AX, (DI)(BX*4)	// z[i] = AX (the store leaves CF untouched)
    92  	SBBL AX, AX		// AX = -CF
    93  	NEGL AX			// c = CF (0 or 1)
    94  	ADDL $1, BX		// i++
    95  	CMPL BX, BP
    96  	JL L3			// repeat while i < n
    97  
    98  E3:	MOVL AX, c+28(FP)
    99  	RET
   100  
   101  
   102  // func subVW(z, x []Word, y Word) (c Word)
        // subVW subtracts the single word y from the vector x, storing the
        // difference in z[0:len(z)], and returns the borrow out of the top
        // word in c. The running borrow starts out as y; when len(z) <= 0
        // nothing is stored and c = y.
   103  TEXT ·subVW(SB),NOSPLIT,$0
   104  	MOVL z_len+4(FP), BP	// n = len(z)
   105  	MOVL z+0(FP), DI
   106  	MOVL x+12(FP), SI
   107  	MOVL y+24(FP), AX	// c = y
   108  	TESTL BP, BP
   109  	JLE E4			// n <= 0: no words to process
   110  	XORL BX, BX		// i = 0
   111  
   112  L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
   113  	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
   114  	MOVL DX, (DI)(BX*4)	// z[i] = DX (the store leaves CF untouched)
   115  	SBBL AX, AX		// AX = -CF
   116  	NEGL AX			// c = CF (0 or 1)
   117  	ADDL $1, BX		// i++
   118  	CMPL BX, BP
   119  	JL L4			// repeat while i < n
   120  
   121  E4:	MOVL AX, c+28(FP)
   122  	RET
   123  
   124  
   125  // func shlVU(z, x []Word, s uint) (c Word)
        // shlVU shifts the vector x left by s bits, storing the result in
        // z[0:len(z)], and returns in c the bits shifted out of the top word
        // x[n-1]. Words are processed from index n-1 down to 0. When
        // len(z) <= 0 nothing is stored and c = 0.
        //
        // In the comments below n = len(z) and ŝ stands for 32 - s, the
        // complementary shift count of the double-word shift.
        // NOTE(review): presumably callers pass s < 32 - confirm; the shift
        // instructions take the count from CX as loaded, with no range check.
   126  TEXT ·shlVU(SB),NOSPLIT,$0
   127  	MOVL z_len+4(FP), BX	// i = n
   128  	SUBL $1, BX		// i--
   129  	JL X8b			// i < 0	(n <= 0)
   130  
   131  	// n > 0
   132  	MOVL z+0(FP), DI
   133  	MOVL x+12(FP), SI
   134  	MOVL s+24(FP), CX
   135  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   136  	MOVL $0, DX
   137  	SHLL CX, DX:AX		// DX = w1>>ŝ (bits shifted out of the top word)
   138  	MOVL DX, c+28(FP)
   139  
   140  	CMPL BX, $0
   141  	JLE X8a			// i <= 0: single-word vector, skip the loop
   142  
   143  	// i > 0
   144  L8:	MOVL AX, DX		// w = w1
   145  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   146  	SHLL CX, DX:AX		// w<<s | w1>>ŝ
   147  	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   148  	SUBL $1, BX		// i--
   149  	JG L8			// i > 0
   150  
   151  	// i <= 0
   152  X8a:	SHLL CX, AX		// w1<<s
   153  	MOVL AX, (DI)		// z[0] = w1<<s
   154  	RET
   155  
   156  X8b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   157  	RET
   158  
   159  
   160  // func shrVU(z, x []Word, s uint) (c Word)
        // shrVU shifts the vector x right by s bits, storing the result in
        // z[0:len(z)], and returns in c the bits shifted out of the bottom
        // word x[0]. Words are processed from index 0 up to n-1. When
        // len(z) <= 0 nothing is stored and c = 0.
        //
        // In the comments below n = len(z) and ŝ stands for 32 - s, the
        // complementary shift count of the double-word shift.
        // NOTE(review): presumably callers pass s < 32 - confirm; the shift
        // instructions take the count from CX as loaded, with no range check.
   161  TEXT ·shrVU(SB),NOSPLIT,$0
   162  	MOVL z_len+4(FP), BP
   163  	SUBL $1, BP		// BP = n-1
   164  	JL X9b			// n-1 < 0	(n <= 0)
   165  
   166  	// n > 0
   167  	MOVL z+0(FP), DI
   168  	MOVL x+12(FP), SI
   169  	MOVL s+24(FP), CX
   170  	MOVL (SI), AX		// w1 = x[0]
   171  	MOVL $0, DX
   172  	SHRL CX, DX:AX		// DX = w1<<ŝ (bits shifted out of the bottom word)
   173  	MOVL DX, c+28(FP)
   174  
   175  	MOVL $0, BX		// i = 0
   176  	JMP E9
   177  
   178  	// i < n-1
   179  L9:	MOVL AX, DX		// w = w1
   180  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   181  	SHRL CX, DX:AX		// w>>s | w1<<ŝ
   182  	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   183  	ADDL $1, BX		// i++
   184  	
   185  E9:	CMPL BX, BP
   186  	JL L9			// i < n-1
   187  
   188  	// i >= n-1
   189  X9a:	SHRL CX, AX		// w1>>s
   190  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   191  	RET
   192  
   193  X9b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   194  	RET
   195  
   196  
   197  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        // mulAddVWW multiplies every word of x by y, adding the running carry
        // (initially r), and stores the low word of each step in z[0:len(z)].
        // The high word of the last step is returned in c; when len(z) <= 0
        // nothing is stored and c = r.
        //
        // DI and SI are advanced to &z[n] and &x[n] (n = len(z)) so that a
        // single negative index i = -n .. -1 walks both vectors.
   198  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   199  	MOVL z_len+4(FP), BX	// n = len(z)
   200  	MOVL z+0(FP), DI
   201  	MOVL x+12(FP), SI
   202  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   203  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   204  	MOVL y+24(FP), BP
   205  	MOVL r+28(FP), CX	// c = r
   206  	NEGL BX			// i = -n
   207  	TESTL BX, BX
   208  	JGE E5			// i >= 0: no words to process
   209  
   210  L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   211  	MULL BP			// DX:AX = x[n+i] * y
   212  	ADDL CX, AX		// low word += c
   213  	ADCL $0, DX		// fold the carry into the high word
   214  	MOVL DX, CX		// c = high word
   215  	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   216  	ADDL $1, BX		// i++
   217  	TESTL BX, BX
   218  	JL L5			// repeat while i < 0
   219  
   220  E5:	MOVL CX, c+32(FP)
   221  	RET
   222  
   223  
   224  // func addMulVVW(z, x []Word, y Word) (c Word)
        // addMulVVW adds x[i]*y plus the running carry to z[i] in place for
        // every i in 0 .. len(z)-1 and returns the final carry word in c.
        // When len(z) <= 0 nothing is stored and c = 0.
        //
        // DI and SI are advanced to &z[n] and &x[n] (n = len(z)) so that a
        // single negative index i = -n .. -1 walks both vectors.
   225  TEXT ·addMulVVW(SB),NOSPLIT,$0
   226  	MOVL z_len+4(FP), BX	// n = len(z)
   227  	MOVL z+0(FP), DI
   228  	MOVL x+12(FP), SI
   229  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   230  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   231  	MOVL y+24(FP), BP
   232  	XORL CX, CX		// c = 0
   233  	NEGL BX			// i = -n
   234  	TESTL BX, BX
   235  	JGE E6			// i >= 0: no words to process
   236  
   237  L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   238  	MULL BP			// DX:AX = x[n+i] * y
   239  	ADDL CX, AX		// low word += c
   240  	ADCL $0, DX		// fold the carry into the high word
   241  	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   242  	ADCL $0, DX		// fold the second carry into the high word
   243  	MOVL DX, CX		// c = high word
   244  	ADDL $1, BX		// i++
   245  	TESTL BX, BX
   246  	JL L6			// repeat while i < 0
   247  
   248  E6:	MOVL CX, c+28(FP)
   249  	RET
   250  
   251  
   252  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
        // divWVW divides the vector x, extended at the top by the word xn,
        // by the single word y. Quotient words are stored in z[0:len(z)],
        // working from index len(z)-1 down to 0, and the final remainder is
        // returned in r. When len(z) <= 0 nothing is stored and r = xn.
        // DIVL raises a hardware divide error when y is zero or a quotient
        // does not fit in one word.
   253  TEXT ·divWVW(SB),NOSPLIT,$0
   254  	MOVL z_len+4(FP), BX	// i = n
   255  	MOVL z+0(FP), DI
   256  	MOVL x+16(FP), SI
   257  	MOVL y+28(FP), CX
   258  	MOVL xn+12(FP), DX	// r = xn
   259  	SUBL $1, BX		// i--
   260  	JL E7			// i < 0: no words to process
   261  
   262  L7:	MOVL (SI)(BX*4), AX	// DX:AX = r:x[i]
   263  	DIVL CX			// AX = quotient, DX = r = remainder
   264  	MOVL AX, (DI)(BX*4)	// z[i] = quotient
   265  	SUBL $1, BX		// i--
   266  	JGE L7			// repeat while i >= 0
   267  
   268  E7:	MOVL DX, r+32(FP)
   269  	RET
   270  
   271  // func bitLen(x Word) (n int)
        // bitLen returns the number of bits needed to represent x: one more
        // than the index of its highest set bit, or 0 when x is 0.
   272  TEXT ·bitLen(SB),NOSPLIT,$0
   273  	BSRL x+0(FP), AX	// AX = index of the highest set bit; ZF set if x == 0
   274  	JNZ Z1			// x != 0
   275  	MOVL $0, n+4(FP)	// x == 0: n = 0
   276  	RET
   277  
   278  Z1:	ADDL $1, AX		// n = index + 1
   279  	MOVL AX, n+4(FP)
   280  	RET