github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // This file provides fast assembly versions for the elementary
     8  // arithmetic operations on vectors implemented in arith.go.
     9  
    10  // func mulWW(x, y Word) (z1, z0 Word): z1:z0 = x * y, the full two-word product
    11  TEXT ·mulWW(SB),NOSPLIT,$0
    12  	MOVL y+4(FP), AX	// AX = y (operand order does not matter for a product)
    13  	MULL x+0(FP)		// DX:AX = AX * x, unsigned
    14  	MOVL AX, z0+12(FP)	// z0 = low word
    15  	MOVL DX, z1+8(FP)	// z1 = high word
    16  	RET
    17  
    18  
    19  // func divWW(x1, x0, y Word) (q, r Word): q, r = quotient and remainder of the two-word value x1:x0 divided by y
    20  TEXT ·divWW(SB),NOSPLIT,$0
    21  	MOVL x0+4(FP), AX	// AX = low word of the dividend
    22  	MOVL x1+0(FP), DX	// DX = high word of the dividend
    23  	DIVL y+8(FP)		// unsigned divide: AX = DX:AX / y, DX = DX:AX % y
    24  	MOVL DX, r+16(FP)	// r = remainder
    25  	MOVL AX, q+12(FP)	// q = quotient
    26  	RET
    27  
    28  
    29  // func addVV(z, x, y []Word) (c Word): z[i] = x[i] + y[i] + carry for i < len(z); c = final carry (0 or 1)
    30  TEXT ·addVV(SB),NOSPLIT,$0
    31  	MOVL z_len+4(FP), BP	// n = len(z)
    32  	MOVL y+24(FP), CX	// CX = base of y
    33  	MOVL x+12(FP), SI	// SI = base of x
    34  	MOVL z+0(FP), DI	// DI = base of z
    35  	XORL DX, DX		// c = 0
    36  	XORL BX, BX		// i = 0
    37  	JMP addVV_test
    38  	// c is kept in bit 0 of DX between iterations because ADDL/CMPL overwrite CF.
    39  addVV_loop:	MOVL (SI)(BX*4), AX	// AX = x[i]
    40  	RCRL $1, DX		// CF = c; the stale CF is parked in bit 31 of DX
    41  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + c; CF = carry out
    42  	RCLL $1, DX		// DX = new c; the parked bit rotates back out
    43  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    44  	ADDL $1, BX		// i++
    45  
    46  addVV_test:	CMPL BX, BP
    47  	JL addVV_loop		// loop while i < n
    48  
    49  	MOVL DX, c+36(FP)	// return the final carry
    50  	RET
    51  
    52  
    53  // func subVV(z, x, y []Word) (c Word): z[i] = x[i] - y[i] - borrow for i < len(z); c = final borrow (0 or 1)
    54  // (mirrors addVV, with SBBL in place of ADCL and its own label names)
    55  TEXT ·subVV(SB),NOSPLIT,$0
    56  	MOVL z_len+4(FP), BP	// n = len(z)
    57  	MOVL y+24(FP), CX	// CX = base of y
    58  	MOVL x+12(FP), SI	// SI = base of x
    59  	MOVL z+0(FP), DI	// DI = base of z
    60  	XORL DX, DX		// c = 0
    61  	XORL BX, BX		// i = 0
    62  	JMP subVV_test
    63  	// c is kept in bit 0 of DX between iterations because ADDL/CMPL overwrite CF.
    64  subVV_loop:	MOVL (SI)(BX*4), AX	// AX = x[i]
    65  	RCRL $1, DX		// CF = c; the stale CF is parked in bit 31 of DX
    66  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - c; CF = borrow out
    67  	RCLL $1, DX		// DX = new c; the parked bit rotates back out
    68  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    69  	ADDL $1, BX		// i++
    70  
    71  subVV_test:	CMPL BX, BP
    72  	JL subVV_loop		// loop while i < n
    73  
    74  	MOVL DX, c+36(FP)	// return the final borrow
    75  	RET
    76  
    77  
    78  // func addVW(z, x []Word, y Word) (c Word): z = x + y over len(z) words; c = final carry
    79  TEXT ·addVW(SB),NOSPLIT,$0
    80  	MOVL z_len+4(FP), BP	// n = len(z)
    81  	MOVL y+24(FP), AX	// c = y: the addend enters as the initial carry word
    82  	MOVL x+12(FP), SI	// SI = base of x
    83  	MOVL z+0(FP), DI	// DI = base of z
    84  	XORL BX, BX		// i = 0
    85  	JMP addVW_test
    86  
    87  addVW_loop:	ADDL (SI)(BX*4), AX	// AX = x[i] + c; CF = carry out
    88  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
    89  	SBBL AX, AX		// AX = -CF (0 or all ones)
    90  	NEGL AX			// c = CF as 0 or 1
    91  	ADDL $1, BX		// i++
    92  
    93  addVW_test:	CMPL BX, BP
    94  	JL addVW_loop		// loop while i < n
    95  
    96  	MOVL AX, c+28(FP)	// for n <= 0 this is y itself
    97  	RET
    98  
    99  
   100  // func subVW(z, x []Word, y Word) (c Word): z = x - y over len(z) words; c = final borrow
   101  TEXT ·subVW(SB),NOSPLIT,$0
   102  	MOVL z_len+4(FP), BP	// n = len(z)
   103  	MOVL y+24(FP), AX	// c = y: the subtrahend enters as the initial borrow word
   104  	MOVL x+12(FP), SI	// SI = base of x
   105  	MOVL z+0(FP), DI	// DI = base of z
   106  	XORL BX, BX		// i = 0
   107  	JMP subVW_test
   108  
   109  subVW_loop:	MOVL (SI)(BX*4), DX	// DX = x[i]; TODO(gri) is there a reverse SUBL?
   110  	SUBL AX, DX		// DX = x[i] - c; CF = borrow out
   111  	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
   112  	SBBL AX, AX		// AX = -CF (0 or all ones)
   113  	NEGL AX			// c = CF as 0 or 1
   114  	ADDL $1, BX		// i++
   115  
   116  subVW_test:	CMPL BX, BP
   117  	JL subVW_loop		// loop while i < n
   118  
   119  	MOVL AX, c+28(FP)	// for n <= 0 this is y itself
   120  	RET
   121  
   122  
   123  // func shlVU(z, x []Word, s uint) (c Word): z = x << s over len(z) words, highest word first; c = bits shifted out of x[n-1]
   124  TEXT ·shlVU(SB),NOSPLIT,$0
   125  	MOVL z_len+4(FP), BX	// i = n (len(z))
   126  	SUBL $1, BX		// i-- (i = n-1)
   127  	JL X8b			// i < 0	(n <= 0)
   128  
   129  	// n > 0
   130  	MOVL z+0(FP), DI
   131  	MOVL x+12(FP), SI
   132  	MOVL s+24(FP), CX	// shift count; every shift below takes it from CX
   133  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   134  	MOVL $0, DX
   135  	SHLL CX, DX:AX		// w1>>ŝ: DX shifts left by s, filled from the top bits of AX; AX is unchanged
   136  	MOVL DX, c+28(FP)	// c = bits shifted out of the top word
   137  
   138  	CMPL BX, $0
   139  	JLE X8a			// i <= 0 (n == 1: only z[0] is left to write)
   140  
   141  	// i > 0
   142  L8:	MOVL AX, DX		// w = w1
   143  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   144  	SHLL CX, DX:AX		// w<<s | w1>>ŝ
   145  	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   146  	SUBL $1, BX		// i--
   147  	JG L8			// i > 0 (tests the flags of the SUBL above)
   148  
   149  	// i <= 0
   150  X8a:	SHLL CX, AX		// w1<<s
   151  	MOVL AX, (DI)		// z[0] = w1<<s
   152  	RET
   153  
   154  X8b:	MOVL $0, c+28(FP)	// n <= 0: nothing is written to z, c = 0
   155  	RET
   156  
   157  
   158  // func shrVU(z, x []Word, s uint) (c Word): z = x >> s over len(z) words, lowest word first; c = bits shifted out of x[0]
   159  TEXT ·shrVU(SB),NOSPLIT,$0
   160  	MOVL z_len+4(FP), BP	// BP = n (len(z))
   161  	SUBL $1, BP		// n-- (BP = n-1, the index of the last word)
   162  	JL X9b			// n < 0	(n <= 0)
   163  
   164  	// n > 0
   165  	MOVL z+0(FP), DI
   166  	MOVL x+12(FP), SI
   167  	MOVL s+24(FP), CX	// shift count; every shift below takes it from CX
   168  	MOVL (SI), AX		// w1 = x[0]
   169  	MOVL $0, DX
   170  	SHRL CX, DX:AX		// w1<<ŝ: DX shifts right by s, filled from the low bits of AX; AX is unchanged
   171  	MOVL DX, c+28(FP)	// c = bits shifted out of the lowest word
   172  
   173  	MOVL $0, BX		// i = 0
   174  	JMP E9
   175  
   176  	// i < n-1
   177  L9:	MOVL AX, DX		// w = w1
   178  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   179  	SHRL CX, DX:AX		// w>>s | w1<<ŝ
   180  	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   181  	ADDL $1, BX		// i++
   182  	
   183  E9:	CMPL BX, BP		// compare i with n-1
   184  	JL L9			// i < n-1
   185  
   186  	// i >= n-1
   187  X9a:	SHRL CX, AX		// w1>>s
   188  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   189  	RET
   190  
   191  X9b:	MOVL $0, c+28(FP)	// n <= 0: nothing is written to z, c = 0
   192  	RET
   193  
   194  
   195  // func mulAddVWW(z, x []Word, y, r Word) (c Word): z = x*y + r over len(z) words; c = final carry word
   196  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   197  	MOVL z_len+4(FP), BX	// n = len(z)
   198  	MOVL r+28(FP), CX	// c = r
   199  	MOVL y+24(FP), BP	// BP = multiplier y
   200  	MOVL x+12(FP), SI
   201  	MOVL z+0(FP), DI
   202  	LEAL (SI)(BX*4), SI	// SI = end of x, so x[n+i] is (SI)(i*4) for negative i
   203  	LEAL (DI)(BX*4), DI	// DI = end of z, addressed the same way
   204  	NEGL BX			// i = -n, counting up towards 0
   205  	JMP mulAddVWW_test
   206  
   207  mulAddVWW_loop:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   208  	MULL BP			// DX:AX = x[n+i] * y
   209  	ADDL CX, AX		// low word += c
   210  	ADCL $0, DX		// carry into the high word
   211  	MOVL DX, CX		// c = high word
   212  	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   213  	ADDL $1, BX		// i++
   214  
   215  mulAddVWW_test:	TESTL BX, BX
   216  	JL mulAddVWW_loop	// loop while i < 0
   217  
   218  	MOVL CX, c+32(FP)	// for n <= 0 this is r itself
   219  	RET
   220  
   221  
   222  // func addMulVVW(z, x []Word, y Word) (c Word): z += x*y over len(z) words; c = final carry word
   223  TEXT ·addMulVVW(SB),NOSPLIT,$0
   224  	MOVL z_len+4(FP), BX	// n = len(z)
   225  	MOVL y+24(FP), BP	// BP = multiplier y
   226  	MOVL x+12(FP), SI
   227  	MOVL z+0(FP), DI
   228  	LEAL (SI)(BX*4), SI	// SI = end of x, so x[n+i] is (SI)(i*4) for negative i
   229  	LEAL (DI)(BX*4), DI	// DI = end of z, addressed the same way
   230  	NEGL BX			// i = -n, counting up towards 0
   231  	XORL CX, CX		// c = 0
   232  	JMP addMulVVW_test
   233  
   234  addMulVVW_loop:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   235  	MULL BP			// DX:AX = x[n+i] * y
   236  	ADDL CX, AX		// low word += c
   237  	ADCL $0, DX		// carry into the high word
   238  	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   239  	ADCL $0, DX		// carry of that addition into the high word
   240  	MOVL DX, CX		// c = high word
   241  	ADDL $1, BX		// i++
   242  
   243  addMulVVW_test:	TESTL BX, BX
   244  	JL addMulVVW_loop	// loop while i < 0
   245  
   246  	MOVL CX, c+28(FP)	// return the final carry word
   247  	RET
   248  
   249  
   250  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word): z = (xn, x) / y, highest word first; r = remainder
   251  TEXT ·divWVW(SB),NOSPLIT,$0
   252  	MOVL z_len+4(FP), BX	// i = n (len(z))
   253  	MOVL y+28(FP), CX	// CX = divisor y
   254  	MOVL x+16(FP), SI	// SI = base of x
   255  	MOVL xn+12(FP), DX	// r = xn, the high word of the first dividend
   256  	MOVL z+0(FP), DI	// DI = base of z
   257  	JMP divWVW_next
   258  
   259  divWVW_loop:	MOVL (SI)(BX*4), AX	// DX:AX = r:x[i]
   260  	DIVL CX			// AX = quotient, DX = new r
   261  	MOVL AX, (DI)(BX*4)	// z[i] = quotient
   262  
   263  divWVW_next:	SUBL $1, BX		// i--
   264  	JGE divWVW_loop		// loop while i >= 0
   265  
   266  	MOVL DX, r+32(FP)	// for n <= 0 this is xn itself
   267  	RET
   268  
   269  // func bitLen(x Word) (n int): n = 1 + index of the highest set bit of x, or 0 when x == 0
   270  TEXT ·bitLen(SB),NOSPLIT,$0
   271  	BSRL x+0(FP), AX	// AX = index of the highest set bit; ZF is set when x == 0
   272  	JZ bitLen_zero		// AX is not meaningful for x == 0
   273  	ADDL $1, AX		// bit index -> bit count
   274  	MOVL AX, n+4(FP)
   275  	RET
   276  
   277  bitLen_zero:	MOVL $0, n+4(FP)	// x == 0 needs no bits
   278  	RET