github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file provides fast assembly versions for the elementary
     6  // arithmetic operations on vectors implemented in arith.go.
     7  
     8  // func mulWW(x, y Word) (z1, z0 Word)
        //
        // mulWW returns the double-word product of x and y: z1 receives the
        // high word and z0 the low word, so that z1:z0 = x*y.
     9  TEXT ·mulWW(SB),7,$0
    10  	MOVL y+4(FP), AX	// AX = y (the unsigned product is symmetric in x and y)
    11  	MULL x+0(FP)		// DX:AX = x*y
    12  	MOVL AX, z0+12(FP)	// z0 = low word
    13  	MOVL DX, z1+8(FP)	// z1 = high word
    14  	RET
    15  
    16  
    17  // func divWW(x1, x0, y Word) (q, r Word)
        //
        // divWW divides the double word x1:x0 by y and returns the quotient
        // in q and the remainder in r.
        // NOTE(review): DIVL faults if y == 0 or if the quotient does not fit
        // in one word; presumably callers guarantee x1 < y - confirm.
    18  TEXT ·divWW(SB),7,$0
    19  	MOVL x0+4(FP), AX	// AX = low word of the dividend
    20  	MOVL x1+0(FP), DX	// DX = high word of the dividend
    21  	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
    22  	MOVL DX, r+16(FP)	// r = remainder
    23  	MOVL AX, q+12(FP)	// q = quotient
    24  	RET
    25  
    26  
    27  // func addVV(z, x, y []Word) (c Word)
        //
        // addVV computes z[i] = x[i] + y[i] + carry for 0 <= i < len(z),
        // propagating the carry from one word to the next, and returns the
        // final carry (0 or 1) in c. Only len(z) is read; x and y are indexed
        // with the same i.
        // NOTE(review): presumably callers guarantee len(x), len(y) >= len(z) - confirm.
        //
        // Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i, DX = c.
        // The carry lives in DX between iterations because the loop counter
        // update (ADDL) and the loop test (CMPL) overwrite the carry flag.
    28  TEXT ·addVV(SB),7,$0
    29  	MOVL z+0(FP), DI
    30  	MOVL x+12(FP), SI
    31  	MOVL y+24(FP), CX
    32  	MOVL z_len+4(FP), BP	// n = len(z)
    33  	MOVL $0, BX		// i = 0
    34  	MOVL $0, DX		// c = 0
    35  	JMP E1
    36  
    37  L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    38  	RCRL $1, DX		// CF = c (rotate bit 0 of DX into the carry flag)
    39  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    40  	RCLL $1, DX		// c = CF (rotate the new carry back into bit 0 of DX)
    41  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    42  	ADDL $1, BX		// i++
    43  
    44  E1:	CMPL BX, BP		// i < n
    45  	JL L1
    46  
    47  	MOVL DX, c+36(FP)	// return the final carry
    48  	RET
    49  
    50  
    51  // func subVV(z, x, y []Word) (c Word)
    52  // (same as addVV except for SBBL instead of ADCL and label names)
        //
        // subVV computes z[i] = x[i] - y[i] - borrow for 0 <= i < len(z),
        // propagating the borrow from one word to the next, and returns the
        // final borrow (0 or 1) in c. Only len(z) is read; x and y are indexed
        // with the same i.
        // NOTE(review): presumably callers guarantee len(x), len(y) >= len(z) - confirm.
        //
        // Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i, DX = c.
        // The borrow lives in DX between iterations because the loop counter
        // update (ADDL) and the loop test (CMPL) overwrite the carry flag.
    53  TEXT ·subVV(SB),7,$0
    54  	MOVL z+0(FP), DI
    55  	MOVL x+12(FP), SI
    56  	MOVL y+24(FP), CX
    57  	MOVL z_len+4(FP), BP	// n = len(z)
    58  	MOVL $0, BX		// i = 0
    59  	MOVL $0, DX		// c = 0
    60  	JMP E2
    61  
    62  L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    63  	RCRL $1, DX		// CF = c (rotate bit 0 of DX into the carry flag)
    64  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    65  	RCLL $1, DX		// c = CF (rotate the new borrow back into bit 0 of DX)
    66  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    67  	ADDL $1, BX		// i++
    68  
    69  E2:	CMPL BX, BP		// i < n
    70  	JL L2
    71  
    72  	MOVL DX, c+36(FP)	// return the final borrow
    73  	RET
    74  
    75  
    76  // func addVW(z, x []Word, y Word) (c Word)
        //
        // addVW adds the single word y to the vector x and stores the result
        // in z: z[i] = x[i] + c for 0 <= i < len(z), where c starts out as y
        // and is the carry (0 or 1) of the previous word afterwards. The final
        // c is returned; if len(z) == 0 the loop is skipped and c = y.
        //
        // Registers: DI = &z[0], SI = &x[0], BP = n, BX = i, AX = c.
    77  TEXT ·addVW(SB),7,$0
    78  	MOVL z+0(FP), DI
    79  	MOVL x+12(FP), SI
    80  	MOVL y+24(FP), AX	// c = y
    81  	MOVL z_len+4(FP), BP	// n = len(z)
    82  	MOVL $0, BX		// i = 0
    83  	JMP E3
    84  
    85  L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
    86  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
    87  	RCLL $1, AX		// rotate CF into bit 0 of AX
    88  	ANDL $1, AX		// c = CF (drop all other bits)
    89  	ADDL $1, BX		// i++
    90  
    91  E3:	CMPL BX, BP		// i < n
    92  	JL L3
    93  
    94  	MOVL AX, c+28(FP)	// return c
    95  	RET
    96  
    97  
    98  // func subVW(z, x []Word, y Word) (c Word)
        //
        // subVW subtracts the single word y from the vector x and stores the
        // result in z: z[i] = x[i] - c for 0 <= i < len(z), where c starts out
        // as y and is the borrow (0 or 1) of the previous word afterwards. The
        // final c is returned; if len(z) == 0 the loop is skipped and c = y.
        //
        // Registers: DI = &z[0], SI = &x[0], BP = n, BX = i, AX = c, DX = scratch.
    99  TEXT ·subVW(SB),7,$0
   100  	MOVL z+0(FP), DI
   101  	MOVL x+12(FP), SI
   102  	MOVL y+24(FP), AX	// c = y
   103  	MOVL z_len+4(FP), BP	// n = len(z)
   104  	MOVL $0, BX		// i = 0
   105  	JMP E4
   106  
   107  L4:	MOVL (SI)(BX*4), DX	// TODO(gri) is there a reverse SUBL?
   108  	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
   109  	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
   110  	RCLL $1, AX		// rotate CF into bit 0 of AX
   111  	ANDL $1, AX		// c = CF (drop all other bits)
   112  	ADDL $1, BX		// i++
   113  
   114  E4:	CMPL BX, BP		// i < n
   115  	JL L4
   116  
   117  	MOVL AX, c+28(FP)	// return c
   118  	RET
   119  
   120  
   121  // func shlVU(z, x []Word, s uint) (c Word)
        //
        // shlVU shifts the vector x left by s bits and stores the result in z.
        // With n = len(z), it writes z[i] = x[i]<<s | x[i-1]>>ŝ for i = n-1
        // down to 1, then z[0] = x[0]<<s, and returns c = x[n-1]>>ŝ, the bits
        // shifted out at the top. ŝ denotes the complementary shift count
        // (word size - s, i.e. 32 - s here). If n <= 0 nothing is written and
        // c = 0.
        // NOTE(review): x86 shift instructions only use the low 5 bits of CX,
        // so this presumably relies on callers passing s < 32 - confirm.
        //
        // Registers: DI = &z[0], SI = &x[0], CX = s, BX = i, AX = w1, DX = w.
   122  TEXT ·shlVU(SB),7,$0
   123  	MOVL z_len+4(FP), BX	// i = n (len(z))
   124  	SUBL $1, BX		// i--
   125  	JL X8b			// i < 0	(n <= 0)
   126  
   127  	// n > 0
   128  	MOVL z+0(FP), DI
   129  	MOVL x+12(FP), SI
   130  	MOVL s+24(FP), CX
   131  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   132  	MOVL $0, DX
   133  	SHLL CX, DX:AX		// w1>>ŝ
   134  	MOVL DX, c+28(FP)	// c = w1>>ŝ (result is stored before the loop runs)
   135  
   136  	CMPL BX, $0
   137  	JLE X8a			// i <= 0
   138  
   139  	// i > 0
   140  L8:	MOVL AX, DX		// w = w1
   141  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   142  	SHLL CX, DX:AX		// w<<s | w1>>ŝ
   143  	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   144  	SUBL $1, BX		// i--
   145  	JG L8			// i > 0
   146  
   147  	// i <= 0
   148  X8a:	SHLL CX, AX		// w1<<s
   149  	MOVL AX, (DI)		// z[0] = w1<<s
   150  	RET
   151  
   152  X8b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   153  	RET
   154  
   155  
   156  // func shrVU(z, x []Word, s uint) (c Word)
        //
        // shrVU shifts the vector x right by s bits and stores the result in z.
        // With n = len(z), it writes z[i] = x[i]>>s | x[i+1]<<ŝ for i = 0 up
        // to n-2, then z[n-1] = x[n-1]>>s, and returns c = x[0]<<ŝ, the bits
        // shifted out at the bottom. ŝ denotes the complementary shift count
        // (word size - s, i.e. 32 - s here). If n <= 0 nothing is written and
        // c = 0.
        // NOTE(review): x86 shift instructions only use the low 5 bits of CX,
        // so this presumably relies on callers passing s < 32 - confirm.
        //
        // Registers: DI = &z[0], SI = &x[0], CX = s, BP = n-1, BX = i,
        // AX = w1, DX = w.
   157  TEXT ·shrVU(SB),7,$0
   158  	MOVL z_len+4(FP), BP	// n = len(z)
   159  	SUBL $1, BP		// n--
   160  	JL X9b			// n < 0	(n <= 0)
   161  
   162  	// n > 0
   163  	MOVL z+0(FP), DI
   164  	MOVL x+12(FP), SI
   165  	MOVL s+24(FP), CX
   166  	MOVL (SI), AX		// w1 = x[0]
   167  	MOVL $0, DX
   168  	SHRL CX, DX:AX		// w1<<ŝ
   169  	MOVL DX, c+28(FP)	// c = w1<<ŝ (result is stored before the loop runs)
   170  
   171  	MOVL $0, BX		// i = 0
   172  	JMP E9
   173  
   174  	// i < n-1
   175  L9:	MOVL AX, DX		// w = w1
   176  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   177  	SHRL CX, DX:AX		// w>>s | w1<<ŝ
   178  	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   179  	ADDL $1, BX		// i++
   180  	
   181  E9:	CMPL BX, BP		// BP holds n-1 here
   182  	JL L9			// i < n-1
   183  
   184  	// i >= n-1
   185  X9a:	SHRL CX, AX		// w1>>s
   186  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   187  	RET
   188  
   189  X9b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   190  	RET
   191  
   192  
   193  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        //
        // mulAddVWW multiplies the vector x by the word y, adds the word r,
        // and stores the result in z. For 0 <= i < len(z) it computes the
        // double word x[i]*y + c, stores the low word in z[i] and keeps the
        // high word as the new c; c starts out as r and the final c is
        // returned. If len(z) == 0 the loop is skipped and c = r.
        //
        // Registers: DI = &z[n], SI = &x[n], BP = y, CX = c, and BX = i runs
        // from -n up to 0, so (DI)(BX*4) and (SI)(BX*4) address z[n+i] and
        // x[n+i] and the loop ends when the index reaches zero.
   194  TEXT ·mulAddVWW(SB),7,$0
   195  	MOVL z+0(FP), DI
   196  	MOVL x+12(FP), SI
   197  	MOVL y+24(FP), BP
   198  	MOVL r+28(FP), CX	// c = r
   199  	MOVL z_len+4(FP), BX	// n = len(z)
   200  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   201  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   202  	NEGL BX			// i = -n
   203  	JMP E5
   204  
   205  L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   206  	MULL BP			// DX:AX = x[n+i] * y
   207  	ADDL CX, AX		// low word += c
   208  	ADCL $0, DX		// propagate the carry into the high word
   209  	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   210  	MOVL DX, CX		// c = high word
   211  	ADDL $1, BX		// i++
   212  
   213  E5:	CMPL BX, $0		// i < 0
   214  	JL L5
   215  
   216  	MOVL CX, c+32(FP)	// return c
   217  	RET
   218  
   219  
   220  // func addMulVVW(z, x []Word, y Word) (c Word)
        //
        // addMulVVW adds the product of the vector x and the word y to z in
        // place. For 0 <= i < len(z) it computes the double word
        // x[i]*y + c, adds its low word to z[i], and keeps the high word plus
        // the carry of that addition as the new c; c starts out as 0 and the
        // final c is returned.
        //
        // Registers: DI = &z[n], SI = &x[n], BP = y, CX = c, and BX = i runs
        // from -n up to 0, so (DI)(BX*4) and (SI)(BX*4) address z[n+i] and
        // x[n+i] and the loop ends when the index reaches zero.
   221  TEXT ·addMulVVW(SB),7,$0
   222  	MOVL z+0(FP), DI
   223  	MOVL x+12(FP), SI
   224  	MOVL y+24(FP), BP
   225  	MOVL z_len+4(FP), BX	// n = len(z)
   226  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   227  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   228  	NEGL BX			// i = -n
   229  	MOVL $0, CX		// c = 0
   230  	JMP E6
   231  
   232  L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   233  	MULL BP			// DX:AX = x[n+i] * y
   234  	ADDL CX, AX		// low word += c
   235  	ADCL $0, DX		// propagate the carry into the high word
   236  	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   237  	ADCL $0, DX		// propagate that carry into the high word too
   238  	MOVL DX, CX		// c = high word
   239  	ADDL $1, BX		// i++
   240  
   241  E6:	CMPL BX, $0		// i < 0
   242  	JL L6
   243  
   244  	MOVL CX, c+28(FP)	// return c
   245  	RET
   246  
   247  
   248  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
        //
        // divWVW divides the vector x, extended at the top by the word xn, by
        // the word y. Working from i = len(z)-1 down to 0 it divides the
        // double word r:x[i] by y, stores the quotient in z[i] and keeps the
        // remainder as the new r; r starts out as xn and the final r is
        // returned. If len(z) == 0 the loop is skipped and r = xn.
        // NOTE(review): DIVL faults if y == 0 or if a quotient does not fit in
        // one word; presumably callers guarantee xn < y - confirm.
        //
        // Registers: DI = &z[0], SI = &x[0], CX = y, BX = i, DX = r.
   249  TEXT ·divWVW(SB),7,$0
   250  	MOVL z+0(FP), DI
   251  	MOVL xn+12(FP), DX	// r = xn
   252  	MOVL x+16(FP), SI
   253  	MOVL y+28(FP), CX
   254  	MOVL z_len+4(FP), BX	// i = n (len(z))
   255  	JMP E7
   256  
   257  L7:	MOVL (SI)(BX*4), AX	// AX = x[i]; DX still holds r
   258  	DIVL CX			// AX = r:x[i] / y, DX = r:x[i] % y
   259  	MOVL AX, (DI)(BX*4)	// z[i] = quotient
   260  
   261  E7:	SUBL $1, BX		// i--
   262  	JGE L7			// i >= 0
   263  
   264  	MOVL DX, r+32(FP)	// return the final remainder
   265  	RET
   266  
   267  // func bitLen(x Word) (n int)
        //
        // bitLen returns the number of bits needed to represent x: one more
        // than the index of the highest set bit, or 0 when x == 0.
   268  TEXT ·bitLen(SB),7,$0
   269  	BSRL x+0(FP), AX	// AX = index of highest set bit; ZF is set when x == 0
   270  	JNZ N1			// x != 0
   271  	MOVL $0, n+4(FP)	// x == 0: n = 0
   272  	RET
   273  
   274  N1:	ADDL $1, AX		// n = index + 1
   275  	MOVL AX, n+4(FP)
   276  	RET