github.com/likebike/go--@v0.0.0-20190911215757-0bd925d16e96/go/src/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !math_big_pure_go
     6  
     7  #include "textflag.h"
     8  
     9  // This file provides fast assembly versions for the elementary
    10  // arithmetic operations on vectors implemented in arith.go.
    11  
    12  // func mulWW(x, y Word) (z1, z0 Word)
        //
        // mulWW returns the double-word product of x and y: the high word in
        // z1 and the low word in z0.
    13  TEXT ·mulWW(SB),NOSPLIT,$0
    14  	MOVL x+0(FP), AX
    15  	MULL y+4(FP)		// DX:AX = x * y (unsigned)
    16  	MOVL DX, z1+8(FP)	// high word of the product
    17  	MOVL AX, z0+12(FP)	// low word of the product
    18  	RET
    19  
    20  
    21  // func divWW(x1, x0, y Word) (q, r Word)
        //
        // divWW divides the double word x1:x0 by y and returns the quotient in
        // q and the remainder in r. Nothing is checked here: DIVL raises a
        // divide error if y is 0 or if the quotient does not fit in one word
        // (that is, unless x1 < y).
        // NOTE(review): presumably every caller guarantees x1 < y — confirm.
    22  TEXT ·divWW(SB),NOSPLIT,$0
    23  	MOVL x1+0(FP), DX	// high word of the dividend
    24  	MOVL x0+4(FP), AX	// low word of the dividend
    25  	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y (unsigned)
    26  	MOVL AX, q+12(FP)
    27  	MOVL DX, r+16(FP)
    28  	RET
    29  
    30  
    31  // func addVV(z, x, y []Word) (c Word)
        //
        // addVV sets z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and returns
        // the carry out of the last word (0 or 1) in c. Only z's length is
        // read; x and y are indexed with the same i.
        //
        // The loop counter update (ADDL $1, BX) and the loop test (CMPL)
        // overwrite CF, so between iterations the carry is parked in DX as
        // 0 or -1: SBBL DX, DX turns CF into 0/-1 and ADDL DX, DX turns that
        // value back into CF.
    32  TEXT ·addVV(SB),NOSPLIT,$0
    33  	MOVL z+0(FP), DI	// DI = &z[0]
    34  	MOVL x+12(FP), SI	// SI = &x[0]
    35  	MOVL y+24(FP), CX	// CX = &y[0]
    36  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    37  	MOVL $0, BX		// i = 0
    38  	MOVL $0, DX		// c = 0
    39  	JMP E1
    40  
    41  L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    42  	ADDL DX, DX		// restore CF
    43  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    44  	SBBL DX, DX		// save CF
    45  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves the flags alone)
    46  	ADDL $1, BX		// i++
    47  
    48  E1:	CMPL BX, BP		// i < n
    49  	JL L1			// signed compare: n <= 0 skips the loop
    50  
    51  	NEGL DX			// 0/-1 -> 0/1
    52  	MOVL DX, c+36(FP)
    53  	RET
    54  
    55  
    56  // func subVV(z, x, y []Word) (c Word)
    57  // (same as addVV except for SBBL instead of ADCL and label names)
        //
        // subVV sets z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and
        // returns the borrow out of the last word (0 or 1) in c. Only z's
        // length is read; x and y are indexed with the same i.
        //
        // The borrow lives in CF only across the SBBL; it is parked in DX as
        // 0 or -1 while ADDL $1, BX and CMPL overwrite the flags.
    58  TEXT ·subVV(SB),NOSPLIT,$0
    59  	MOVL z+0(FP), DI	// DI = &z[0]
    60  	MOVL x+12(FP), SI	// SI = &x[0]
    61  	MOVL y+24(FP), CX	// CX = &y[0]
    62  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    63  	MOVL $0, BX		// i = 0
    64  	MOVL $0, DX		// c = 0
    65  	JMP E2
    66  
    67  L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    68  	ADDL DX, DX		// restore CF
    69  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    70  	SBBL DX, DX		// save CF
    71  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves the flags alone)
    72  	ADDL $1, BX		// i++
    73  
    74  E2:	CMPL BX, BP		// i < n
    75  	JL L2			// signed compare: n <= 0 skips the loop
    76  
    77  	NEGL DX			// 0/-1 -> 0/1
    78  	MOVL DX, c+36(FP)
    79  	RET
    80  
    81  
    82  // func addVW(z, x []Word, y Word) (c Word)
        //
        // addVW adds the single word y to the vector x and stores the result
        // in z: y is used as the carry into word 0, and each later word
        // receives the carry (0 or 1) out of the previous one. The carry out of
        // the last word is returned in c. If len(z) <= 0 the loop is skipped
        // and c is y itself.
    83  TEXT ·addVW(SB),NOSPLIT,$0
    84  	MOVL z+0(FP), DI	// DI = &z[0]
    85  	MOVL x+12(FP), SI	// SI = &x[0]
    86  	MOVL y+24(FP), AX	// c = y
    87  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    88  	MOVL $0, BX		// i = 0
    89  	JMP E3
    90  
    91  L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
    92  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves the flags alone)
    93  	SBBL AX, AX		// save CF
    94  	NEGL AX			// c = 0 or 1
    95  	ADDL $1, BX		// i++
    96  
    97  E3:	CMPL BX, BP		// i < n
    98  	JL L3
    99  
   100  	MOVL AX, c+28(FP)
   101  	RET
   102  
   103  
   104  // func subVW(z, x []Word, y Word) (c Word)
        //
        // subVW subtracts the single word y from the vector x and stores the
        // result in z: y is used as the borrow into word 0, and each later
        // word receives the borrow (0 or 1) out of the previous one. The
        // borrow out of the last word is returned in c. If len(z) <= 0 the
        // loop is skipped and c is y itself.
   105  TEXT ·subVW(SB),NOSPLIT,$0
   106  	MOVL z+0(FP), DI	// DI = &z[0]
   107  	MOVL x+12(FP), SI	// SI = &x[0]
   108  	MOVL y+24(FP), AX	// c = y
   109  	MOVL z_len+4(FP), BP	// BP = n = len(z)
   110  	MOVL $0, BX		// i = 0
   111  	JMP E4
   112  
   113  L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
   114  	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
   115  	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves the flags alone)
   116  	SBBL AX, AX		// save CF
   117  	NEGL AX			// c = 0 or 1
   118  	ADDL $1, BX		// i++
   119  
   120  E4:	CMPL BX, BP		// i < n
   121  	JL L4
   122  
   123  	MOVL AX, c+28(FP)
   124  	RET
   125  
   126  
   127  // func shlVU(z, x []Word, s uint) (c Word)
        //
        // shlVU shifts the vector x left by s bits and stores the result in z,
        // working from the most significant word (index n-1) down to word 0.
        // c receives the bits shifted out of the top word, x[n-1]>>ŝ, where ŝ
        // stands for 32-s in the comments below. If len(z) <= 0 nothing is
        // written and c is 0.
        //
        // "SHLL CX, DX:AX" is the double-register shift: DX is shifted left by
        // CX bits and the vacated low bits are filled from the top of AX; AX
        // itself is not modified.
        // NOTE(review): s is used as the shift count unchecked; presumably
        // callers guarantee s < 32 — confirm.
   128  TEXT ·shlVU(SB),NOSPLIT,$0
   129  	MOVL z_len+4(FP), BX	// i = n = len(z)
   130  	SUBL $1, BX		// i--
   131  	JL X8b			// i < 0	(n <= 0)
   132  
   133  	// n > 0
   134  	MOVL z+0(FP), DI
   135  	MOVL x+12(FP), SI
   136  	MOVL s+24(FP), CX
   137  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   138  	MOVL $0, DX
   139  	SHLL CX, DX:AX		// w1>>ŝ
   140  	MOVL DX, c+28(FP)
   141  
   142  	CMPL BX, $0
   143  	JLE X8a			// i <= 0
   144  
   145  	// i > 0
   146  L8:	MOVL AX, DX		// w = w1
   147  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   148  	SHLL CX, DX:AX		// w<<s | w1>>ŝ
   149  	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   150  	SUBL $1, BX		// i--
   151  	JG L8			// i > 0
   152  
   153  	// i <= 0
   154  X8a:	SHLL CX, AX		// w1<<s
   155  	MOVL AX, (DI)		// z[0] = w1<<s
   156  	RET
   157  
   158  X8b:	MOVL $0, c+28(FP)
   159  	RET
   160  
   161  
   162  // func shrVU(z, x []Word, s uint) (c Word)
        //
        // shrVU shifts the vector x right by s bits and stores the result in
        // z, working from word 0 up to the most significant word (index n-1).
        // c receives the bits shifted out of the bottom word, x[0]<<ŝ, where ŝ
        // stands for 32-s in the comments below. If len(z) <= 0 nothing is
        // written and c is 0.
        //
        // "SHRL CX, DX:AX" is the double-register shift: DX is shifted right
        // by CX bits and the vacated high bits are filled from the bottom of
        // AX; AX itself is not modified.
        // NOTE(review): s is used as the shift count unchecked; presumably
        // callers guarantee s < 32 — confirm.
   163  TEXT ·shrVU(SB),NOSPLIT,$0
   164  	MOVL z_len+4(FP), BP
   165  	SUBL $1, BP		// n--
   166  	JL X9b			// n < 0	(n <= 0)
   167  
   168  	// n > 0
   169  	MOVL z+0(FP), DI
   170  	MOVL x+12(FP), SI
   171  	MOVL s+24(FP), CX
   172  	MOVL (SI), AX		// w1 = x[0]
   173  	MOVL $0, DX
   174  	SHRL CX, DX:AX		// w1<<ŝ
   175  	MOVL DX, c+28(FP)
   176  
   177  	MOVL $0, BX		// i = 0
   178  	JMP E9
   179  
   180  	// i < n-1
   181  L9:	MOVL AX, DX		// w = w1
   182  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   183  	SHRL CX, DX:AX		// w>>s | w1<<ŝ
   184  	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   185  	ADDL $1, BX		// i++
   186  	
   187  E9:	CMPL BX, BP		// BP holds n-1 here (decremented on entry)
   188  	JL L9			// i < n-1
   189  
   190  	// i >= n-1
   191  X9a:	SHRL CX, AX		// w1>>s
   192  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   193  	RET
   194  
   195  X9b:	MOVL $0, c+28(FP)
   196  	RET
   197  
   198  
   199  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        //
        // mulAddVWW sets z[i] to the low word of x[i]*y + carry for
        // 0 <= i < len(z), where the carry into word 0 is r and the carry into
        // each later word is the high word of the previous step. The final
        // carry is returned in c; if len(z) <= 0 the loop is skipped and c is
        // r.
        //
        // DI and SI are advanced to one past the last element and the index
        // runs from -n up to 0, so the loop test is a compare against zero.
   200  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   201  	MOVL z+0(FP), DI
   202  	MOVL x+12(FP), SI
   203  	MOVL y+24(FP), BP
   204  	MOVL r+28(FP), CX	// c = r
   205  	MOVL z_len+4(FP), BX	// BX = n = len(z)
   206  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   207  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   208  	NEGL BX			// i = -n
   209  	JMP E5
   210  
   211  L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   212  	MULL BP			// DX:AX = x[n+i] * y
   213  	ADDL CX, AX		// low word += c
   214  	ADCL $0, DX		// propagate the carry into the high word
   215  	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   216  	MOVL DX, CX		// c = high word
   217  	ADDL $1, BX		// i++
   218  
   219  E5:	CMPL BX, $0		// i < 0
   220  	JL L5
   221  
   222  	MOVL CX, c+32(FP)
   223  	RET
   224  
   225  
   226  // func addMulVVW(z, x []Word, y Word) (c Word)
        //
        // addMulVVW adds x[i]*y plus the running carry into z[i] for
        // 0 <= i < len(z), updating z in place. The carry starts at 0; the
        // carry into each later word is the high word of the previous step.
        // The final carry is returned in c.
        //
        // DI and SI are advanced to one past the last element and the index
        // runs from -n up to 0, so the loop test is a compare against zero.
   227  TEXT ·addMulVVW(SB),NOSPLIT,$0
   228  	MOVL z+0(FP), DI
   229  	MOVL x+12(FP), SI
   230  	MOVL y+24(FP), BP
   231  	MOVL z_len+4(FP), BX	// BX = n = len(z)
   232  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   233  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   234  	NEGL BX			// i = -n
   235  	MOVL $0, CX		// c = 0
   236  	JMP E6
   237  
   238  L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   239  	MULL BP			// DX:AX = x[n+i] * y
   240  	ADDL CX, AX		// low word += c
   241  	ADCL $0, DX		// propagate the carry into the high word
   242  	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   243  	ADCL $0, DX		// propagate the carry of that addition too
   244  	MOVL DX, CX		// c = high word
   245  	ADDL $1, BX		// i++
   246  
   247  E6:	CMPL BX, $0		// i < 0
   248  	JL L6
   249  
   250  	MOVL CX, c+28(FP)
   251  	RET
   252  
   253  
   254  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
        //
        // divWVW divides the multi-word value formed by xn (most significant
        // word) followed by x[len(z)-1] ... x[0] by the single word y. The
        // quotient words are stored in z and the remainder is returned in r.
        // Words are processed from index len(z)-1 down to 0, with the
        // remainder of each step becoming the high word of the next dividend.
        // If len(z) <= 0 nothing is written and r is xn.
        //
        // DIVL raises a divide error if y is 0 or if a quotient does not fit in
        // one word. After each DIVL the remainder in DX is smaller than y, so
        // only the first division depends on the caller-supplied xn.
        // NOTE(review): presumably callers guarantee xn < y — confirm.
   255  TEXT ·divWVW(SB),NOSPLIT,$0
   256  	MOVL z+0(FP), DI
   257  	MOVL xn+12(FP), DX	// r = xn
   258  	MOVL x+16(FP), SI
   259  	MOVL y+28(FP), CX
   260  	MOVL z_len+4(FP), BX	// i = len(z)
   261  	JMP E7
   262  
   263  L7:	MOVL (SI)(BX*4), AX	// AX = x[i]; dividend is r:x[i]
   264  	DIVL CX			// AX = quotient, DX = remainder
   265  	MOVL AX, (DI)(BX*4)	// z[i] = quotient
   266  
   267  E7:	SUBL $1, BX		// i--
   268  	JGE L7			// i >= 0
   269  
   270  	MOVL DX, r+32(FP)
   271  	RET