github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !math_big_pure_go
     6  
     7  #include "textflag.h"
     8  
     9  // This file provides fast assembly versions for the elementary
    10  // arithmetic operations on vectors implemented in arith.go.
    11  
    12  // func mulWW(x, y Word) (z1, z0 Word) -- z1:z0 = x*y (z1 = high word, z0 = low word)
    13  TEXT ·mulWW(SB),NOSPLIT,$0
    14  	MOVL y+4(FP), AX	// AX = y
    15  	MULL x+0(FP)		// DX:AX = y * x (unsigned, full double-word product)
    16  	MOVL AX, z0+12(FP)	// low half of the product
    17  	MOVL DX, z1+8(FP)	// high half of the product
    18  	RET
    19  
    20  // addVV stores x[i] + y[i] + carry in z[i] for 0 <= i < len(z); c is the final carry (0 or 1).
    21  // func addVV(z, x, y []Word) (c Word)
    22  TEXT ·addVV(SB),NOSPLIT,$0
    23  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    24  	MOVL z+0(FP), DI	// DI = &z[0]
    25  	MOVL x+12(FP), SI	// SI = &x[0]
    26  	MOVL y+24(FP), CX	// CX = &y[0]
    27  	XORL BX, BX		// i = 0
    28  	XORL DX, DX		// saved carry: 0 = clear, -1 = set
    29  	JMP check
    30  
    31  loop:	ADDL DX, DX		// CF = saved carry (-1 + -1 carries out, 0 + 0 does not)
    32  	MOVL (SI)(BX*4), AX	// AX = x[i] (MOVL leaves CF alone)
    33  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    34  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF alone)
    35  	SBBL DX, DX		// DX = -CF, parking the carry while the loop test uses the flags
    36  	INCL BX			// i++
    37  
    38  check:	CMPL BX, BP
    39  	JL loop			// while i < n
    40  
    41  	NEGL DX			// -1 -> 1, 0 -> 0
    42  	MOVL DX, c+36(FP)
    43  	RET
    44  
    45  // subVV stores x[i] - y[i] - borrow in z[i] for 0 <= i < len(z); c is the final borrow (0 or 1).
    46  // func subVV(z, x, y []Word) (c Word)
    47  // (same structure as addVV, with SBBL doing the word subtraction instead of ADCL)
    48  TEXT ·subVV(SB),NOSPLIT,$0
    49  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    50  	MOVL z+0(FP), DI	// DI = &z[0]
    51  	MOVL x+12(FP), SI	// SI = &x[0]
    52  	MOVL y+24(FP), CX	// CX = &y[0]
    53  	XORL BX, BX		// i = 0
    54  	XORL DX, DX		// saved borrow: 0 = clear, -1 = set
    55  	JMP check
    56  
    57  loop:	ADDL DX, DX		// CF = saved borrow (-1 + -1 carries out, 0 + 0 does not)
    58  	MOVL (SI)(BX*4), AX	// AX = x[i] (MOVL leaves CF alone)
    59  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    60  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF alone)
    61  	SBBL DX, DX		// DX = -CF, parking the borrow while the loop test uses the flags
    62  	INCL BX			// i++
    63  
    64  check:	CMPL BX, BP
    65  	JL loop			// while i < n
    66  
    67  	NEGL DX			// -1 -> 1, 0 -> 0
    68  	MOVL DX, c+36(FP)
    69  	RET
    70  
    71  // addVW stores x[i] + carry in z[i] for 0 <= i < len(z), with carry starting at y; c is the carry out of the last word.
    72  // func addVW(z, x []Word, y Word) (c Word)
    73  TEXT ·addVW(SB),NOSPLIT,$0
    74  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    75  	MOVL y+24(FP), AX	// AX = running carry, seeded with y
    76  	MOVL z+0(FP), DI	// DI = &z[0]
    77  	MOVL x+12(FP), SI	// SI = &x[0]
    78  	XORL BX, BX		// i = 0
    79  	JMP check
    80  
    81  loop:	ADDL (SI)(BX*4), AX	// AX = x[i] + carry, CF = carry out
    82  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF alone)
    83  	SBBL AX, AX		// AX = -CF
    84  	NEGL AX			// AX = CF as 0 or 1: the carry into the next word
    85  	INCL BX			// i++
    86  
    87  check:	CMPL BX, BP
    88  	JL loop			// while i < n
    89  
    90  	MOVL AX, c+28(FP)	// when n <= 0 the loop never ran and this is y itself
    91  	RET
    92  
    93  // subVW stores x[i] - borrow in z[i] for 0 <= i < len(z), with borrow starting at y; c is the borrow out of the last word.
    94  // func subVW(z, x []Word, y Word) (c Word)
    95  TEXT ·subVW(SB),NOSPLIT,$0
    96  	MOVL z_len+4(FP), BP	// BP = n = len(z)
    97  	MOVL y+24(FP), AX	// AX = running borrow, seeded with y
    98  	MOVL z+0(FP), DI	// DI = &z[0]
    99  	MOVL x+12(FP), SI	// SI = &x[0]
   100  	XORL BX, BX		// i = 0
   101  	JMP check
   102  
   103  loop:	MOVL (SI)(BX*4), DX	// DX = x[i]
   104  	SUBL AX, DX		// DX = x[i] - borrow, CF = borrow out
   105  	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF alone)
   106  	SBBL AX, AX		// AX = -CF
   107  	NEGL AX			// AX = CF as 0 or 1: the borrow from the next word
   108  	INCL BX			// i++
   109  
   110  check:	CMPL BX, BP
   111  	JL loop			// while i < n
   112  
   113  	MOVL AX, c+28(FP)	// when n <= 0 the loop never ran and this is y itself
   114  	RET
   115  
   116  // shlVU shifts the len(z)-word vector x left by s bits into z (word 0 least significant); c is the bits shifted out of x[n-1].
   117  // func shlVU(z, x []Word, s uint) (c Word)
   118  TEXT ·shlVU(SB),NOSPLIT,$0
   119  	MOVL z_len+4(FP), BX	// i = n = len(z)
   120  	DECL BX			// i = n-1, index of the top word
   121  	JL empty		// n <= 0: nothing to shift
   122  
   123  	// n > 0: c = bits shifted out of x[n-1]
   124  	MOVL s+24(FP), CX	// CX = s
   125  	MOVL z+0(FP), DI	// DI = &z[0]
   126  	MOVL x+12(FP), SI	// SI = &x[0]
   127  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   128  	XORL DX, DX
   129  	SHLL CX, AX, DX		// DX = w1>>(32-s); stays 0 when s = 0
   130  	MOVL DX, c+28(FP)
   131  
   132  	TESTL BX, BX
   133  	JLE last		// i <= 0: only z[0] remains
   134  
   135  	// i > 0: walk from the top word down
   136  loop:	MOVL AX, DX		// w = w1
   137  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   138  	SHLL CX, AX, DX		// DX = w<<s | w1>>(32-s)
   139  	MOVL DX, (DI)(BX*4)	// z[i] = DX
   140  	DECL BX			// i--
   141  	JG loop			// while i > 0
   142  
   143  	// i == 0: the bottom word has no lower neighbour to pull bits from
   144  last:	SHLL CX, AX		// w1<<s
   145  	MOVL AX, (DI)		// z[0] = w1<<s
   146  	RET
   147  
   148  empty:	MOVL $0, c+28(FP)
   149  	RET
   150  
   151  // shrVU shifts the len(z)-word vector x right by s bits into z (word 0 least significant); c is the bits shifted out of x[0].
   152  // func shrVU(z, x []Word, s uint) (c Word)
   153  TEXT ·shrVU(SB),NOSPLIT,$0
   154  	MOVL z_len+4(FP), BP	// BP = n = len(z)
   155  	DECL BP			// BP = n-1, index of the top word
   156  	JL empty		// n <= 0: nothing to shift
   157  
   158  	// n > 0: c = bits shifted out of x[0]
   159  	MOVL s+24(FP), CX	// CX = s
   160  	MOVL z+0(FP), DI	// DI = &z[0]
   161  	MOVL x+12(FP), SI	// SI = &x[0]
   162  	MOVL (SI), AX		// w1 = x[0]
   163  	XORL DX, DX
   164  	SHRL CX, AX, DX		// DX = w1<<(32-s); stays 0 when s = 0
   165  	MOVL DX, c+28(FP)
   166  
   167  	XORL BX, BX		// i = 0
   168  	JMP check
   169  
   170  	// i < n-1: walk from the bottom word up
   171  loop:	MOVL AX, DX		// w = w1
   172  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   173  	SHRL CX, AX, DX		// DX = w>>s | w1<<(32-s)
   174  	MOVL DX, (DI)(BX*4)	// z[i] = DX
   175  	INCL BX			// i++
   176  
   177  check:	CMPL BX, BP
   178  	JL loop			// while i < n-1
   179  
   180  	// i == n-1: the top word has no higher neighbour to pull bits from
   181  	SHRL CX, AX		// w1>>s
   182  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   183  	RET
   184  
   185  empty:	MOVL $0, c+28(FP)
   186  	RET
   187  
   188  // mulAddVWW stores the low word of x[i]*y + carry in z[i] for 0 <= i < len(z), with carry starting at r; c is the final carry.
   189  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
   190  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   191  	MOVL z_len+4(FP), BX	// BX = n = len(z)
   192  	MOVL z+0(FP), DI
   193  	MOVL x+12(FP), SI
   194  	MOVL y+24(FP), BP	// BP = y
   195  	MOVL r+28(FP), CX	// CX = running carry, seeded with r
   196  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   197  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   198  	NEGL BX			// i = -n: (SI)(BX*4) now addresses x[0], and i counts up to 0
   199  	JMP check
   200  
   201  loop:	MOVL BP, AX		// AX = y
   202  	MULL (SI)(BX*4)		// DX:AX = y * x[n+i]
   203  	ADDL CX, AX		// low word += carry
   204  	ADCL $0, DX		// ripple the overflow into the high word
   205  	MOVL DX, CX		// next carry = high word
   206  	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   207  	INCL BX			// i++
   208  
   209  check:	TESTL BX, BX
   210  	JL loop			// while i < 0
   211  
   212  	MOVL CX, c+32(FP)	// when n <= 0 the loop never ran and this is r itself
   213  	RET
   214  
   215  // addMulVVW adds x[i]*y + carry into z[i] in place for 0 <= i < len(z), with carry starting at 0; c is the final carry.
   216  // func addMulVVW(z, x []Word, y Word) (c Word)
   217  TEXT ·addMulVVW(SB),NOSPLIT,$0
   218  	MOVL z_len+4(FP), BX	// BX = n = len(z)
   219  	MOVL z+0(FP), DI
   220  	MOVL x+12(FP), SI
   221  	MOVL y+24(FP), BP	// BP = y
   222  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   223  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   224  	NEGL BX			// i = -n: (SI)(BX*4) now addresses x[0], and i counts up to 0
   225  	XORL CX, CX		// carry = 0
   226  	JMP check
   227  
   228  loop:	MOVL BP, AX		// AX = y
   229  	MULL (SI)(BX*4)		// DX:AX = y * x[n+i]
   230  	ADDL CX, AX		// low word += carry
   231  	ADCL $0, DX		// ripple the overflow into the high word
   232  	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   233  	ADCL $0, DX		// ripple that overflow into the high word too
   234  	MOVL DX, CX		// next carry = high word
   235  	INCL BX			// i++
   236  
   237  check:	TESTL BX, BX
   238  	JL loop			// while i < 0
   239  
   240  	MOVL CX, c+28(FP)
   241  	RET
   242  
   243  
   244