github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/math/big/arith_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !math_big_pure_go
     6  // +build !math_big_pure_go
     7  
     8  #include "textflag.h"
     9  
    10  // This file provides fast assembly versions for the elementary
    11  // arithmetic operations on vectors implemented in arith.go.
    12  
    13  // func addVV(z, x, y []Word) (c Word)
        //
        // addVV stores x[i] + y[i] plus the running carry into z[i] for
        // i = 0 .. len(z)-1 and returns the final carry (0 or 1) in c.
        // Only len(z) is read; the lengths of x and y are not consulted.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   CX = &y[0]
        //	BP = n       BX = i       AX = word being computed
        //	DX = carry, held as 0 or -1 between iterations
    14  TEXT ·addVV(SB),NOSPLIT,$0
    15  	MOVL z_len+4(FP), BP	// n = len(z)
    16  	MOVL y+24(FP), CX
    17  	MOVL x+12(FP), SI
    18  	MOVL z+0(FP), DI
    19  	XORL DX, DX		// carry = 0
    20  	XORL BX, BX		// i = 0
    21  	JMP addVVtest
    22  
    23  addVVloop:	MOVL (SI)(BX*4), AX	// AX = x[i]
    24  	ADDL DX, DX		// DX is 0 or -1: doubling it puts the saved carry back in CF
    25  	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    26  	SBBL DX, DX		// DX = -CF, so the carry survives the ADDL/CMPL below
    27  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    28  	ADDL $1, BX		// advance to the next word
    29  
    30  addVVtest:	CMPL BX, BP
    31  	JL addVVloop		// keep going while i < n
    32  
    33  	NEGL DX			// turn -1/0 into 1/0
    34  	MOVL DX, c+36(FP)
    35  	RET
    36  
    37  
    38  // func subVV(z, x, y []Word) (c Word)
    39  // (mirrors addVV: the differences are SBBL in place of ADCL and the label names)
        //
        // subVV stores x[i] - y[i] minus the running borrow into z[i] for
        // i = 0 .. len(z)-1 and returns the final borrow (0 or 1) in c.
        // Only len(z) is read; the lengths of x and y are not consulted.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   CX = &y[0]
        //	BP = n       BX = i       AX = word being computed
        //	DX = borrow, held as 0 or -1 between iterations
    40  TEXT ·subVV(SB),NOSPLIT,$0
    41  	MOVL z_len+4(FP), BP	// n = len(z)
    42  	MOVL y+24(FP), CX
    43  	MOVL x+12(FP), SI
    44  	MOVL z+0(FP), DI
    45  	XORL DX, DX		// borrow = 0
    46  	XORL BX, BX		// i = 0
    47  	JMP subVVtest
    48  
    49  subVVloop:	MOVL (SI)(BX*4), AX	// AX = x[i]
    50  	ADDL DX, DX		// DX is 0 or -1: doubling it puts the saved borrow back in CF
    51  	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    52  	SBBL DX, DX		// DX = -CF, so the borrow survives the ADDL/CMPL below
    53  	MOVL AX, (DI)(BX*4)	// z[i] = AX
    54  	ADDL $1, BX		// advance to the next word
    55  
    56  subVVtest:	CMPL BX, BP
    57  	JL subVVloop		// keep going while i < n
    58  
    59  	NEGL DX			// turn -1/0 into 1/0
    60  	MOVL DX, c+36(FP)
    61  	RET
    62  
    63  
    64  // func addVW(z, x []Word, y Word) (c Word)
        //
        // addVW adds the single word y to the vector x, writing the sum to z.
        // The running carry starts out as y; each step computes
        // z[i] = x[i] + carry and the carry becomes the resulting CF (0 or 1).
        // The carry left after len(z) words is returned in c; when len(z) is 0
        // the loop body never runs and c is y itself.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   BP = n   BX = i   DX = running carry
    65  TEXT ·addVW(SB),NOSPLIT,$0
    66  	MOVL z_len+4(FP), BP	// n = len(z)
    67  	MOVL y+24(FP), DX	// carry = y
    68  	MOVL x+12(FP), SI
    69  	MOVL z+0(FP), DI
    70  	XORL BX, BX		// i = 0
    71  	JMP addVWtest
    72  
    73  addVWloop:	ADDL (SI)(BX*4), DX	// DX = x[i] + carry, CF = carry out
    74  	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF untouched)
    75  	SBBL DX, DX		// DX = -CF
    76  	NEGL DX			// DX = CF as 0 or 1
    77  	ADDL $1, BX		// advance to the next word
    78  
    79  addVWtest:	CMPL BX, BP
    80  	JL addVWloop		// keep going while i < n
    81  
    82  	MOVL DX, c+28(FP)
    83  	RET
    84  
    85  
    86  // func subVW(z, x []Word, y Word) (c Word)
        //
        // subVW subtracts the single word y from the vector x, writing the
        // difference to z. The running borrow starts out as y; each step
        // computes z[i] = x[i] - borrow and the borrow becomes the resulting
        // CF (0 or 1). The borrow left after len(z) words is returned in c;
        // when len(z) is 0 the loop body never runs and c is y itself.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   BP = n   BX = i
        //	AX = word being computed   DX = running borrow
    87  TEXT ·subVW(SB),NOSPLIT,$0
    88  	MOVL z_len+4(FP), BP	// n = len(z)
    89  	MOVL y+24(FP), DX	// borrow = y
    90  	MOVL x+12(FP), SI
    91  	MOVL z+0(FP), DI
    92  	XORL BX, BX		// i = 0
    93  	JMP subVWtest
    94  
    95  subVWloop:	MOVL (SI)(BX*4), AX	// AX = x[i]
    96  	SUBL DX, AX		// AX = x[i] - borrow, CF = borrow out
    97  	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF untouched)
    98  	SBBL DX, DX		// DX = -CF
    99  	NEGL DX			// DX = CF as 0 or 1
   100  	ADDL $1, BX		// advance to the next word
   101  
   102  subVWtest:	CMPL BX, BP
   103  	JL subVWloop		// keep going while i < n
   104  
   105  	MOVL DX, c+28(FP)
   106  	RET
   107  
   108  
   109  // func shlVU(z, x []Word, s uint) (c Word)
        //
        // shlVU shifts the vector x left by s bits, writing the result to z,
        // and returns in c the bits shifted out of the top word x[n-1],
        // where n = len(z). For n <= 0 nothing is written and c is 0.
        //
        // In the comments below, ŝ denotes the complementary shift count
        // (32 - s for these 32-bit words), w is the word at index i and w1
        // the word one index below it.
        //
        // Words are handled from the highest index down to 0, and x[i-1] is
        // loaded before z[i] is stored.
        // NOTE(review): presumably this ordering is what lets callers pass
        // overlapping z and x — confirm against the Go callers.
        // NOTE(review): s is used as the hardware shift count without any
        // range check; callers presumably guarantee s < 32 — confirm.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   CX = s   BX = i
        //	AX = w1 (lower word)   DX = w (word being assembled)
   110  TEXT ·shlVU(SB),NOSPLIT,$0
   111  	MOVL z_len+4(FP), BX	// i = n (len(z))
   112  	SUBL $1, BX		// i-- (i = n-1, index of the top word)
   113  	JL X8b			// i < 0	(n <= 0): nothing to shift
   114  
   115  	// n > 0
   116  	MOVL z+0(FP), DI
   117  	MOVL x+12(FP), SI
   118  	MOVL s+24(FP), CX
   119  	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   120  	MOVL $0, DX		// clear DX so the double shift yields only bits taken from w1
   121  	SHLL CX, AX, DX		// w1>>ŝ: the bits that leave the top word
   122  	MOVL DX, c+28(FP)	// c is stored up front; DX is reused by the loop
   123  
   124  	CMPL BX, $0
   125  	JLE X8a			// i <= 0: single-word vector, only z[0] remains
   126  
   127  	// i > 0
   128  L8:	MOVL AX, DX		// w = w1
   129  	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   130  	SHLL CX, AX, DX		// w<<s | w1>>ŝ
   131  	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   132  	SUBL $1, BX		// i--
   133  	JG L8			// i > 0 (uses the flags set by the SUBL above)
   134  
   135  	// i <= 0
   136  X8a:	SHLL CX, AX		// w1<<s: lowest word has nothing below it to shift in
   137  	MOVL AX, (DI)		// z[0] = w1<<s
   138  	RET
   139  
   140  X8b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   141  	RET
   142  
   143  
   144  // func shrVU(z, x []Word, s uint) (c Word)
        //
        // shrVU shifts the vector x right by s bits, writing the result to z,
        // and returns in c the bits shifted out of the bottom word x[0],
        // positioned at the top of the word (x[0]<<ŝ), where n = len(z).
        // For n <= 0 nothing is written and c is 0.
        //
        // In the comments below, ŝ denotes the complementary shift count
        // (32 - s for these 32-bit words), w is the word at index i and w1
        // the word one index above it.
        //
        // Words are handled from index 0 upward, and x[i+1] is loaded before
        // z[i] is stored.
        // NOTE(review): presumably this ordering is what lets callers pass
        // overlapping z and x — confirm against the Go callers.
        // NOTE(review): s is used as the hardware shift count without any
        // range check; callers presumably guarantee s < 32 — confirm.
        //
        // Register roles:
        //	DI = &z[0]   SI = &x[0]   CX = s   BX = i   BP = n-1
        //	AX = w1 (upper word)   DX = w (word being assembled)
   145  TEXT ·shrVU(SB),NOSPLIT,$0
   146  	MOVL z_len+4(FP), BP	// BP = n (len(z))
   147  	SUBL $1, BP		// n-- (BP = n-1, index of the top word)
   148  	JL X9b			// n < 0	(n <= 0): nothing to shift
   149  
   150  	// n > 0
   151  	MOVL z+0(FP), DI
   152  	MOVL x+12(FP), SI
   153  	MOVL s+24(FP), CX
   154  	MOVL (SI), AX		// w1 = x[0]
   155  	MOVL $0, DX		// clear DX so the double shift yields only bits taken from w1
   156  	SHRL CX, AX, DX		// w1<<ŝ: the bits that leave the bottom word
   157  	MOVL DX, c+28(FP)	// c is stored up front; DX is reused by the loop
   158  
   159  	MOVL $0, BX		// i = 0
   160  	JMP E9
   161  
   162  	// i < n-1
   163  L9:	MOVL AX, DX		// w = w1
   164  	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   165  	SHRL CX, AX, DX		// w>>s | w1<<ŝ
   166  	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   167  	ADDL $1, BX		// i++
   168  
   169  E9:	CMPL BX, BP
   170  	JL L9			// i < n-1
   171  
   172  	// i >= n-1
        	// (X9a is reached only by falling through; nothing in this function jumps to it)
   173  X9a:	SHRL CX, AX		// w1>>s: top word has nothing above it to shift in
   174  	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   175  	RET
   176  
   177  X9b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   178  	RET
   179  
   180  
   181  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        //
        // mulAddVWW multiplies the vector x by the word y and adds the word r.
        // The running carry starts out as r; for each word the 64-bit value
        // x[i]*y + carry is formed, its low half is stored in z[i] and its
        // high half becomes the next carry. The carry left after len(z) words
        // is returned in c; when len(z) is 0 the loop body never runs and c
        // is r itself.
        //
        // SI and DI are advanced to one past the last word and the index runs
        // from -n up to 0, so the loop test is a plain sign check.
        //
        // Register roles:
        //	DI = &z[n]   SI = &x[n]   CX = y   BP = carry   BX = i (negative)
        //	DX:AX = product (written implicitly by MULL)
   182  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   183  	MOVL z_len+4(FP), BX	// n = len(z)
   184  	MOVL x+12(FP), SI
   185  	MOVL z+0(FP), DI
   186  	MOVL r+28(FP), BP	// carry = r
   187  	MOVL y+24(FP), CX	// multiplier
   188  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   189  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   190  	NEGL BX			// i = -n
   191  	JMP mulAddVWWtest
   192  
   193  mulAddVWWloop:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   194  	MULL CX			// DX:AX = x[n+i] * y
   195  	ADDL BP, AX		// low half += carry
   196  	ADCL $0, DX		// propagate into the high half
   197  	MOVL AX, (DI)(BX*4)	// z[n+i] = low half
   198  	MOVL DX, BP		// carry = high half
   199  	ADDL $1, BX		// advance to the next word
   200  
   201  mulAddVWWtest:	TESTL BX, BX
   202  	JL mulAddVWWloop	// keep going while i < 0
   203  
   204  	MOVL BP, c+32(FP)
   205  	RET
   206  
   207  
   208  // func addMulVVW(z, x []Word, y Word) (c Word)
        //
        // addMulVVW adds the product of the vector x and the word y into z.
        // The running carry starts at 0; for each word the 64-bit value
        // x[i]*y + carry is formed, its low half is added to z[i] in memory,
        // and its high half (plus the carry out of that addition) becomes the
        // next carry. The carry left after len(z) words is returned in c.
        //
        // SI and DI are advanced to one past the last word and the index runs
        // from -n up to 0, so the loop test is a plain sign check.
        //
        // Register roles:
        //	DI = &z[n]   SI = &x[n]   CX = y   BP = carry   BX = i (negative)
        //	DX:AX = product (written implicitly by MULL)
   209  TEXT ·addMulVVW(SB),NOSPLIT,$0
   210  	MOVL z_len+4(FP), BX	// n = len(z)
   211  	MOVL x+12(FP), SI
   212  	MOVL z+0(FP), DI
   213  	MOVL y+24(FP), CX	// multiplier
   214  	LEAL (SI)(BX*4), SI	// SI = &x[n]
   215  	LEAL (DI)(BX*4), DI	// DI = &z[n]
   216  	NEGL BX			// i = -n
   217  	XORL BP, BP		// carry = 0
   218  	JMP addMulVVWtest
   219  
   220  addMulVVWloop:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   221  	MULL CX			// DX:AX = x[n+i] * y
   222  	ADDL BP, AX		// low half += carry
   223  	ADCL $0, DX		// propagate into the high half
   224  	ADDL AX, (DI)(BX*4)	// z[n+i] += low half
   225  	ADCL $0, DX		// carry out of that addition joins the high half
   226  	MOVL DX, BP		// carry = high half
   227  	ADDL $1, BX		// advance to the next word
   228  
   229  addMulVVWtest:	TESTL BX, BX
   230  	JL addMulVVWloop	// keep going while i < 0
   231  
   232  	MOVL BP, c+28(FP)
   233  	RET
   234  
   235  
   236