github.com/emmansun/gmsm@v0.29.1/internal/bigmod/nat_riscv64.s (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // func addMulVVW256(z, x *uint, y uint) (c uint)
        // Entry point for 256-bit operands: sets the word count in X30 to 4
        // (4 words of 8 bytes) and jumps to addMulVVWy, which reads z, x, y and
        // writes c through the FP-relative argument slots.
    10  TEXT ·addMulVVW256(SB),$0-32
    11  	MOV	$4, X30	// X30 = number of 8-byte words for addMulVVWy to process
    12  	JMP	addMulVVWy(SB)	// JMP rather than CALL; control does not come back here
    13  
    14  // func addMulVVW1024(z, x *uint, y uint) (c uint)
        // Entry point for 1024-bit operands: sets the word count in X30 to 16
        // (16 words of 8 bytes) and jumps to addMulVVWy, which reads z, x, y and
        // writes c through the FP-relative argument slots.
    15  TEXT ·addMulVVW1024(SB),$0-32
    16  	MOV	$16, X30	// X30 = number of 8-byte words for addMulVVWy to process
    17  	JMP	addMulVVWy(SB)	// JMP rather than CALL; control does not come back here
    18  
    19  // func addMulVVW1536(z, x *uint, y uint) (c uint)
        // Entry point for 1536-bit operands: sets the word count in X30 to 24
        // (24 words of 8 bytes) and jumps to addMulVVWy, which reads z, x, y and
        // writes c through the FP-relative argument slots.
    20  TEXT ·addMulVVW1536(SB),$0-32
    21  	MOV	$24, X30	// X30 = number of 8-byte words for addMulVVWy to process
    22  	JMP	addMulVVWy(SB)	// JMP rather than CALL; control does not come back here
    23  
    24  // func addMulVVW2048(z, x *uint, y uint) (c uint)
        // Entry point for 2048-bit operands: sets the word count in X30 to 32
        // (32 words of 8 bytes) and jumps to addMulVVWy, which reads z, x, y and
        // writes c through the FP-relative argument slots.
    25  TEXT ·addMulVVW2048(SB),$0-32
    26  	MOV	$32, X30	// X30 = number of 8-byte words for addMulVVWy to process
    27  	JMP	addMulVVWy(SB)	// JMP rather than CALL; control does not come back here
    28  
    29  TEXT addMulVVWy(SB),NOFRAME|NOSPLIT,$0	// shared loop; entered by JMP with X30 = word count
        // Computes z += x * y over X30 8-byte words, four words per loop
        // iteration, and stores the final carry word in c.
        //
        // Register roles:
        //   X5  = pointer into z (advanced by 32 bytes per iteration)
        //   X7  = pointer into x (advanced by 32 bytes per iteration)
        //   X6  = y
        //   X29 = running carry word c (starts at 0)
        //   X30 = words remaining. Not set here; the addMulVVW* entry points
        //         in this file load 4, 16, 24 or 32 before jumping in. The
        //         loop subtracts 4 and exits only when X30 is exactly zero,
        //         so a nonzero count must be a multiple of 4.
        //   X21 = scratch partial sum, X22 = scratch carry bit (0 or 1)
        //
        // Per word: hi:lo = x[i] * y; lo += z[i]; lo += c; each wrap of lo is
        // detected with SLTU (sum < addend) and added into hi, which becomes
        // the next c. Since x[i]*y + z[i] + c <= 2^128 - 1, hi cannot overflow.
    30  	MOV	z+0(FP), X5
    31  	MOV	x+8(FP), X7
    32  	MOV	y+16(FP), X6
    33  	MOV	$0, X29	// c = 0
    34  
    35  	BEQZ	X30, done	// zero words: return c = 0 without touching z
    36  loop:
    37  	MOV	0*8(X5), X10	// z[0]
    38  	MOV	1*8(X5), X13	// z[1]
    39  	MOV	2*8(X5), X16	// z[2]
    40  	MOV	3*8(X5), X19	// z[3]
    41  
    42  	MOV	0*8(X7), X8	// x[0]
    43  	MOV	1*8(X7), X11	// x[1]
    44  	MOV	2*8(X7), X14	// x[2]
    45  	MOV	3*8(X7), X17	// x[3]
    46  
    47  	MULHU	X8, X6, X9	// z_hi[0] = x[0] * y
    48  	MUL	X8, X6, X8	// z_lo[0] = x[0] * y
    49  	ADD	X8, X10, X21	// z_lo[0] = x[0] * y + z[0]
    50  	SLTU	X8, X21, X22	// X22 = 1 if that add wrapped (sum < addend)
    51  	ADD	X9, X22, X9	// z_hi[0] = x[0] * y + z[0]
    52  	ADD	X21, X29, X10	// z_lo[0] = x[0] * y + z[0] + c
    53  	SLTU	X21, X10, X22	// X22 = 1 if adding c wrapped
    54  	ADD	X9, X22, X29	// next c
    55  
    56  	MULHU	X11, X6, X12	// z_hi[1] = x[1] * y
    57  	MUL	X11, X6, X11	// z_lo[1] = x[1] * y
    58  	ADD	X11, X13, X21	// z_lo[1] = x[1] * y + z[1]
    59  	SLTU	X11, X21, X22	// X22 = 1 if that add wrapped (sum < addend)
    60  	ADD	X12, X22, X12	// z_hi[1] = x[1] * y + z[1]
    61  	ADD	X21, X29, X13	// z_lo[1] = x[1] * y + z[1] + c
    62  	SLTU	X21, X13, X22	// X22 = 1 if adding c wrapped
    63  	ADD	X12, X22, X29	// next c
    64  
    65  	MULHU	X14, X6, X15	// z_hi[2] = x[2] * y
    66  	MUL	X14, X6, X14	// z_lo[2] = x[2] * y
    67  	ADD	X14, X16, X21	// z_lo[2] = x[2] * y + z[2]
    68  	SLTU	X14, X21, X22	// X22 = 1 if that add wrapped (sum < addend)
    69  	ADD	X15, X22, X15	// z_hi[2] = x[2] * y + z[2]
    70  	ADD	X21, X29, X16	// z_lo[2] = x[2] * y + z[2] + c
    71  	SLTU	X21, X16, X22	// X22 = 1 if adding c wrapped
    72  	ADD	X15, X22, X29	// next c
    73  
    74  	MULHU	X17, X6, X18	// z_hi[3] = x[3] * y
    75  	MUL	X17, X6, X17	// z_lo[3] = x[3] * y
    76  	ADD	X17, X19, X21	// z_lo[3] = x[3] * y + z[3]
    77  	SLTU	X17, X21, X22	// X22 = 1 if that add wrapped (sum < addend)
    78  	ADD	X18, X22, X18	// z_hi[3] = x[3] * y + z[3]
    79  	ADD	X21, X29, X19	// z_lo[3] = x[3] * y + z[3] + c
    80  	SLTU	X21, X19, X22	// X22 = 1 if adding c wrapped
    81  	ADD	X18, X22, X29	// next c
    82  
    83  	MOV	X10, 0*8(X5)	// z[0]
    84  	MOV	X13, 1*8(X5)	// z[1]
    85  	MOV	X16, 2*8(X5)	// z[2]
    86  	MOV	X19, 3*8(X5)	// z[3]
    87  
    88  	ADD	$32, X5	// z pointer += 4 words
    89  	ADD	$32, X7	// x pointer += 4 words
    90  
    91  	SUB	$4, X30	// 4 fewer words remaining
    92  	BNEZ	X30, loop	// repeat until the count reaches exactly zero
    93  
    94  done:
    95  	MOV	X29, c+24(FP)	// return the final carry word
    96  	RET