// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// Fixed-size multiply-accumulate kernels: z += x * y over a fixed number
// of 64-bit words, returning the final carry word c.
//
// Each exported entry point only loads its word count into X30 and then
// tail-jumps (JMP, not CALL) into addMulVVWy, which therefore reads the
// arguments and writes the result through the entry point's own FP.
//
// Register roles inside addMulVVWy:
//	X5	pointer to the current group of z words
//	X7	pointer to the current group of x words
//	X6	multiplier y
//	X29	running carry word c
//	X30	words still to process (set by the entry point)
//	X21,X22	scratch: partial sum and carry bit

// MULADDC handles one word: (hi:zw) = xw*y + zw + c, then c = hi.
// On entry xw holds x[i] and zw holds z[i]; on exit zw holds the new z[i]
// and X29 holds the carry word for the next limb. xw and hi are clobbered.
// Each SLTU yields 1 when the preceding ADD wrapped (sum < addend).
// The full value x*y + z + c is at most 2^128 - 1, so adding the two carry
// bits into hi cannot overflow.
#define MULADDC(xw, hi, zw) \
	MULHU	xw, X6, hi; \
	MUL	xw, X6, xw; \
	ADD	xw, zw, X21; \
	SLTU	xw, X21, X22; \
	ADD	hi, X22, hi; \
	ADD	X21, X29, zw; \
	SLTU	X21, zw, X22; \
	ADD	hi, X22, X29

// func addMulVVW256(z, x *uint, y uint) (c uint)
// 256 bits = 4 words.
TEXT ·addMulVVW256(SB),$0-32
	MOV	$4, X30
	JMP	addMulVVWy(SB)

// func addMulVVW1024(z, x *uint, y uint) (c uint)
// 1024 bits = 16 words.
TEXT ·addMulVVW1024(SB),$0-32
	MOV	$16, X30
	JMP	addMulVVWy(SB)

// func addMulVVW1536(z, x *uint, y uint) (c uint)
// 1536 bits = 24 words.
TEXT ·addMulVVW1536(SB),$0-32
	MOV	$24, X30
	JMP	addMulVVWy(SB)

// func addMulVVW2048(z, x *uint, y uint) (c uint)
// 2048 bits = 32 words.
TEXT ·addMulVVW2048(SB),$0-32
	MOV	$32, X30
	JMP	addMulVVWy(SB)

// addMulVVWy is the shared loop body behind the entry points above.
// It expects the word count in X30 and consumes four words per iteration,
// so a non-zero count must be a multiple of 4 for the loop to terminate
// at the intended word.
TEXT addMulVVWy(SB),NOFRAME|NOSPLIT,$0
	MOV	z+0(FP), X5
	MOV	x+8(FP), X7
	MOV	y+16(FP), X6
	MOV	$0, X29			// c = 0

	BEQZ	X30, finish		// nothing to do for a zero count

fourWords:
	// Load the next four words of x and z. All loads happen before any
	// store of this iteration.
	MOV	0*8(X7), X8		// x[0]
	MOV	0*8(X5), X10		// z[0]
	MOV	1*8(X7), X11		// x[1]
	MOV	1*8(X5), X13		// z[1]
	MOV	2*8(X7), X14		// x[2]
	MOV	2*8(X5), X16		// z[2]
	MOV	3*8(X7), X17		// x[3]
	MOV	3*8(X5), X19		// z[3]

	// The carry chains through X29 from one limb to the next.
	MULADDC(X8, X9, X10)		// limb 0
	MULADDC(X11, X12, X13)		// limb 1
	MULADDC(X14, X15, X16)		// limb 2
	MULADDC(X17, X18, X19)		// limb 3

	// Write the four updated words back to z.
	MOV	X10, 0*8(X5)		// z[0]
	MOV	X13, 1*8(X5)		// z[1]
	MOV	X16, 2*8(X5)		// z[2]
	MOV	X19, 3*8(X5)		// z[3]

	// Step both pointers past the four words (32 bytes) just processed.
	ADD	$32, X5
	ADD	$32, X7

	SUB	$4, X30
	BNEZ	X30, fourWords

finish:
	MOV	X29, c+24(FP)		// return the final carry word
	RET