// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego && (ppc64 || ppc64le)

#include "textflag.h"

// Fixed-size entry points.
//
// Each routine below loads the loop trip count into R6 and jumps to the
// file-local worker addMulVVWy<>. The worker handles four 64-bit words per
// iteration (it advances z and x by 32 bytes each time around), so the
// trip count is the operand size in bits divided by 64*4:
//
//	 256 bits -> 1 iteration
//	1024 bits -> 4 iterations
//	1536 bits -> 6 iterations
//	2048 bits -> 8 iterations
//
// The transfer is a JMP, not a call: the worker reads z, x and y and stores
// the result c through the same FP offsets (0, 8, 16 and 24) that the
// $0-32 signature of these entry points declares.

// func addMulVVW256(z, x *uint, y uint) (c uint)
TEXT ·addMulVVW256(SB), $0-32
	MOVD	$1, R6		// R6 = z_len/4
	JMP	addMulVVWy<>(SB)

// func addMulVVW1024(z, x *uint, y uint) (c uint)
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$4, R6		// R6 = z_len/4
	JMP	addMulVVWy<>(SB)

// func addMulVVW1536(z, x *uint, y uint) (c uint)
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$6, R6		// R6 = z_len/4
	JMP	addMulVVWy<>(SB)

// func addMulVVW2048(z, x *uint, y uint) (c uint)
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$8, R6		// R6 = z_len/4
	JMP	addMulVVWy<>(SB)

// This local function expects to be called only by
// callers above. R6 contains the z length/4
// since 4 values are processed for each
// loop iteration, and is guaranteed to be > 0.
// If other callers are added this function might
// need to change.
// addMulVVWy<> performs z[i] += x[i]*y over 4*R6 consecutive 64-bit words
// and stores the final carry word in c.
//
// Inputs:
//	z+0(FP)   pointer to the accumulator words (read and written in place)
//	x+8(FP)   pointer to the multiplicand words (read only)
//	y+16(FP)  the single-word multiplier
//	R6        number of loop iterations; it is moved to CTR and the loop
//	          is bottom-tested with BDNZ, so it must be non-zero on entry
//
// Output:
//	c+24(FP)  carry word left over after the last iteration
//
// Register use inside the loop:
//	R3, R4    running pointers into z and x, advanced by 32 bytes per pass
//	R5        y
//	R9        carry word passed from one word to the next (starts at 0)
//	R14-R21   the four x words and four z words on load; they are
//	          reused for product halves once their loaded value has been
//	          consumed, so the instruction order below must be kept
//	R10, R11  low/high product halves for word i
//
// Per word the sequence is:
//	MULLD/MULHDU  lo, hi = x[i]*y
//	ADDC   z, lo  lo += z[i]          (sets CA)
//	ADDZE  hi     hi += CA
//	ADDC   R9, lo lo += carry-in      (sets CA)
//	ADDZE  hi, R9 carry-out = hi + CA
TEXT addMulVVWy<>(SB), NOSPLIT, $0
	MOVD	z+0(FP), R3
	MOVD	x+8(FP), R4
	MOVD	y+16(FP), R5

	MOVD	$0, R9		// R9 = c = 0
	MOVD	R6, CTR		// Initialize loop counter
	PCALIGN	$16

loop:
	// Load four words of x and four words of z up front.
	MOVD	0(R4), R14	// x[i]
	MOVD	8(R4), R16	// x[i+1]
	MOVD	16(R4), R18	// x[i+2]
	MOVD	24(R4), R20	// x[i+3]
	MOVD	0(R3), R15	// z[i]
	MOVD	8(R3), R17	// z[i+1]
	MOVD	16(R3), R19	// z[i+2]
	MOVD	24(R3), R21	// z[i+3]

	// Word i: result in R10, carry-out in R9.
	MULLD	R5, R14, R10	// low x[i]*y
	MULHDU	R5, R14, R11	// high x[i]*y
	ADDC	R15, R10
	ADDZE	R11
	ADDC	R9, R10
	ADDZE	R11, R9

	// Word i+1: result in R14 (x[i] is no longer needed), carry-out in R9.
	MULLD	R5, R16, R14	// low x[i+1]*y
	MULHDU	R5, R16, R15	// high x[i+1]*y
	ADDC	R17, R14
	ADDZE	R15
	ADDC	R9, R14
	ADDZE	R15, R9

	// Word i+2: result in R16, carry-out in R9.
	MULLD	R5, R18, R16	// low x[i+2]*y
	MULHDU	R5, R18, R17	// high x[i+2]*y
	ADDC	R19, R16
	ADDZE	R17
	ADDC	R9, R16
	ADDZE	R17, R9

	// Word i+3: result in R18, carry-out in R9.
	MULLD	R5, R20, R18	// low x[i+3]*y
	MULHDU	R5, R20, R19	// high x[i+3]*y
	ADDC	R21, R18
	ADDZE	R19
	ADDC	R9, R18
	ADDZE	R19, R9

	// Write the four updated words back and step both pointers.
	MOVD	R10, 0(R3)	// z[i]
	MOVD	R14, 8(R3)	// z[i+1]
	MOVD	R16, 16(R3)	// z[i+2]
	MOVD	R18, 24(R3)	// z[i+3]
	ADD	$32, R3
	ADD	$32, R4
	BDNZ	loop		// decrement CTR, loop while it is non-zero

done:
	MOVD	R9, c+24(FP)
	RET