// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// xtsMask is the constant pool used by both routines in this file:
//   +0x00 (16 bytes): VPERM control vector loaded into ESPERM
//   +0x10 (16 bytes): reduction constant (0x87) loaded into POLY on the isGB == false path
//   +0x20 (16 bytes): reduction constant (0xe1) loaded into POLY on the isGB == true path
DATA xtsMask<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // byte swap BE -> LE
DATA xtsMask<>+0x08(SB)/8, $0x0706050403020100
DATA xtsMask<>+0x10(SB)/8, $0x0000000000000000
DATA xtsMask<>+0x18(SB)/8, $0x0000000000000087
DATA xtsMask<>+0x20(SB)/8, $0xe100000000000000
DATA xtsMask<>+0x28(SB)/8, $0x0000000000000000
GLOBL xtsMask<>(SB), (NOPTR+RODATA), $48

#define ESPERM V21 // Endian swapping permute into BE

// Register roles shared by ·mul2 and ·doubleTweaks.
#define POLY V0  // reduction constant
#define B0 V1    // current tweak
#define T0 V2    // scratch
#define T1 V3    // scratch
#define CPOOL R7 // address of xtsMask

// doubleTweak(TW, RPOLY, MASK, TMP): multiply TW by 2.
//   1. MASK = byte 0 of TW splatted to all lanes, then shifted right
//      arithmetically by 7, i.e. every lane is 0xff when the top bit of
//      byte 0 is set and 0x00 otherwise.
//   2. MASK &= RPOLY, giving the reduction term.
//   3. TW = (TW shifted left by one bit) XOR MASK.
// Clobbers MASK and TMP.
#define doubleTweak(TW, RPOLY, MASK, TMP) \
	VSPLTB	$0, TW, MASK \
	VSPLTISB	$7, TMP \
	VSRAB	MASK, TMP, MASK \
	VAND	RPOLY, MASK, MASK \
	VSPLTISB	$1, TMP \
	VSL	TW, TMP, TMP \
	VXOR	MASK, TMP, TW

// gbDoubleTweak(TW, RPOLY, MASK, TMP): doubling used when isGB is set.
//   1. MASK = byte 15 of TW splatted to all lanes, shifted left by 7 and
//      then right arithmetically by 7, i.e. every lane is 0xff when the
//      lowest bit of byte 15 is set and 0x00 otherwise.
//   2. MASK &= RPOLY, giving the reduction term.
//   3. TW = (TW shifted right by one bit) XOR MASK.
// Clobbers MASK and TMP.
#define gbDoubleTweak(TW, RPOLY, MASK, TMP) \
	VSPLTB	$15, TW, MASK \
	VSPLTISB	$7, TMP \
	VSLB	MASK, TMP, MASK \
	VSRAB	MASK, TMP, MASK \
	VAND	RPOLY, MASK, MASK \
	VSPLTISB	$1, TMP \
	VSR	TW, TMP, TW \
	VXOR	MASK, TW, TW

// func mul2(tweak *[blockSize]byte, isGB bool)
//
// Doubles the 16-byte tweak in place, using doubleTweak when isGB is false
// and gbDoubleTweak when it is true.
//
// In:    tweak+0(FP) = pointer to the 16-byte tweak, isGB+8(FP) = variant flag
// Out:   *tweak overwritten with the doubled value
// Uses:  R3, R4, R5, R7 (CPOOL), V0-V3, V21 (ESPERM), CR0
TEXT ·mul2(SB),NOSPLIT,$0
	MOVD	tweak+0(FP), R3
	MOVBZ	isGB+8(FP), R4

	MOVD	$xtsMask<>(SB), CPOOL

	CMPW	R4, $1
	BEQ	gb_alg

	// Load polynomial for reduction (xtsMask+0x10)
	MOVD	$16, R5
	LXVD2X	(CPOOL)(R5), POLY

	// Load tweak
	LXVD2X	(R3), B0
#ifdef GOARCH_ppc64le
	// Swap the two doublewords around the doubling, and back again
	// before the store.
	XXPERMDI	B0, B0, $2, B0
	doubleTweak(B0, POLY, T0, T1)
	XXPERMDI	B0, B0, $2, B0
#else
	LXVD2X	(CPOOL), ESPERM

	// Permute the bytes around the doubling, and back again before
	// the store.
	VPERM	B0, B0, ESPERM, B0
	doubleTweak(B0, POLY, T0, T1)
	VPERM	B0, B0, ESPERM, B0
#endif
	STXVD2X	B0, (R3)

	RET

gb_alg:
	// Load polynomial for reduction (xtsMask+0x20)
	MOVD	$32, R5
	LXVD2X	(CPOOL)(R5), POLY

	// Load tweak
	LXVD2X	(R3), B0
#ifdef GOARCH_ppc64le
	LVX	(CPOOL), ESPERM
	VPERM	B0, B0, ESPERM, B0
	gbDoubleTweak(B0, POLY, T0, T1)
	VPERM	B0, B0, ESPERM, B0
#else
	gbDoubleTweak(B0, POLY, T0, T1)
#endif
	STXVD2X	B0, (R3)
	RET

// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// Fills tweaks with len(tweaks)/16 consecutive 16-byte tweaks: the first
// entry is the current *tweak and every following entry is the previous one
// doubled. On return *tweak holds the value that follows the last entry
// written. When len(tweaks) < 16 nothing is read or written.
//
// In:    tweak+0(FP), tweaks+8(FP) (base), tweaks_len+16(FP), isGB+32(FP)
// Uses:  R3-R7, CTR, V0-V3, V21 (ESPERM), CR0
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVD	tweak+0(FP), R3
	MOVD	tweaks+8(FP), R4
	MOVD	tweaks_len+16(FP), R5
	MOVBZ	isGB+32(FP), R6

	// CTR = number of 16-byte entries to produce. Both loops store first
	// and BDNZ decrements CTR before testing it, so entering them with
	// CTR == 0 would write past the slice and wrap the counter. Bail out
	// before touching any memory in that case.
	SRD	$4, R5
	CMP	R5, $0
	BEQ	done
	MOVD	R5, CTR

	MOVD	$xtsMask<>(SB), CPOOL

	// Load tweak
	LXVD2X	(R3), B0

	CMPW	R6, $1
	BEQ	gb_alg

#ifndef GOARCH_ppc64le
	LXVD2X	(CPOOL), ESPERM
#endif
	// Load polynomial for reduction (xtsMask+0x10)
	MOVD	$16, R5
	LXVD2X	(CPOOL)(R5), POLY

loop:
	// Emit the current tweak, then advance to the next one.
	STXVD2X	B0, (R4)
	ADD	$16, R4

#ifdef GOARCH_ppc64le
	XXPERMDI	B0, B0, $2, B0
	doubleTweak(B0, POLY, T0, T1)
	XXPERMDI	B0, B0, $2, B0
#else
	VPERM	B0, B0, ESPERM, B0
	doubleTweak(B0, POLY, T0, T1)
	VPERM	B0, B0, ESPERM, B0
#endif

	BDNZ	loop

	// Hand the next unused tweak back to the caller.
	STXVD2X	B0, (R3)
	RET

gb_alg:
	// Load polynomial for reduction (xtsMask+0x20)
	MOVD	$32, R5
	LXVD2X	(CPOOL)(R5), POLY

#ifdef GOARCH_ppc64le
	LVX	(CPOOL), ESPERM
#endif

gbLoop:
	// Emit the current tweak, then advance to the next one.
	STXVD2X	B0, (R4)
	ADD	$16, R4

#ifdef GOARCH_ppc64le
	VPERM	B0, B0, ESPERM, B0
	gbDoubleTweak(B0, POLY, T0, T1)
	VPERM	B0, B0, ESPERM, B0
#else
	gbDoubleTweak(B0, POLY, T0, T1)
#endif

	BDNZ	gbLoop

	// Hand the next unused tweak back to the caller.
	STXVD2X	B0, (R3)
	RET

done:
	RET