github.com/emmansun/gmsm@v0.29.1/zuc/eia256_asm_ppc64x.s

// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

#define XTMP1 V0
#define XTMP2 V1
#define XTMP3 V2
#define XTMP4 V3
#define XTMP5 V4
#define XTMP6 V5
#define XDATA V6
#define XDIGEST V7
#define KS_L V8
#define KS_M1 V9
#define KS_M2 V10
#define KS_H V11
#define BIT_REV_TAB_L V12
#define BIT_REV_TAB_H V13
#define BIT_REV_AND_TAB V14
#define ZERO V15
#define PTR R7

// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
	MOVD t+0(FP), R3
	MOVD ks+8(FP), R4
	MOVD p+16(FP), R5

#ifndef GOARCH_ppc64le
	MOVD $·rcon(SB), PTR // PTR points to rcon addr
	LVX (PTR), XTMP1
	ADD $0x10, PTR
#else
	MOVD $·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	// load 16 data bytes (big-endian targets also reorder them via the permute vector)
	LXVD2X (R5)(R0), XDATA
#ifndef GOARCH_ppc64le
	VPERM XDATA, XDATA, XTMP1, XDATA
#endif

	// bit-reverse each data byte with two nibble-wide table lookups
	LXVD2X (PTR)(R0), BIT_REV_AND_TAB
	VAND BIT_REV_AND_TAB, XDATA, XTMP3 // XTMP3 = low nibbles
	VSPLTISB $4, XTMP2                 // XTMP2 = 4 in every byte (shift count)
	VSRW XDATA, XTMP2, XTMP1
	VAND BIT_REV_AND_TAB, XTMP1, XTMP1 // XTMP1 = high nibbles, shifted down

	MOVD $0x10, R8
	LXVD2X (PTR)(R8), BIT_REV_TAB_L
	VSLB BIT_REV_TAB_L, XTMP2, BIT_REV_TAB_H // high-nibble table = low-nibble table << 4
	VPERM BIT_REV_TAB_L, BIT_REV_TAB_L, XTMP1, XTMP1
	VPERM BIT_REV_TAB_H, BIT_REV_TAB_H, XTMP3, XTMP3
	VXOR XTMP1, XTMP3, XTMP3 // XTMP3 - bit reverse data bytes

	// ZUC authentication part, 4x32 data bits
	// setup data: spread the four bit-reversed data words across
	// XTMP1/XTMP2, one 32-bit word per 64-bit lane
	VSPLTISB $0, ZERO
	MOVD $0x20, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP1
	MOVD $0x30, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP2

	// setup KS: build the overlapping 64-bit keystream windows used by the
	// carry-less multiplies
	LXVW4X (R4), KS_L
	MOVD $8, R8
	LXVW4X (R8)(R4), KS_M1
	MOVD $16, R8
	LXVW4X (R8)(R4), KS_M2
	MOVD $0x40, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM KS_L, KS_L, XTMP4, KS_L
	VPERM KS_M1, KS_M1, XTMP4, KS_M1
	VPERM KS_M2, KS_M2, XTMP4, KS_M2

	// clmul: VPMSUMD carry-less multiplies both 64-bit lanes and XORs the
	// two products, so two VPMSUMDs plus a VXOR fold the results from all
	// 4 32-bit words together
	// Calculate lower 32 bits of tag
	VPMSUMD XTMP1, KS_L, XTMP3
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSPLTW $2, XTMP3, XDIGEST

	// Calculate upper 32 bits of tag
	VSLDOI $8, KS_M1, KS_L, KS_L
	VPMSUMD XTMP1, KS_L, XTMP3
	VSLDOI $8, KS_M2, KS_M1, KS_M1
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSPLTW $2, XTMP3, XTMP3

	// Update tag: merge the two 32-bit halves and XOR them into *t
#ifdef GOARCH_ppc64le
	VSLDOI $12, XTMP3, XDIGEST, XDIGEST
#else
	VSLDOI $12, XDIGEST, XTMP3, XDIGEST
#endif
	MFVSRD XDIGEST, R8
	MOVD (R3), R6
	XOR R6, R8, R6
	MOVD R6, (R3)

	// Copy last 16 bytes of KS to the front
	MOVD $16, R8
	LXVD2X (R8)(R4), XTMP1
	STXVD2X XTMP1, (R4)(R0)

	RET

// func eia256RoundTag16(t *uint32, keyStream *uint32, p *byte)
TEXT ·eia256RoundTag16(SB),NOSPLIT,$0
	MOVD t+0(FP), R3
	MOVD ks+8(FP), R4
	MOVD p+16(FP), R5

#ifndef GOARCH_ppc64le
	MOVD $·rcon(SB), PTR // PTR points to rcon addr
	LVX (PTR), XTMP1
	ADD $0x10, PTR
#else
	MOVD $·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	LXVD2X (R5)(R0), XDATA
#ifndef GOARCH_ppc64le
	VPERM XDATA, XDATA, XTMP1, XDATA
#endif

	// bit-reverse each data byte, as in eia256RoundTag8
	LXVD2X (PTR)(R0), BIT_REV_AND_TAB
	VAND BIT_REV_AND_TAB, XDATA, XTMP3
	VSPLTISB $4, XTMP2
	VSRW XDATA, XTMP2, XTMP1
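	// mask again so only the shifted-down high nibbles remain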
	VAND BIT_REV_AND_TAB, XTMP1, XTMP1

	MOVD $0x10, R8
	LXVD2X (PTR)(R8), BIT_REV_TAB_L
	VSLB BIT_REV_TAB_L, XTMP2, BIT_REV_TAB_H
	VPERM BIT_REV_TAB_L, BIT_REV_TAB_L, XTMP1, XTMP1
	VPERM BIT_REV_TAB_H, BIT_REV_TAB_H, XTMP3, XTMP3
	VXOR XTMP1, XTMP3, XTMP3 // XTMP3 - bit reverse data bytes

	// ZUC authentication part, 4x32 data bits
	// setup data
	VSPLTISB $0, ZERO
	MOVD $0x20, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP1
	MOVD $0x30, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP2

	// setup KS
	LXVW4X (R4), KS_L
	MOVD $8, R8
	LXVW4X (R8)(R4), KS_M1
	MOVD $16, R8
	LXVW4X (R8)(R4), KS_M2
	VOR KS_M2, KS_M2, KS_H // KS_H keeps an unpermuted copy of ks[4..7]
	MOVD $0x40, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM KS_L, KS_L, XTMP4, KS_L
	VPERM KS_M1, KS_M1, XTMP4, KS_M1
	VPERM KS_M2, KS_M2, XTMP4, KS_M2

	// clmul
	// xor the results from 4 32-bit words together
	// Calculate bits 31-0 of tag
	VPMSUMD XTMP1, KS_L, XTMP3
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $12, XTMP3, XTMP3, XDIGEST

	// Calculate bits 63-32 of tag
	VSLDOI $8, KS_M1, KS_L, KS_L
	VPMSUMD XTMP1, KS_L, XTMP3
	VSLDOI $8, KS_M2, KS_M1, XTMP5
	VPMSUMD XTMP2, XTMP5, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Calculate bits 95-64 of tag
	VPMSUMD XTMP1, KS_M1, XTMP3
	VPMSUMD XTMP2, KS_M2, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Calculate bits 127-96 of tag
	VSLDOI $8, KS_M2, KS_M1, KS_M1
	VPMSUMD XTMP1, KS_M1, XTMP3
	VSLDOI $8, KS_H, KS_M2, KS_M2
	VPMSUMD XTMP2, KS_M2, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Update tag: XOR all four 32-bit tag words in place
	LXVW4X (R3)(R0), XTMP1
	VXOR XTMP1, XDIGEST, XDIGEST
	STXVW4X XDIGEST, (R3)

	// Copy last 16 bytes of KS to the front
	MOVD $16, R8
	LXVD2X (R8)(R4), XTMP1
	STXVD2X XTMP1, (R4)(R0)

	RET
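
// A minimal sketch of the Go declarations these TEXT symbols implement; the
// actual stubs live in this package's Go sources, and the //go:noescape
// directive shown here is an assumption:
//
//	//go:noescape
//	func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
//
//	//go:noescape
//	func eia256RoundTag16(t *uint32, keyStream *uint32, p *byte)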