// github.com/emmansun/gmsm@v0.29.1/cipher/xts_s390x.s
//
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// xtsMask is a VPERM selector listing byte indexes 15..0, i.e. it reverses
// the 16 bytes of a vector register.
DATA xtsMask<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // byte swap BE -> LE
DATA xtsMask<>+0x08(SB)/8, $0x0706050403020100
GLOBL xtsMask<>(SB), (NOPTR+RODATA), $16

// Register aliases shared by both functions in this file.
#define BSWAP V0 // byte-reversal selector loaded from xtsMask
#define POLY  V1 // reduction constant (0x87 or 0xe1 in a single byte, rest zero)
#define B0    V2 // current tweak
#define T0    V3 // scratch
#define T1    V4 // scratch
#define CPOOL R3 // address of xtsMask; NOTE: doubleTweaks also uses R3 for tweaks_len first

// doubleTweak multiplies the tweak in B0 by 2 for the non-GB variant.
// The tweak is byte-reversed, shifted left by one bit as a whole, the POLY
// constant is XORed in when the top bit of word 0 was set before the shift,
// and the result is byte-reversed back. Clobbers T0 and T1.
#define doubleTweak(B0, BSWAP, POLY, T0, T1) \
	VPERM B0, B0, BSWAP, B0  \ // BE -> LE
	\ // Multiply by 2
	VESRAF $31, B0, T0       \
	VREPF $0, T0, T0         \
	VN POLY, T0, T0          \ // T0 for reduction
	VREPIB $1, T1            \
	VSL T1, B0, T1           \
	VX T1, T0, B0            \
	\
	VPERM B0, B0, BSWAP, B0

// gbDoubleTweak multiplies the tweak in B0 by 2 for the GB variant.
// No byte reversal is done: the tweak is shifted right by one bit as a whole
// and the POLY constant is XORed in when the lowest bit of word 3 was set
// before the shift. Clobbers T0 and T1.
#define gbDoubleTweak(B0, POLY, T0, T1) \
	VESLF $31, B0, T0        \
	VESRAF $31, T0, T0       \
	VREPF $3, T0, T0         \
	VN POLY, T0, T0          \ // T0 for reduction
	\
	VREPIB $1, T1            \
	VSRL T1, B0, T1          \
	VX T1, T0, B0

// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 doubles *tweak in place. isGB selects gbDoubleTweak (reduction byte
// 0xe1 in byte 0); otherwise doubleTweak is used (reduction byte 0x87 in
// byte 15 of the byte-reversed value).
TEXT ·mul2(SB),NOSPLIT,$0
	MOVD tweak+0(FP), R1
	MOVB isGB+8(FP), R2

	CMPBEQ R2, $1, gb_alg

	MOVD $xtsMask<>+0x00(SB), CPOOL
	VL (CPOOL), BSWAP

	// Load polynomial for reduction
	VZERO POLY
	VLEIB $15, $0x87, POLY

	// Load tweak
	VL 0(R1), B0
	doubleTweak(B0, BSWAP, POLY, T0, T1)
	VST B0, 0(R1)

	RET

gb_alg:
	// Load polynomial for reduction
	VZERO POLY
	VLEIB $0, $0xe1, POLY

	// Load tweak
	VL 0(R1), B0
	gbDoubleTweak(B0, POLY, T0, T1)
	VST B0, 0(R1)

	RET

// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks fills tweaks with successive tweak values, 16 bytes each:
// the first block receives the current *tweak, every following block the
// doubled value of the previous one. On return *tweak holds the value that
// follows the last block written. Only whole 16-byte blocks of tweaks are
// written; a trailing partial block is left untouched. If tweaks holds no
// whole block, nothing is written and *tweak is left unchanged.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVD tweak+0(FP), R1
	MOVD tweaks+8(FP), R2
	MOVD tweaks_len+16(FP), R3
	MOVB isGB+32(FP), R4

	// Round the length down to a multiple of 16; R5 = end of the last whole block.
	AND $-16, R3
	LAY (R2)(R3*1), R5

	// The loops below store before they test, so without this guard a slice
	// shorter than one block would still get 16 bytes written past its length.
	CMPBEQ R3, $0, done

	VL 0(R1), B0

	CMPBEQ R4, $1, gb_alg

	// R3 (tweaks_len) is no longer needed past this point; CPOOL reuses it.
	MOVD $xtsMask<>+0x00(SB), CPOOL
	VL (CPOOL), BSWAP

	// Load polynomial for reduction
	VZERO POLY
	VLEIB $15, $0x87, POLY

loop:
	VST B0, 0(R2)

	doubleTweak(B0, BSWAP, POLY, T0, T1)

	LA 16(R2), R2
	CMPBLT R2, R5, loop

	// Write the next tweak back for the caller.
	VST B0, 0(R1)
	RET

gb_alg:
	// Load polynomial for reduction
	VZERO POLY
	VLEIB $0, $0xe1, POLY

gb_alg_loop:
	VST B0, 0(R2)

	gbDoubleTweak(B0, POLY, T0, T1)

	LA 16(R2), R2
	CMPBLT R2, R5, gb_alg_loop

	// Write the next tweak back for the caller.
	VST B0, 0(R1)
	RET

done:
	RET