github.com/emmansun/gmsm@v0.29.1/cipher/xts_ppc64x.s (about)

     1  // Copyright 2024 Sun Yimin. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build (ppc64 || ppc64le) && !purego
     6  
     7  #include "textflag.h"
     8  
     9  DATA xtsMask<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // byte swap BE -> LE: first half of the 16-byte VPERM control loaded into ESPERM
    10  DATA xtsMask<>+0x08(SB)/8, $0x0706050403020100 // second half of the VPERM control
    11  DATA xtsMask<>+0x10(SB)/8, $0x0000000000000000 // offset 16: reduction constant loaded into POLY on the non-GB path
    12  DATA xtsMask<>+0x18(SB)/8, $0x0000000000000087 // only non-zero byte of that constant is 0x87
    13  DATA xtsMask<>+0x20(SB)/8, $0xe100000000000000 // offset 32: reduction constant loaded into POLY on the GB path (non-zero byte 0xe1)
    14  DATA xtsMask<>+0x28(SB)/8, $0x0000000000000000 // remaining half of the GB constant is zero
    15  GLOBL xtsMask<>(SB), (NOPTR+RODATA), $48 // 48 bytes total: three 16-byte entries, read-only, no pointers
    16  
    17  #define ESPERM  V21  // Endian swapping permute into BE; VPERM control vector loaded from xtsMask+0
    18  
    19  #define POLY V0 // reduction constant for the selected variant (loaded from xtsMask+16 or xtsMask+32)
    20  #define B0 V1 // the 16-byte tweak being doubled
    21  #define T0 V2 // scratch: holds the reduction mask inside the doubling macros
    22  #define T1 V3 // scratch: holds shift counts (and the shifted value in doubleTweak)
    23  #define CPOOL R7 // base address of the xtsMask constant table
    24  
    25  #define doubleTweak(B0, POLY, T0, T1) \ // B0 = (B0 << 1) ^ (POLY if the bit shifted out of the top was 1, else 0); clobbers T0 and T1
    26  	\ // Multiply by 2: first turn the top bit of byte 0 into an all-ones / all-zeros mask
    27  	VSPLTB $0, B0, T0    \ // T0 = byte 0 of B0 replicated into every byte
    28  	VSPLTISB $7, T1      \ // T1 = 7 in every byte (per-byte shift count)
    29  	VSRAB    T0, T1, T0  \ // arithmetic shift right by 7: every byte becomes 0xFF or 0x00
    30  	VAND    POLY, T0, T0 \// T0 for reduction: POLY when the top bit was set, otherwise zero
    31  	\ // Then shift the whole 128-bit vector left by one bit and fold the reduction in
    32  	VSPLTISB $1, T1      \ // T1 = 1 in every byte (bit count for the full-vector shift)
    33  	VSL B0, T1, T1       \ // T1 = B0 shifted left by one bit across all 128 bits
    34  	VXOR T0, T1, B0      // B0 = shifted value XOR conditional reduction constant
    35  
    36  #define gbDoubleTweak(B0, POLY, T0, T1) \ // B0 = (B0 >> 1) ^ (POLY if the bit shifted out of the bottom was 1, else 0); clobbers T0 and T1
    37  	VSPLTB $15, B0, T0   \ // T0 = byte 15 of B0 replicated into every byte
    38  	VSPLTISB $7, T1      \ // T1 = 7 in every byte (per-byte shift count)
    39  	VSLB T0, T1, T0      \ // shift each byte left by 7: the lowest-order bit becomes the byte's top bit
    40  	VSRAB T0, T1, T0     \ // arithmetic shift right by 7: every byte becomes 0xFF or 0x00
    41  	VAND POLY, T0, T0    \ // T0 for reduction: POLY when the low bit was set, otherwise zero
    42  	VSPLTISB $1, T1      \ // T1 = 1 in every byte (bit count for the full-vector shift)
    43  	VSR B0, T1, B0       \ // B0 = B0 shifted right by one bit across all 128 bits
    44  	VXOR T0, B0, B0      // fold the conditional reduction constant into the shifted value
    45  
    46  // func mul2(tweak *[blockSize]byte, isGB bool)
    47  TEXT ·mul2(SB),NOSPLIT,$0 // doubles the 16-byte tweak in place; isGB selects the gbDoubleTweak variant
    48  	MOVD tweak+0(FP), R3 // R3 = address of the tweak block
    49  	MOVBZ isGB+8(FP), R4 // R4 = isGB as a zero-extended byte
    50  
    51  	MOVD $xtsMask<>(SB), CPOOL // CPOOL = base of the constant table
    52  
    53  	CMPW R4, $1
    54  	BEQ gb_alg // isGB == 1: use the shift-right variant below
    55  	
    56  	// Load polynomial for reduction (16 bytes at xtsMask+16)
    57  	MOVD $16, R5
    58  	LXVD2X (CPOOL)(R5), POLY
    59  
    60  	// Load tweak
    61  	LXVD2X (R3), B0
    62  #ifdef GOARCH_ppc64le
    63  	XXPERMDI B0, B0, $2, B0 // swap the two doublewords of B0 before the 128-bit shift
    64  	doubleTweak(B0, POLY, T0, T1)
    65  	XXPERMDI B0, B0, $2, B0 // swap them back so STXVD2X stores the original layout
    66  #else
    67  	LXVD2X (CPOOL), ESPERM // big-endian build: load the byte-swap permute control from xtsMask+0
    68  	
    69  	VPERM B0, B0, ESPERM, B0 // reorder the tweak bytes as selected by ESPERM
    70  	doubleTweak(B0, POLY, T0, T1)
    71  	VPERM B0, B0, ESPERM, B0 // apply the same permute again to restore the stored byte order
    72  #endif
    73  	STXVD2X B0, (R3) // write the doubled tweak back over the input
    74  
    75  	RET
    76  
    77  gb_alg:	// isGB path: same structure, different constant and macro
    78  	// Load polynomial for reduction (16 bytes at xtsMask+32)
    79  	MOVD $32, R5
    80  	LXVD2X (CPOOL)(R5), POLY
    81  
    82  	// Load tweak
    83  	LXVD2X (R3), B0
    84  #ifdef GOARCH_ppc64le
    85  	LVX (CPOOL), ESPERM // little-endian build: load the permute control from xtsMask+0
    86  	VPERM B0, B0, ESPERM, B0 // reorder the tweak bytes as selected by ESPERM
    87  	gbDoubleTweak(B0, POLY, T0, T1)
    88  	VPERM B0, B0, ESPERM, B0 // apply the same permute again to restore the stored byte order
    89  #else
    90  	gbDoubleTweak(B0, POLY, T0, T1) // big-endian build: no permute is applied on this path
    91  #endif
    92  	STXVD2X B0, (R3) // write the doubled tweak back over the input
    93  	RET
    94  
    95  // func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
    96  TEXT ·doubleTweaks(SB),NOSPLIT,$0 // fills tweaks with successive tweaks (first slot = *tweak), then stores the next tweak back to *tweak
    97  	MOVD tweak+0(FP), R3 // R3 = address of the running tweak
    98  	MOVD tweaks+8(FP), R4 // R4 = write cursor into tweaks
    99  	MOVD tweaks_len+16(FP), R5 // R5 = len(tweaks) in bytes
   100  	MOVBZ isGB+32(FP), R6 // R6 = isGB as a zero-extended byte
   101  	// Number of whole 16-byte slots; with none, return before touching memory (CTR == 0 would make BDNZ wrap around).
   102  	SRD	$4, R5
   103  	CMP	R5, $0
   104  	BEQ	done
   105  	MOVD R5, CTR // loop counter for whichever variant runs; R5 is reused as an offset below
   106  
   107  	MOVD $xtsMask<>(SB), CPOOL // CPOOL = base of the constant table
   108  	LXVD2X (R3), B0 // Load tweak
   109  
   110  	CMPW R6, $1
   111  	BEQ gb_alg // isGB == 1: use the shift-right variant below
   112  
   113  #ifndef GOARCH_ppc64le
   114  	LXVD2X (CPOOL), ESPERM
   115  #endif
   116  	// Load polynomial for reduction (16 bytes at xtsMask+16)
   117  	MOVD $16, R5
   118  	LXVD2X (CPOOL)(R5), POLY
   119  
   120  loop:
   121  		STXVD2X B0, (R4) // emit the current tweak into the next slot
   122  		ADD $16, R4
   123  
   124  #ifdef GOARCH_ppc64le
   125  		XXPERMDI B0, B0, $2, B0
   126  		doubleTweak(B0, POLY, T0, T1)
   127  		XXPERMDI B0, B0, $2, B0
   128  #else
   129  		VPERM B0, B0, ESPERM, B0
   130  		doubleTweak(B0, POLY, T0, T1)
   131  		VPERM B0, B0, ESPERM, B0
   132  #endif
   133  
   134  		BDNZ	loop
   135  
   136  	STXVD2X B0, (R3) // store the tweak that follows the last emitted one
   137  	RET
   138  
   139  gb_alg:	// isGB path: CTR was already set from the slot count above
   140  	// Load polynomial for reduction (16 bytes at xtsMask+32)
   141  	MOVD $32, R5
   142  	LXVD2X (CPOOL)(R5), POLY
   143  
   144  #ifdef GOARCH_ppc64le
   145  	LVX (CPOOL), ESPERM
   146  #endif
   147  
   148  gbLoop:
   149  		STXVD2X B0, (R4) // emit the current tweak into the next slot
   150  		ADD $16, R4
   151  
   152  #ifdef GOARCH_ppc64le
   153  		VPERM B0, B0, ESPERM, B0
   154  		gbDoubleTweak(B0, POLY, T0, T1)
   155  		VPERM B0, B0, ESPERM, B0
   156  #else
   157  		gbDoubleTweak(B0, POLY, T0, T1)
   158  #endif
   159  
   160  		BDNZ	gbLoop
   161  
   162  	STXVD2X B0, (R3) // store the tweak that follows the last emitted one
   163  	RET
   164  
   165  done:	// fewer than 16 bytes of output requested: *tweak and tweaks are left untouched
   166  	RET