github.com/emmansun/gmsm@v0.29.1/zuc/eia256_asm_ppc64x.s

// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

#define XTMP1 V0
#define XTMP2 V1
#define XTMP3 V2
#define XTMP4 V3
#define XTMP5 V4
#define XTMP6 V5
#define XDATA V6
#define XDIGEST V7
#define KS_L V8
#define KS_M1 V9
#define KS_M2 V10
#define KS_H V11
#define BIT_REV_TAB_L V12
#define BIT_REV_TAB_H V13
#define BIT_REV_AND_TAB V14
#define ZERO V15
#define PTR R7

// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
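//
// Each call folds 16 bytes (128 bits) of message into the 64-bit tag at t.
// Conceptually, every set message bit selects a 64-bit keystream window
// starting at that bit position, and all selected windows are XORed into the
// tag; here that accumulation is evaluated with carry-less multiplies
// (VPMSUMD) over bit-reversed message bytes rather than bit by bit.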
TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
	MOVD t+0(FP), R3
	MOVD ks+8(FP), R4
	MOVD p+16(FP), R5

#ifndef GOARCH_ppc64le
	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
	LVX	(PTR), XTMP1
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

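	// Load 16 bytes of message data. On big-endian targets XTMP1 (loaded
	// from the first 16 bytes of ·rcon above) presumably holds a byte-order
	// permute mask, so the VPERM below normalizes the lane layout to match
	// the little-endian path.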
	LXVD2X (R5)(R0), XDATA
#ifndef GOARCH_ppc64le
	VPERM XDATA, XDATA, XTMP1, XDATA
#endif

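	// Bit-reverse every data byte with 4-bit table lookups: split each byte
	// into its low and high nibbles (BIT_REV_AND_TAB is presumably a 0x0f
	// byte mask), look both halves up in a reversed-nibble table
	// (BIT_REV_TAB_H is BIT_REV_TAB_L shifted left by 4), then XOR the two
	// halves back together.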
	LXVD2X (PTR)(R0), BIT_REV_AND_TAB
	VAND	BIT_REV_AND_TAB, XDATA, XTMP3
	VSPLTISB $4, XTMP2
	VSRW	XDATA, XTMP2, XTMP1
	VAND	BIT_REV_AND_TAB, XTMP1, XTMP1

	MOVD	$0x10, R8
	LXVD2X (PTR)(R8), BIT_REV_TAB_L
	VSLB  BIT_REV_TAB_L, XTMP2, BIT_REV_TAB_H
	VPERM BIT_REV_TAB_L, BIT_REV_TAB_L, XTMP1, XTMP1
	VPERM BIT_REV_TAB_H, BIT_REV_TAB_H, XTMP3, XTMP3
	VXOR XTMP1, XTMP3, XTMP3 // XTMP3 - bit reverse data bytes

	// ZUC authentication part, 4x32 data bits
	// setup data
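	// The permute masks at ·rcon+0x20/+0x30 presumably zero-extend each
	// bit-reversed 32-bit data word into its own doubleword lane (XTMP1
	// gets words 0-1, XTMP2 gets words 2-3), so each data word can be
	// multiplied carry-lessly against a 64-bit keystream window by VPMSUMD.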
	VSPLTISB $0, ZERO
	MOVD $0x20, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP1
	MOVD $0x30, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP2

	// setup KS
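	// Load overlapping groups of keystream words (ks[0..3], ks[2..5],
	// ks[4..7]) and, via the permute mask at ·rcon+0x40, presumably arrange
	// adjacent words into the 64-bit sliding-window operands that VPMSUMD
	// multiplies against the data words.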
	LXVW4X (R4), KS_L
	MOVD $8, R8
	LXVW4X (R8)(R4), KS_M1
	MOVD $16, R8
	LXVW4X (R8)(R4), KS_M2
	MOVD $0x40, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM KS_L, KS_L, XTMP4, KS_L
	VPERM KS_M1, KS_M1, XTMP4, KS_M1
	VPERM KS_M2, KS_M2, XTMP4, KS_M2

	// clmul
	// xor the results from 4 32-bit words together
	// Calculate lower 32 bits of tag
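	// VPMSUMD forms the carry-less product of each doubleword lane and XORs
	// the two products into a single 128-bit result, so two VPMSUMDs plus a
	// VXOR fold all four data words against their keystream windows; the
	// 32-bit tag word lands in word element 2, which VSPLTW extracts.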
	VPMSUMD XTMP1, KS_L, XTMP3
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSPLTW $2, XTMP3, XDIGEST

	// Calculate upper 32 bits of tag
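	// Same multiply-and-fold, but with the keystream windows advanced by
	// 32 bits: VSLDOI $8 splices the low doubleword of KS_M1 onto the high
	// doubleword of KS_L (and of KS_M2 onto KS_M1), presumably forming the
	// window pairs offset by one keystream word.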
	VSLDOI $8, KS_M1, KS_L, KS_L
	VPMSUMD XTMP1, KS_L, XTMP3
	VSLDOI $8, KS_M2, KS_M1, KS_M1
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSPLTW $2, XTMP3, XTMP3

	// Update tag
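	// Pack the two 32-bit results into one doubleword (operand order differs
	// by endianness so the packed value matches the in-memory tag layout),
	// move it to a GPR with MFVSRD, and XOR it into the 64-bit tag at t.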
#ifdef GOARCH_ppc64le
	VSLDOI $12, XTMP3, XDIGEST, XDIGEST
#else
	VSLDOI $12, XDIGEST, XTMP3, XDIGEST
#endif
	MFVSRD XDIGEST, R8
	MOVD (R3), R6
	XOR R6, R8, R6
	MOVD R6, (R3)

	// Copy last 16 bytes of KS to the front
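	// (the keystream buffer holds 8 words; the 16 bytes consumed this round
	// are shifted out, presumably so the caller can append fresh keystream)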
	MOVD $16, R8
	LXVD2X (R8)(R4), XTMP1
	STXVD2X XTMP1, (R4)(R0)

	RET

// func eia256RoundTag16(t *uint32, keyStream *uint32, p *byte)
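//
// Like eia256RoundTag8, but the tag at t is 128 bits (four uint32 words), so
// each message bit selects a 128-bit keystream window and four 32-bit tag
// words are accumulated per call.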
TEXT ·eia256RoundTag16(SB),NOSPLIT,$0
	MOVD t+0(FP), R3
	MOVD ks+8(FP), R4
	MOVD p+16(FP), R5

#ifndef GOARCH_ppc64le
	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
	LVX	(PTR), XTMP1
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	LXVD2X (R5)(R0), XDATA
#ifndef GOARCH_ppc64le
	VPERM XDATA, XDATA, XTMP1, XDATA
#endif

	LXVD2X (PTR)(R0), BIT_REV_AND_TAB
	VAND	BIT_REV_AND_TAB, XDATA, XTMP3
	VSPLTISB $4, XTMP2
	VSRW	XDATA, XTMP2, XTMP1
	VAND	BIT_REV_AND_TAB, XTMP1, XTMP1

	MOVD	$0x10, R8
	LXVD2X (PTR)(R8), BIT_REV_TAB_L
	VSLB  BIT_REV_TAB_L, XTMP2, BIT_REV_TAB_H
	VPERM BIT_REV_TAB_L, BIT_REV_TAB_L, XTMP1, XTMP1
	VPERM BIT_REV_TAB_H, BIT_REV_TAB_H, XTMP3, XTMP3
	VXOR XTMP1, XTMP3, XTMP3 // XTMP3 - bit reverse data bytes

	// ZUC authentication part, 4x32 data bits
	// setup data
	VSPLTISB $0, ZERO
	MOVD $0x20, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP1
	MOVD $0x30, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM ZERO, XTMP3, XTMP4, XTMP2

	// setup KS
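	// A 128-bit tag needs 128-bit keystream windows, so all eight words
	// ks[0..7] are used; KS_H keeps an unpermuted copy of ks[4..7] that is
	// spliced in below for the bits 127-96 pass.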
	LXVW4X (R4), KS_L
	MOVD $8, R8
	LXVW4X (R8)(R4), KS_M1
	MOVD $16, R8
	LXVW4X (R8)(R4), KS_M2
	VOR KS_M2, KS_M2, KS_H
	MOVD $0x40, R8
	LXVD2X (PTR)(R8), XTMP4
	VPERM KS_L, KS_L, XTMP4, KS_L
	VPERM KS_M1, KS_M1, XTMP4, KS_M1
	VPERM KS_M2, KS_M2, XTMP4, KS_M2

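	// Each 32-bit tag word below is computed as in the 8-byte variant, with
	// the keystream windows advanced by 32 bits per pass; each new word is
	// shifted into XDIGEST with VSLDOI $4 so that after four passes XDIGEST
	// holds the full 128-bit contribution.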
	// clmul
	// xor the results from 4 32-bit words together
	// Calculate lower 32 bits of tag
	VPMSUMD XTMP1, KS_L, XTMP3
	VPMSUMD XTMP2, KS_M1, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $12, XTMP3, XTMP3, XDIGEST

	// Calculate upper 32 bits of tag
	VSLDOI $8, KS_M1, KS_L, KS_L
	VPMSUMD XTMP1, KS_L, XTMP3
	VSLDOI $8, KS_M2, KS_M1, XTMP5
	VPMSUMD XTMP2, XTMP5, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Calculate bits 95-64 of tag
	VPMSUMD XTMP1, KS_M1, XTMP3
	VPMSUMD XTMP2, KS_M2, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Calculate bits 127-96 of tag
	VSLDOI $8, KS_M2, KS_M1, KS_M1
	VPMSUMD XTMP1, KS_M1, XTMP3
	VSLDOI $8, KS_H, KS_M2, KS_M2
	VPMSUMD XTMP2, KS_M2, XTMP4
	VXOR XTMP3, XTMP4, XTMP3
	VSLDOI $8, XTMP3, XTMP3, XTMP3
	VSLDOI $4, XDIGEST, XTMP3, XDIGEST

	// Update tag
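	// XOR the whole 128-bit digest into the four 32-bit tag words at t in
	// one vector load/xor/store.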
	LXVW4X (R3)(R0), XTMP1
	VXOR XTMP1, XDIGEST, XDIGEST
	STXVW4X XDIGEST, (R3)

	// Copy last 16 bytes of KS to the front
	MOVD $16, R8
	LXVD2X (R8)(R4), XTMP1
	STXVD2X XTMP1, (R4)(R0)

	RET