github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/crypto/aes/asm_ppc64x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  
     7  // Based on CRYPTOGAMS code with the following comment:
     8  // # ====================================================================
     9  // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    10  // # project. The module is, however, dual licensed under OpenSSL and
    11  // # CRYPTOGAMS licenses depending on where you obtain it. For further
    12  // # details see http://www.openssl.org/~appro/cryptogams/.
    13  // # ====================================================================
    14  
    15  // Original code can be found at the link below:
    16  // https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
    17  
    18  // Some function names were changed to be consistent with Go function
    19  // names. For instance, functions aes_p8_set_{en,de}crypt_key became
    20  // set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
    21  // and a new section was created (doEncryptKeyAsm). This was necessary to
    22  // avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm.
    23  // There were other modifications as well but kept the same functionality.
    24  
    25  #include "textflag.h"
    26  
    27  // For expandKeyAsm
    28  #define INP     R3  // Pointer to the input key bytes (key argument)
    29  #define BITS    R4  // Not referenced by the code visible in this file
    30  #define OUTENC  R5 // Pointer to next expanded encrypt key
    31  #define PTR     R6  // Pointer into the ·rcon table
    32  #define CNT     R7  // Iteration count, copied into CTR
    33  #define ROUNDS  R8  // Number of rounds (nr argument)
    34  #define OUTDEC  R9  // Pointer to next expanded decrypt key
    35  #define TEMP    R19 // Holds the index 0x20 used to load MASK
    36  #define ZERO    V0  // All-zero vector (set with VXOR)
    37  #define IN0     V1  // First 16 key bytes, then the evolving round key
    38  #define IN1     V2  // Remaining key bytes for the 192/256-bit schedules
    39  #define KEY     V3  // Scratch for the permuted/ciphered key word
    40  #define RCON    V4  // Round constant, doubled with VADDUWM
    41  #define MASK    V5  // Permute mask loaded from ·rcon+0x30
    42  #define TMP     V6  // Scratch
    43  #define STAGE   V7  // Staging vector for the 192-bit schedule stores
    44  #define OUTPERM V8  // Not referenced by the code visible in this file
    45  #define OUTMASK V9  // Not referenced by the code visible in this file
    46  #define OUTHEAD V10 // Not referenced by the code visible in this file
    47  #define OUTTAIL V11 // Not referenced by the code visible in this file
    48  
    49  // For P9 instruction emulation
    50  #define ESPERM  V21  // Endian swapping permute into BE
    51  #define TMP2    V22  // Temporary overwritten by P8_STXVB16X on ppc64le
    52  
    53  // For {en,de}cryptBlockAsm (not referenced by the code visible in this file, which uses raw register names)
    54  #define BLK_INP    R3
    55  #define BLK_OUT    R4
    56  #define BLK_KEY    R5
    57  #define BLK_ROUNDS R6
    58  #define BLK_IDX    R7
    59  
    60  DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap (loaded into ESPERM on ppc64le)
    61  DATA ·rcon+0x08(SB)/8, $0x0706050403020100
    62  DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
    63  DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
    64  DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000 // RCON reloaded by expandKeyAsm after its 8-iteration 128-bit loop
    65  DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
    66  DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    67  DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    68  DATA ·rcon+0x40(SB)/8, $0x0000000000000000 // Zero; not loaded by the code visible in this file
    69  DATA ·rcon+0x48(SB)/8, $0x0000000000000000
    70  GLOBL ·rcon(SB), RODATA, $80 // 80 bytes = five 16-byte vectors
    71  
    72  // Emulate unaligned BE vector load/stores on LE targets
        // On ppc64le each macro below expects ESPERM to already hold the
        // swap permute from ·rcon+0x00, and P8_STXVB16X overwrites TMP2.
        // On ppc64 the macros reduce to the plain load/store instruction.
    73  #ifdef GOARCH_ppc64le
    74  #define P8_LXVB16X(RA,RB,VT) \
    75  	LXVD2X	(RA+RB), VT \
    76  	VPERM	VT, VT, ESPERM, VT
    77  
    78  #define P8_STXVB16X(VS,RA,RB) \
    79  	VPERM	VS, VS, ESPERM, TMP2 \
    80  	STXVD2X	TMP2, (RA+RB)
    81  
    82  #define LXSDX_BE(RA,RB,VT) \
    83  	LXSDX	(RA+RB), VT \
    84  	VPERM	VT, VT, ESPERM, VT
    85  #else
    86  #define P8_LXVB16X(RA,RB,VT) \
    87  	LXVD2X	(RA+RB), VT
    88  
    89  #define P8_STXVB16X(VS,RA,RB) \
    90  	STXVD2X	VS, (RA+RB)
    91  
    92  #define LXSDX_BE(RA,RB,VT) \
    93  	LXSDX	(RA+RB), VT
    94  #endif
    95  
    96  // func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
        // Expands the AES key at key into round keys. Each round key is
        // written twice: in ascending order starting at enc, and in
        // descending order ending at dec. nr selects the schedule:
        // nr < 12 takes the 128-bit path, nr == 12 the 192-bit path, and
        // any larger value the 256-bit path; nr is not otherwise validated.
        // Round keys are stored with plain STXVD2X (no endian permute).
    97  TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
    98  	// Load the arguments inside the registers
    99  	MOVD	nr+0(FP), ROUNDS
   100  	MOVD	key+8(FP), INP
   101  	MOVD	enc+16(FP), OUTENC
   102  	MOVD	dec+24(FP), OUTDEC
   103  
   104  #ifdef GOARCH_ppc64le
   105  	MOVD	$·rcon(SB), PTR // PTR point to rcon addr
   106  	LVX	(PTR), ESPERM   // ESPERM = endian swap permute needed by the P8_ macros
   107  	ADD	$0x10, PTR      // Skip the permute vector; PTR now at the first RCON
   108  #else
   109  	MOVD	$·rcon+0x10(SB), PTR // PTR point to rcon addr (skipping permute vector)
   110  #endif
   111  
   112  	// Get key from memory and write aligned into VR
   113  	P8_LXVB16X(INP, R0, IN0)
   114  	ADD	$0x10, INP, INP    // INP now points at key bytes 16.. (used by l192/l256)
   115  	MOVD	$0x20, TEMP        // Offset from the first RCON to MASK
   116  
   117  	CMPW	ROUNDS, $12        // CR0 is consumed by the BLT/BEQ below
   118  	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
   119  	LVX	(PTR)(TEMP), MASK  // Load MASK from ·rcon+0x30
   120  	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
   121  	MOVD	$8, CNT            // li    7,8        CNT = 8
   122  	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
   123  	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)
   124  
   125  	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
   126  	// Move OUTDEC to the last key location, and store in descending order.
   127  	ADD	$160, OUTDEC, OUTDEC // 10*16: last round key slot on the 128-bit path
   128  	BLT	loop128
   129  	ADD	$32, OUTDEC, OUTDEC  // 192 = 12*16: last slot on the 192-bit path
   130  	BEQ	l192
   131  	ADD	$32, OUTDEC, OUTDEC  // 224 = 14*16: last slot on the 256-bit path
   132  	JMP	l256
   133  
   134  loop128:
   135  	// Key schedule (Round 1 to 8)
   136  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
   137  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   138  	STXVD2X	IN0, (R0+OUTENC)                 // Store current round key (ascending)
   139  	STXVD2X	IN0, (R0+OUTDEC)                 // Same key into the decrypt schedule (descending)
   140  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   141  	ADD	$16, OUTENC, OUTENC
   142  	ADD	$-16, OUTDEC, OUTDEC
   143  
   144  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   145  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   146  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   147  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   148  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   149  	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   150  	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   151  	BC	0x10, 0, loop128    // bdnz .Loop128
   152  
   153  	LVX	(PTR)(R0), RCON // lvx 4,0,6     RCON = 0x1b vector (·rcon+0x20) for the last two round keys
   154  
   155  	// Key schedule (Round 9)
   156  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
   157  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   158  	STXVD2X	IN0, (R0+OUTENC)
   159  	STXVD2X	IN0, (R0+OUTDEC)
   160  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   161  	ADD	$16, OUTENC, OUTENC
   162  	ADD	$-16, OUTDEC, OUTDEC
   163  
   164  	// Key schedule (Round 10)
   165  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   166  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   167  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   168  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   169  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   170  	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   171  	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   172  
   173  	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
   174  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   175  	STXVD2X	IN0, (R0+OUTENC)
   176  	STXVD2X	IN0, (R0+OUTDEC)
   177  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   178  	ADD	$16, OUTENC, OUTENC
   179  	ADD	$-16, OUTDEC, OUTDEC
   180  
   181  	// Key schedule (Round 11)
   182  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   183  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   184  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   185  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   186  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   187  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   188  	STXVD2X	IN0, (R0+OUTENC)                 // Final round key; pointers are not advanced afterwards
   189  	STXVD2X	IN0, (R0+OUTDEC)
   190  
   191  	RET
   192  
   193  l192:
   194  	LXSDX_BE(INP, R0, IN1)                   // Load next 8 bytes into upper half of VSR in BE order.
   195  	MOVD	$4, CNT                          // li 7,4
   196  	STXVD2X	IN0, (R0+OUTENC)                 // Store the first 16 key bytes as the first round key
   197  	STXVD2X	IN0, (R0+OUTDEC)
   198  	ADD	$16, OUTENC, OUTENC
   199  	ADD	$-16, OUTDEC, OUTDEC
   200  	VSPLTISB	$8, KEY                  // vspltisb 3,8
   201  	MOVD	CNT, CTR                         // mtctr 7      loop192 runs 4 times, storing 3 round keys each
   202  	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3
   203  
   204  loop192:
   205  	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
   206  	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
   207  	VCIPHERLAST	KEY, RCON, KEY      // vcipherlast 3,3,4
   208  
   209  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   210  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   211  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   212  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   213  	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   214  
   215  	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
   216  	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
   217  	VXOR	TMP, IN1, TMP         // vxor 6,6,2
   218  	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
   219  	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
   220  	VXOR	IN1, TMP, IN1         // vxor 2,2,6
   221  	VXOR	IN0, KEY, IN0         // vxor 1,1,3
   222  	VXOR	IN1, KEY, IN1         // vxor 2,2,3
   223  	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
   224  
   225  	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   226  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   227  	STXVD2X	STAGE, (R0+OUTENC)               // First of three round keys stored per iteration
   228  	STXVD2X	STAGE, (R0+OUTDEC)
   229  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   230  	ADD	$16, OUTENC, OUTENC
   231  	ADD	$-16, OUTDEC, OUTDEC
   232  
   233  	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
   234  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   235  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   236  	STXVD2X	STAGE, (R0+OUTENC)               // Second round key of this iteration
   237  	STXVD2X	STAGE, (R0+OUTDEC)
   238  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   239  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   240  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   241  	ADD	$16, OUTENC, OUTENC
   242  	ADD	$-16, OUTDEC, OUTDEC
   243  
   244  	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
   245  	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
   246  	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
   247  	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   248  	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
   249  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   250  	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
   251  	STXVD2X	IN0, (R0+OUTENC)                 // Third round key of this iteration
   252  	STXVD2X	IN0, (R0+OUTDEC)
   253  	ADD	$16, OUTENC, OUTENC
   254  	ADD	$-16, OUTDEC, OUTDEC
   255  	BC	0x10, 0, loop192                 // bdnz .Loop192
   256  
   257  	RET
   258  
   259  l256:
   260  	P8_LXVB16X(INP, R0, IN1)                 // Load key bytes 16..31 into IN1
   261  	MOVD	$7, CNT                          // li 7,7
   262  	STXVD2X	IN0, (R0+OUTENC)                 // Store the first 16 key bytes as the first round key
   263  	STXVD2X	IN0, (R0+OUTDEC)
   264  	ADD	$16, OUTENC, OUTENC
   265  	ADD	$-16, OUTDEC, OUTDEC
   266  	MOVD	CNT, CTR                         // mtctr 7      loop256 exits via bdz on its 7th pass
   267  
   268  loop256:
   269  	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   270  	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   271  	STXVD2X	IN1, (R0+OUTENC)                 // Store IN1 as the next round key
   272  	STXVD2X	IN1, (R0+OUTDEC)
   273  	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   274  	ADD	$16, OUTENC, OUTENC
   275  	ADD	$-16, OUTDEC, OUTDEC
   276  
   277  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   278  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   279  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   280  	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   281  	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   282  	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   283  	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   284  	STXVD2X	IN0, (R0+OUTENC)                 // Store the updated IN0 as the following round key
   285  	STXVD2X	IN0, (R0+OUTDEC)
   286  	ADD	$16, OUTENC, OUTENC
   287  	ADD	$-16, OUTDEC, OUTDEC
   288  	BC	0x12, 0, done                    // bdz .Ldone
   289  
   290  	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
   291  	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
   292  	VSBOX	KEY, KEY            // vsbox 3,3
   293  
   294  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   295  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   296  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   297  	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   298  	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   299  
   300  	VXOR	IN1, KEY, IN1 // vxor 2,2,3
   301  	JMP	loop256       // b .Loop256
   302  
   303  done:
   304  	RET
   305  
   306  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        // Encrypts the 16-byte block at src with the expanded key xk and
        // stores the 16-byte result at dst. nr must be 10, 12 or 14; any
        // other value ends up at Linvalid_key_len, which stores to address 0.
   307  TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   308  	MOVD	nr+0(FP), R6   // Round count/Key size
   309  	MOVD	xk+8(FP), R5   // Key pointer
   310  	MOVD	dst+16(FP), R3 // Dest pointer
   311  	MOVD	src+24(FP), R4 // Src pointer
   312  #ifdef GOARCH_ppc64le
   313  	MOVD	$·rcon(SB), R7
   314  	LVX	(R7), ESPERM   // Permute value for P8_ macros.
   315  #endif
   316  
   317  	// Set CR{1,2,3}EQ to hold the key size information.
   318  	CMPU	R6, $10, CR1
   319  	CMPU	R6, $12, CR2
   320  	CMPU	R6, $14, CR3
   321  
   322  	MOVD	$16, R6        // R6-R12 become constant byte offsets 16..112; nr now lives only in CR1-CR3
   323  	MOVD	$32, R7
   324  	MOVD	$48, R8
   325  	MOVD	$64, R9
   326  	MOVD	$80, R10
   327  	MOVD	$96, R11
   328  	MOVD	$112, R12
   329  
   330  	// Load text in BE order
   331  	P8_LXVB16X(R4, R0, V0)
   332  
   333  	// V1, V2 hold round keys; V0 holds the running cipher state.
   334  	// At completion, V2 will hold the ciphertext.
   335  	// Load xk[0:3] and xor with text
   336  	LXVD2X	(R0+R5), V1
   337  	VXOR	V0, V1, V0
   338  
   339  	// Load xk[4:11] and cipher
   340  	LXVD2X	(R6+R5), V1
   341  	LXVD2X	(R7+R5), V2
   342  	VCIPHER	V0, V1, V0
   343  	VCIPHER	V0, V2, V0
   344  
   345  	// Load xk[12:19] and cipher
   346  	LXVD2X	(R8+R5), V1
   347  	LXVD2X	(R9+R5), V2
   348  	VCIPHER	V0, V1, V0
   349  	VCIPHER	V0, V2, V0
   350  
   351  	// Load xk[20:27] and cipher
   352  	LXVD2X	(R10+R5), V1
   353  	LXVD2X	(R11+R5), V2
   354  	VCIPHER	V0, V1, V0
   355  	VCIPHER	V0, V2, V0
   356  
   357  	// Increment xk pointer to reuse constant offsets in R6-R12.
   358  	ADD	$112, R5
   359  
   360  	// Load xk[28:35] and cipher
   361  	LXVD2X	(R0+R5), V1
   362  	LXVD2X	(R6+R5), V2
   363  	VCIPHER	V0, V1, V0
   364  	VCIPHER	V0, V2, V0
   365  
   366  	// Load xk[36:43] and cipher
   367  	LXVD2X	(R7+R5), V1
   368  	LXVD2X	(R8+R5), V2
   369  	BEQ	CR1, Ldec_tail // nr == 10? Then V1/V2 already hold the last two round keys
   370  	VCIPHER	V0, V1, V0
   371  	VCIPHER	V0, V2, V0
   372  
   373  	// Load xk[44:51] and cipher
   374  	LXVD2X	(R9+R5), V1
   375  	LXVD2X	(R10+R5), V2
   376  	BEQ	CR2, Ldec_tail // nr == 12? Then V1/V2 already hold the last two round keys
   377  	VCIPHER	V0, V1, V0
   378  	VCIPHER	V0, V2, V0
   379  
   380  	// Load xk[52:59] and cipher
   381  	LXVD2X	(R11+R5), V1
   382  	LXVD2X	(R12+R5), V2
   383  	BNE	CR3, Linvalid_key_len // nr is none of 10, 12, 14?
   384  	// Fallthrough to final cipher
   385  
   386  Ldec_tail:
   387  	// Encrypt tail (the label keeps the name Ldec_tail, as in decryptBlockAsm).
   388  	// Cipher last two keys such that key information is cleared from V1 and V2.
   389  	VCIPHER		V0, V1, V1
   390  	VCIPHERLAST	V1, V2, V2
   391  
   392  	// Store the result in BE order.
   393  	P8_STXVB16X(V2, R3, R0)
   394  	RET
   395  
   396  Linvalid_key_len:
   397  	// Segfault, this should never happen. Only 3 keys sizes are created/used.
   398  	MOVD	R0, 0(R0)
   399  	RET
   400  
   401  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        // Decrypts the 16-byte block at src with the expanded key xk and
        // stores the 16-byte result at dst. nr must be 10, 12 or 14; any
        // other value ends up at Linvalid_key_len, which stores to address 0.
   402  TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   403  	MOVD	nr+0(FP), R6   // Round count/Key size
   404  	MOVD	xk+8(FP), R5   // Key pointer
   405  	MOVD	dst+16(FP), R3 // Dest pointer
   406  	MOVD	src+24(FP), R4 // Src pointer
   407  #ifdef GOARCH_ppc64le
   408  	MOVD	$·rcon(SB), R7
   409  	LVX	(R7), ESPERM   // Permute value for P8_ macros.
   410  #endif
   411  
   412  	// Set CR{1,2,3}EQ to hold the key size information.
   413  	CMPU	R6, $10, CR1
   414  	CMPU	R6, $12, CR2
   415  	CMPU	R6, $14, CR3
   416  
   417  	MOVD	$16, R6        // R6-R12 become constant byte offsets 16..112; nr now lives only in CR1-CR3
   418  	MOVD	$32, R7
   419  	MOVD	$48, R8
   420  	MOVD	$64, R9
   421  	MOVD	$80, R10
   422  	MOVD	$96, R11
   423  	MOVD	$112, R12
   424  
   425  	// Load text in BE order
   426  	P8_LXVB16X(R4, R0, V0)
   427  
   428  	// V1, V2 hold round keys; V0 holds the running cipher state.
   429  	// At completion, V2 will hold the plaintext.
   430  	// Load xk[0:3] and xor with ciphertext
   431  	LXVD2X	(R0+R5), V1
   432  	VXOR	V0, V1, V0
   433  
   434  	// Load xk[4:11] and apply VNCIPHER
   435  	LXVD2X	(R6+R5), V1
   436  	LXVD2X	(R7+R5), V2
   437  	VNCIPHER	V0, V1, V0
   438  	VNCIPHER	V0, V2, V0
   439  
   440  	// Load xk[12:19] and apply VNCIPHER
   441  	LXVD2X	(R8+R5), V1
   442  	LXVD2X	(R9+R5), V2
   443  	VNCIPHER	V0, V1, V0
   444  	VNCIPHER	V0, V2, V0
   445  
   446  	// Load xk[20:27] and apply VNCIPHER
   447  	LXVD2X	(R10+R5), V1
   448  	LXVD2X	(R11+R5), V2
   449  	VNCIPHER	V0, V1, V0
   450  	VNCIPHER	V0, V2, V0
   451  
   452  	// Increment xk pointer to reuse constant offsets in R6-R12.
   453  	ADD	$112, R5
   454  
   455  	// Load xk[28:35] and apply VNCIPHER
   456  	LXVD2X	(R0+R5), V1
   457  	LXVD2X	(R6+R5), V2
   458  	VNCIPHER	V0, V1, V0
   459  	VNCIPHER	V0, V2, V0
   460  
   461  	// Load xk[36:43] and apply VNCIPHER
   462  	LXVD2X	(R7+R5), V1
   463  	LXVD2X	(R8+R5), V2
   464  	BEQ	CR1, Ldec_tail // nr == 10? Then V1/V2 already hold the last two round keys
   465  	VNCIPHER	V0, V1, V0
   466  	VNCIPHER	V0, V2, V0
   467  
   468  	// Load xk[44:51] and apply VNCIPHER
   469  	LXVD2X	(R9+R5), V1
   470  	LXVD2X	(R10+R5), V2
   471  	BEQ	CR2, Ldec_tail // nr == 12? Then V1/V2 already hold the last two round keys
   472  	VNCIPHER	V0, V1, V0
   473  	VNCIPHER	V0, V2, V0
   474  
   475  	// Load xk[52:59] and apply VNCIPHER
   476  	LXVD2X	(R11+R5), V1
   477  	LXVD2X	(R12+R5), V2
   478  	BNE	CR3, Linvalid_key_len // nr is none of 10, 12, 14?
   479  	// Fallthrough to final cipher
   480  
   481  Ldec_tail:
   482  	// Cipher last two keys such that key information is
   483  	// cleared from V1 and V2.
   484  	VNCIPHER	V0, V1, V1
   485  	VNCIPHERLAST	V1, V2, V2
   486  
   487  	// Store the result in BE order.
   488  	P8_STXVB16X(V2, R3, R0)
   489  	RET
   490  
   491  Linvalid_key_len:
   492  	// Segfault, this should never happen. Only 3 keys sizes are created/used.
   493  	MOVD	R0, 0(R0)
   494  	RET
   495  
   496  // Remove defines from above so they can be redefined here
   497  #undef INP
   498  #undef OUTENC
   499  #undef ROUNDS
   500  #undef KEY
   501  #undef TMP
   502  
   503  // CBC encrypt or decrypt
   504  // R3 src
   505  // R4 dst
   506  // R5 len
   507  // R6 key
   508  // R7 iv
   509  // R8 enc=1 dec=0
   510  // Ported from: aes_p8_cbc_encrypt
   511  // Register usage:
   512  // R9: ROUNDS
   513  // R10: Index
   514  // V0: RNDKEY0 (current round key)
   515  // V2: INOUT (block being processed), V3: TMP (ciphertext copy during decrypt)
   516  // V4: IVEC (IV / chaining value)
   517  
   518  #define INP R3
   519  #define OUT R4
   520  #define LEN R5
   521  #define KEY R6
   522  #define IVP R7
   523  #define ENC R8
   524  #define ROUNDS R9
   525  #define IDX R10
   526  
   527  #define RNDKEY0 V0
   528  #define INOUT V2
   529  #define TMP V3
   530  
   531  #define IVEC V4
   532  
   533  // Vector loads and stores of text and IV use the
   534  // P8_LXVB16X/P8_STXVB16X macros (LXVD2X/STXVD2X plus
   535  // a VPERM with ESPERM on little-endian targets), which
   536  // emulate unaligned BE vector accesses.
   537  
   538  // Encryption is done with VCIPHER and VCIPHERLAST
   539  // Decryption is done with VNCIPHER and VNCIPHERLAST
   540  
   541  // Encryption and decryption are done as follows:
   542  // - INOUT value is initialized in outer loop.
   543  // - ROUNDS value is adjusted for loop unrolling.
   544  // - Encryption/decryption is done in loop based on
   545  // adjusted ROUNDS value.
   546  // - Final INOUT value is encrypted/decrypted and stored.
   547  
   548  // Note: original implementation had an 8X version
   549  // for decryption which was omitted to avoid the
   550  // complexity.
   551  
   552  // func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
        // CBC-processes src into dst one 16-byte block at a time. Returns
        // immediately, without touching iv, when length < 16; trailing bytes
        // beyond the last whole block are not processed. enc == 0 (compared
        // as a 32-bit word) selects decryption, anything else encryption.
        // After at least one block the chaining value is written back to iv.
   553  TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
   554  	MOVD	src+0(FP), INP
   555  	MOVD	dst+8(FP), OUT
   556  	MOVD	length+16(FP), LEN
   557  	MOVD	key+24(FP), KEY
   558  	MOVD	iv+32(FP), IVP
   559  	MOVD	enc+40(FP), ENC
   560  	MOVD	nr+48(FP), ROUNDS
   561  
   562  #ifdef GOARCH_ppc64le
   563  	MOVD	$·rcon(SB), R11
   564  	LVX	(R11), ESPERM   // Permute value for P8_ macros.
   565  #endif
   566  
   567  	CMPU	LEN, $16    // cmpldi r5,16
   568  	BC	14, 0, LR   // bltlr-, return if len < 16.
   569  	CMPW	ENC, $0     // cmpwi r8,0     CR0 is consumed by the BEQ Lcbc_dec below
   570  
   571  	P8_LXVB16X(IVP, R0, IVEC) // load ivec in BE register order
   572  
   573  	SRW	$1, ROUNDS  // rlwinm r9,r9,31,1,31
   574  	MOVD	$0, IDX     // li r10,0
   575  	ADD	$-1, ROUNDS // addi r9,r9,-1  ROUNDS = nr/2 - 1 passes of the two-round inner loop
   576  	BEQ	Lcbc_dec    // beq            enc == 0: decrypt
   577  	PCALIGN	$16
   578  
   579  	// Outer loop: initialize encrypted value (INOUT)
   580  	// Load the next input block and xor it with the first round key and ivec (IVEC)
   581  Lcbc_enc:
   582  	P8_LXVB16X(INP, R0, INOUT)                 // load text in BE vreg order
   583  	ADD	$16, INP                           // addi r3,r3,16
   584  	MOVD	ROUNDS, CTR                        // mtctr r9
   585  	ADD	$-16, LEN                          // addi r5,r5,-16
   586  	LXVD2X	(KEY+IDX), RNDKEY0                 // load first xkey
   587  	ADD	$16, IDX                           // addi r10,r10,16
   588  	VXOR	INOUT, RNDKEY0, INOUT              // vxor v2,v2,v0
   589  	VXOR	INOUT, IVEC, INOUT                 // vxor v2,v2,v4
   590  
   591  	// Encryption loop of INOUT using RNDKEY0
   592  Loop_cbc_enc:
   593  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   594  	VCIPHER	INOUT, RNDKEY0, INOUT              // vcipher v2,v2,v0
   595  	ADD	$16, IDX                           // addi r10,r10,16
   596  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   597  	VCIPHER	INOUT, RNDKEY0, INOUT              // vcipher v2,v2,v0
   598  	ADD	$16, IDX                           // addi r10,r10,16
   599  	BDNZ Loop_cbc_enc
   600  
   601  	// Encrypt tail values and store INOUT
   602  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   603  	VCIPHER	INOUT, RNDKEY0, INOUT              // vcipher v2,v2,v0
   604  	ADD	$16, IDX                           // addi r10,r10,16
   605  	LXVD2X	(KEY+IDX), RNDKEY0                 // load final xkey
   606  	VCIPHERLAST	INOUT, RNDKEY0, IVEC       // vcipherlast v4,v2,v0  ciphertext becomes the next chaining value
   607  	MOVD	$0, IDX                            // reset key index for next block
   608  	CMPU	LEN, $16                           // cmpldi r5,16
   609  	P8_STXVB16X(IVEC, OUT, R0)                 // store ciphertext in BE order
   610  	ADD	$16, OUT                           // addi r4,r4,16
   611  	BGE	Lcbc_enc                           // bge Lcbc_enc  another whole block remains
   612  	BR	Lcbc_done                          // b Lcbc_done
   613  
   614  	// Outer loop: initialize decrypted value (INOUT)
   615  	// Load the next ciphertext block (kept in TMP) and xor it with the first round key
   616  Lcbc_dec:
   617  	P8_LXVB16X(INP, R0, TMP)                   // load ciphertext in BE vreg order
   618  	ADD	$16, INP                           // addi r3,r3,16
   619  	MOVD	ROUNDS, CTR                        // mtctr r9
   620  	ADD	$-16, LEN                          // addi r5,r5,-16
   621  	LXVD2X	(KEY+IDX), RNDKEY0                 // load first xkey
   622  	ADD	$16, IDX                           // addi r10,r10,16
   623  	VXOR	TMP, RNDKEY0, INOUT                // vxor v2,v3,v0
   624  	PCALIGN	$16
   625  
   626  	// Decryption loop of INOUT using RNDKEY0
   627  Loop_cbc_dec:
   628  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   629  	ADD	$16, IDX                           // addi r10,r10,16
   630  	VNCIPHER	INOUT, RNDKEY0, INOUT      // vncipher v2,v2,v0
   631  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   632  	ADD	$16, IDX                           // addi r10,r10,16
   633  	VNCIPHER	INOUT, RNDKEY0, INOUT      // vncipher v2,v2,v0
   634  	BDNZ Loop_cbc_dec
   635  
   636  	// Decrypt tail values and store INOUT
   637  	LXVD2X	(KEY+IDX), RNDKEY0                 // load next xkey
   638  	ADD	$16, IDX                           // addi r10,r10,16
   639  	VNCIPHER	INOUT, RNDKEY0, INOUT      // vncipher v2,v2,v0
   640  	LXVD2X	(KEY+IDX), RNDKEY0                 // load final xkey
   641  	MOVD	$0, IDX                            // li r10,0
   642  	VNCIPHERLAST	INOUT, RNDKEY0, INOUT      // vncipherlast v2,v2,v0
   643  	CMPU	LEN, $16                           // cmpldi r5,16
   644  	VXOR	INOUT, IVEC, INOUT                 // vxor v2,v2,v4
   645  	VOR	TMP, TMP, IVEC                     // vor v4,v3,v3  this block's ciphertext becomes the next chaining value
   646  	P8_STXVB16X(INOUT, OUT, R0)                // store text in BE order
   647  	ADD	$16, OUT                           // addi r4,r4,16
   648  	BGE	Lcbc_dec                           // bge  another whole block remains
   649  
   650  Lcbc_done:
   651  	VXOR	RNDKEY0, RNDKEY0, RNDKEY0          // clear key register
   652  	P8_STXVB16X(IVEC, R0, IVP)                 // Save ivec in BE order for next round.
   653  	RET                                        // bclr 20,lt,0
   654