github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/crypto/aes/asm_ppc64le.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This is a derived work from OpenSSL of AES using assembly optimizations. The
     6  // original code was written by Andy Polyakov <appro@openssl.org> and it's dual
     7  // licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
     8  // it. For further details see http://www.openssl.org/~appro/cryptogams/.
     9  
    10  // Original code can be found at the link below:
    11  // https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aesp8-ppc.pl
    12  
    13  // The code is based on 627c953376 from 4 Jun 2016. I changed some function
    14  // names to better follow Go naming conventions. For instance, the functions
    15  // aes_p8_set_{en,de}crypt_key became set{En,De}cryptKeyAsm. I also split
    16  // setEncryptKeyAsm in two parts, creating a new section
    17  // (doEncryptKeyAsm). This was necessary to avoid overwriting the arguments
    18  // when setDecryptKeyAsm calls setEncryptKeyAsm. There were other
    19  // modifications as well, but the functionality was kept the same.
    20  
    21  #include "textflag.h"
    22  
    23  // For set{En,De}cryptKeyAsm
        // General-purpose registers:
        //   INP    key pointer on entry; later reused for the tail-flush address
        //          and for the exit code
        //   BITS   key length in bits
        //   OUT    write cursor into the key schedule
        //   PTR    pending exit code, then an address inside the rcon table
        //   CNT    iteration count that is copied into CTR
        //   ROUNDS round count stored after the schedule (R8 is also used directly
        //          as a table offset before the rounds value is set)
        //   TEMP   scratch
    24  #define INP     R3
    25  #define BITS    R4
    26  #define OUT     R5
    27  #define PTR     R6
    28  #define CNT     R7
    29  #define ROUNDS  R8
    30  #define TEMP    R19
        // Vector registers:
        //   ZERO            cleared with VXOR by the key schedule
        //   IN0, IN1        key material / current round key words
        //   KEY, TMP        working values of the expansion step
        //   RCON, MASK      values loaded from the rcon table
        //   STAGE           vector about to be stored
        //   OUTPERM, OUTMASK, OUTHEAD, OUTTAIL
        //                   state used to store round keys to an unaligned OUT
        // {en,de}cryptBlockAsm reuse ZERO, IN0, IN1, KEY, RCON and MASK for other
        // purposes (for example, ZERO holds the data block there).
    31  #define ZERO    V0
    32  #define IN0     V1
    33  #define IN1     V2
    34  #define KEY     V3
    35  #define RCON    V4
    36  #define MASK    V5
    37  #define TMP     V6
    38  #define STAGE   V7
    39  #define OUTPERM V8
    40  #define OUTMASK V9
    41  #define OUTHEAD V10
    42  #define OUTTAIL V11
    43  
    44  // For {en,de}cryptBlockAsm
        //   BLK_INP    src pointer
        //   BLK_OUT    dst pointer
        //   BLK_KEY    key schedule pointer
        //   BLK_ROUNDS word read from offset 240 of the schedule, turned into the
        //              loop count
        //   BLK_IDX    byte offset used by the indexed vector loads and stores
    45  #define BLK_INP    R3
    46  #define BLK_OUT    R4
    47  #define BLK_KEY    R5
    48  #define BLK_ROUNDS R6
    49  #define BLK_IDX    R7
    50  
    51  DATA  ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON: starting round constant, 0x01 in every word
    52  DATA  ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON (second half of the vector)
    53  DATA  ·rcon+0x10(SB)/8, $0x1b0000001b000000 // RCON reloaded by the 128-bit path for its last two round keys
    54  DATA  ·rcon+0x18(SB)/8, $0x1b0000001b000000 // (second half of the vector)
    55  DATA  ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK: permute selector for the "rotate-n-splat" step
    56  DATA  ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK (second half of the vector)
    57  DATA  ·rcon+0x30(SB)/8, $0x0000000000000000 // zero row; not referenced by the code visible here
    58  DATA  ·rcon+0x38(SB)/8, $0x0000000000000000 // (second half of the vector)
    59  GLOBL ·rcon(SB), RODATA, $64                 // 64-byte read-only table
    60  
    61  // func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
        // Entry point for building the encryption key schedule. It only moves the
        // Go arguments into INP/BITS/OUT and jumps to doEncryptKeyAsm, which does
        // the work, stores the result in ret+24(FP) and returns to our caller.
    62  TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
    63  	// Load the arguments into the registers
    64  	MOVD key+0(FP), INP            // key pointer
    65  	MOVD keylen+8(FP), BITS        // key length in bits (validated by doEncryptKeyAsm)
    66  	MOVD enc+16(FP), OUT           // where the expanded key is written
    67  	JMP ·doEncryptKeyAsm(SB)       // tail jump: LR is untouched, so its RET returns to our caller
    68  
    69  // This text is used by both setEncryptKeyAsm and setDecryptKeyAsm.
        // It takes its inputs in registers rather than from the Go frame:
        //   INP = key pointer, BITS = key length in bits, OUT = schedule pointer.
        // The exit code is left in INP (R3) and also stored to ret+24(FP):
        //    0  success
        //   -1  INP or OUT is zero
        //   -2  BITS is below 128, above 256, or not a multiple of 64
        // On success ROUNDS (R8) is 10, 12 or 14 and has been stored at 0(OUT),
        // with OUT advanced 240 bytes past its value on entry.
        // Registers written here: R3-R9, TEMP (R19), CTR, CR0 and V0-V11.
    70  TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
    71  	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
    72  
    73  	// Check arguments
    74  	MOVD $-1, PTR                  // li    6,-1       exit code to -1 (255)
    75  	CMPU INP, $0                   // cmpldi r3,0      input key pointer set?
    76  	BC 0x0E, 2, enc_key_abort      // beq-  .Lenc_key_abort
    77  	CMPU OUT, $0                   // cmpldi r5,0      output key pointer set?
    78  	BC 0x0E, 2, enc_key_abort      // beq-  .Lenc_key_abort
    79  	MOVD $-2, PTR                  // li    6,-2       exit code to -2 (254)
    80  	CMPW BITS, $128                // cmpwi 4,128      greater or equal to 128
    81  	BC 0x0E, 0, enc_key_abort      // blt-  .Lenc_key_abort
    82  	CMPW BITS, $256                // cmpwi 4,256      lesser or equal to 256
    83  	BC 0x0E, 1, enc_key_abort      // bgt-  .Lenc_key_abort
    84  	ANDCC $0x3f, BITS, TEMP        // andi. 0,4,0x3f   multiple of 64 (result goes to TEMP, only CR0 is used)
    85  	BC 0x06, 2, enc_key_abort      // bne-  .Lenc_key_abort
    86  
    87  	MOVD $·rcon(SB), PTR           // PTR point to rcon addr
    88  
    89  	// Get key from memory and write aligned into VR
    90  	NEG INP, R9                    // neg   9,3        R9 is ~INP + 1
    91  	LVX (INP)(R0), IN0             // lvx   1,0,3      Load key inside IN0
    92  	ADD $15, INP, INP              // addi  3,3,15     Add 15B to INP addr
    93  	LVSR (R9)(R0), KEY             // lvsr  3,0,9
    94  	MOVD $0x20, R8                 // li    8,0x20     R8 = 32 (offset of MASK inside rcon)
    95  	CMPW BITS, $192                // cmpwi 4,192      Key size == 192? (CR0 is consumed by BLT/BEQ below)
    96  	LVX (INP)(R0), IN1             // lvx   2,0,3
    97  	VSPLTISB $0x0f, MASK           // vspltisb 5,0x0f  0x0f0f0f0f... mask
    98  	LVX (PTR)(R0), RCON            // lvx   4,0,6      Load first 16 bytes into RCON
    99  	VXOR KEY, MASK, KEY            // vxor  3,3,5      Adjust for byte swap
   100  	LVX (PTR)(R8), MASK            // lvx   5,8,6
   101  	ADD $0x10, PTR, PTR            // addi  6,6,0x10   PTR to next 16 bytes of RCON
   102  	VPERM IN0, IN1, KEY, IN0       // vperm 1,1,2,3    Align
   103  	MOVD $8, CNT                   // li    7,8        CNT = 8
   104  	VXOR ZERO, ZERO, ZERO          // vxor  0,0,0      Zero to be zero :)
   105  	MOVD CNT, CTR                  // mtctr 7          Set the counter to 8 (rounds)
   106  
        	// Set up the state used to store round keys to a possibly unaligned OUT
   107  	LVSL (OUT)(R0), OUTPERM        // lvsl  8,0,5
   108  	VSPLTISB $-1, OUTMASK          // vspltisb      9,-1
   109  	LVX (OUT)(R0), OUTHEAD         // lvx   10,0,5
   110  	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK  // vperm 9,9,0,8
   111  
        	// Dispatch on key size. CR0 still holds the BITS-vs-192 comparison made
        	// above; none of the instructions in between (including the ADDs here)
        	// update it.
   112  	BLT loop128                    // blt   .Loop128
   113  	ADD $8, INP, INP               // addi  3,3,8
   114  	BEQ l192                       // beq   .L192
   115  	ADD $8, INP, INP               // addi  3,3,8
   116  	JMP l256                       // b     .L256
   117  
        // 128-bit key: every iteration stores the current round key and derives
        // the next one. CTR was set to 8 above.
   118  loop128:
   119  	// Key schedule (Round 1 to 8)
   120  	VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5         Rotate-n-splat
   121  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   122  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8    Rotate
   123  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   124  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   125  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   126  	STVX STAGE, (OUT+R0)           // stvx 7,0,5        Write to output
   127  	ADD $16, OUT, OUT              // addi 5,5,16       Point to the next round
   128  
   129  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   130  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   131  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   132  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   133  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   134  	VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4     Double the round constant
   135  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   136  	BC 0x10, 0, loop128            // bdnz .Loop128
   137  
   138  	LVX (PTR)(R0), RCON            // lvx 4,0,6     Last two round keys
   139  
   140  	// Key schedule (Round 9)
   141  	VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5   Rotate-n-splat
   142  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   143  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8  Rotate
   144  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   145  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   146  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   147  	STVX STAGE, (OUT+R0)           // stvx 7,0,5   Round 9
   148  	ADD $16, OUT, OUT              // addi 5,5,16
   149  
   150  	// Key schedule (Round 10)
   151  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   152  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   153  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   154  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   155  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   156  	VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   157  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   158  
   159  	VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5   Rotate-n-splat
   160  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   161  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8  Rotate
   162  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   163  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   164  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   165  	STVX STAGE, (OUT+R0)           // stvx 7,0,5    Round 10
   166  	ADD $16, OUT, OUT              // addi 5,5,16
   167  
   168  	// Key schedule (Round 11)
   169  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   170  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   171  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   172  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   173  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   174  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   175  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   176  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   177  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   178  	STVX STAGE, (OUT+R0)           // stvx 7,0,5  Round 11
   179  
   180  	ADD $15, OUT, INP              // addi  3,5,15     INP = address used by the tail flush at done
   181  	ADD $0x50, OUT, OUT            // addi  5,5,0x50   OUT = entry value + 240 (10*16 + 0x50)
   182  
   183  	MOVD $10, ROUNDS               // li    8,10
   184  	JMP done                       // b     .Ldone
   185  
        // 192-bit key: load the rest of the key, store the first round key, then
        // run loop192 four times; each iteration performs three stores.
   186  l192:
   187  	LVX (INP)(R0), TMP             // lvx 6,0,3
   188  	MOVD $4, CNT                   // li 7,4
   189  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   190  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   191  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   192  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   193  	ADD $16, OUT, OUT              // addi 5,5,16
   194  	VPERM IN1, TMP, KEY, IN1       // vperm 2,2,6,3
   195  	VSPLTISB $8, KEY               // vspltisb 3,8
   196  	MOVD CNT, CTR                  // mtctr 7
   197  	VSUBUBM MASK, KEY, MASK        // vsububm 5,5,3     Subtract 8 from every byte of MASK
   198  
   199  loop192:
   200  	VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   201  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   202  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   203  
   204  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   205  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   206  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   207  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   208  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   209  
   210  	VSLDOI $8, ZERO, IN1, STAGE    // vsldoi 7,0,2,8
   211  	VSPLTW $3, IN0, TMP            // vspltw 6,1,3
   212  	VXOR TMP, IN1, TMP             // vxor 6,6,2
   213  	VSLDOI $12, ZERO, IN1, IN1     // vsldoi 2,0,2,12
   214  	VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   215  	VXOR IN1, TMP, IN1             // vxor 2,2,6
   216  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   217  	VXOR IN1, KEY, IN1             // vxor 2,2,3
   218  	VSLDOI $8, STAGE, IN0, STAGE   // vsldoi 7,7,1,8
   219  
   220  	VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   221  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   222  	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
   223  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   224  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   225  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   226  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   227  	ADD $16, OUT, OUT              // addi 5,5,16
   228  
   229  	VSLDOI $8, IN0, IN1, STAGE     // vsldoi 7,1,2,8
   230  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   231  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   232  	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
   233  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   234  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   235  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   236  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   237  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   238  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   239  	ADD $16, OUT, OUT              // addi 5,5,16
   240  
   241  	VSPLTW $3, IN0, TMP            // vspltw 6,1,3
   242  	VXOR TMP, IN1, TMP             // vxor 6,6,2
   243  	VSLDOI $12, ZERO, IN1, IN1     // vsldoi 2,0,2,12
   244  	VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   245  	VXOR IN1, TMP, IN1             // vxor 2,2,6
   246  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   247  	VXOR IN1, KEY, IN1             // vxor 2,2,3
   248  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   249  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   250  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   251  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   252  	ADD $15, OUT, INP              // addi 3,5,15       INP = address used by the tail flush at done
   253  	ADD $16, OUT, OUT              // addi 5,5,16
   254  	BC 0x10, 0, loop192           // bdnz .Loop192
   255  
   256  	MOVD $12, ROUNDS               // li 8,12
   257  	ADD $0x20, OUT, OUT            // addi 5,5,0x20     OUT = entry value + 240 (16 + 4*48 + 0x20)
   258  	JMP done                       // b .Ldone
   259  
        // 256-bit key: load the second half of the key, store the first round key,
        // then run loop256 (two stores per pass) until CTR, set to 7, reaches zero.
        // OUT ends at entry value + 240 (16 + 7*32).
   260  l256:
   261  	LVX (INP)(R0), TMP             // lvx 6,0,3
   262  	MOVD $7, CNT                   // li 7,7
   263  	MOVD $14, ROUNDS               // li 8,14
   264  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   265  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   266  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   267  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   268  	ADD $16, OUT, OUT              // addi 5,5,16
   269  	VPERM IN1, TMP, KEY, IN1       // vperm 2,2,6,3
   270  	MOVD CNT, CTR                  // mtctr 7
   271  
   272  loop256:
   273  	VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   274  	VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   275  	VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
   276  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   277  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   278  	VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   279  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   280  	ADD $16, OUT, OUT              // addi 5,5,16
   281  
   282  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   283  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   284  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   285  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   286  	VXOR IN0, TMP, IN0             // vxor 1,1,6
   287  	VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   288  	VXOR IN0, KEY, IN0             // vxor 1,1,3
   289  	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   290  	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   291  	VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   292  	STVX STAGE, (OUT+R0)           // stvx 7,0,5
   293  	ADD $15, OUT, INP              // addi 3,5,15       INP = address used by the tail flush at done
   294  	ADD $16, OUT, OUT              // addi 5,5,16
   295  	BC 0x12, 0, done               // bdz .Ldone        Decrement CTR, leave when it hits zero
   296  
        	// Not finished: derive the next IN1 (S-box of the last word of IN0, no
        	// rotation and no round constant) and go around again.
   297  	VSPLTW $3, IN0, KEY            // vspltw 3,1,3
   298  	VSLDOI $12, ZERO, IN1, TMP     // vsldoi 6,0,2,12
   299  	VSBOX KEY, KEY                 // vsbox 3,3
   300  
   301  	VXOR IN1, TMP, IN1             // vxor 2,2,6
   302  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   303  	VXOR IN1, TMP, IN1             // vxor 2,2,6
   304  	VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   305  	VXOR IN1, TMP, IN1             // vxor 2,2,6
   306  
   307  	VXOR IN1, KEY, IN1             // vxor 2,2,3
   308  	JMP loop256                    // b .Loop256
   309  
        // Success exit: merge the pending OUTHEAD bytes into the vector at INP
        // (last store address + 15) and write it back, then record ROUNDS at
        // 0(OUT) and select exit code 0.
   310  done:
   311  	LVX (INP)(R0), IN1             // lvx   2,0,3
   312  	VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
   313  	STVX IN1, (INP+R0)             // stvx  2,0,3
   314  	MOVD $0, PTR                   // li    6,0    set PTR to 0 (exit code 0)
   315  	MOVW ROUNDS, 0(OUT)            // stw   8,0(5)
   316  
        // Common exit: PTR holds the exit code (0, -1 or -2).
   317  enc_key_abort:
   318  	MOVD PTR, INP                  // mr    3,6    set exit code with PTR value
   319  	MOVD INP, ret+24(FP)           // Put return value into the FP
   320  	RET                            // blr
   321  
   322  // func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
        // Builds the encryption key schedule in dec by calling doEncryptKeyAsm and,
        // if that succeeded, swaps the 16-byte round keys end-for-end in place so
        // the sequence is inverted for decryption. The return value is the
        // doEncryptKeyAsm exit code (0 on success, non-zero on a bad argument).
   323  TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
   324  	// Load the arguments into the registers
   325  	MOVD key+0(FP), INP
   326  	MOVD keylen+8(FP), BITS
   327  	MOVD dec+16(FP), OUT
   328  
        	// CALL overwrites LR, so keep our return address in R10 across it;
        	// doEncryptKeyAsm does not write R10.
   329  	MOVD LR, R10                   // mflr 10
   330  	CALL ·doEncryptKeyAsm(SB)
   331  	MOVD R10, LR                   // mtlr 10
   332  
   333  	CMPW INP, $0                   // cmpwi 3,0  exit 0 = ok
   334  	BC 0x06, 2, dec_key_abort      // bne- .Ldec_key_abort
   335  
   336  	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
        	// and left OUT 240 bytes past the start of the schedule.
   337  	SLW $4, ROUNDS, CNT            // slwi 7,8,4     CNT = ROUNDS*16, offset of the last round key
   338  	SUB $240, OUT, INP             // subi 3,5,240   INP = first round key
   339  	SRW $1, ROUNDS, ROUNDS         // srwi 8,8,1     ROUNDS/2 swaps are needed
   340  	ADD R7, INP, OUT               // add 5,3,7      OUT = last round key (R7 is CNT)
   341  	MOVD ROUNDS, CTR               // mtctr 8
   342  
   343  // dec_key will invert the key sequence in order to be used for decrypt
        // Each pass swaps the 16 bytes at INP with the 16 bytes at OUT, then moves
        // INP forward and OUT backward by 16. R6-R12 and TEMP are used as scratch;
        // R10 is free again because LR has already been restored.
   344  dec_key:
   345  	MOVWZ 0(INP), TEMP             // lwz 0, 0(3)
   346  	MOVWZ 4(INP), R6               // lwz 6, 4(3)
   347  	MOVWZ 8(INP), R7               // lwz 7, 8(3)
   348  	MOVWZ 12(INP), R8              // lwz 8, 12(3)
   349  	ADD $16, INP, INP              // addi 3,3,16
   350  	MOVWZ 0(OUT), R9               // lwz 9, 0(5)
   351  	MOVWZ 4(OUT), R10              // lwz 10,4(5)
   352  	MOVWZ 8(OUT), R11              // lwz 11,8(5)
   353  	MOVWZ 12(OUT), R12             // lwz 12,12(5)
   354  	MOVW TEMP, 0(OUT)              // stw 0, 0(5)
   355  	MOVW R6, 4(OUT)                // stw 6, 4(5)
   356  	MOVW R7, 8(OUT)                // stw 7, 8(5)
   357  	MOVW R8, 12(OUT)               // stw 8, 12(5)
   358  	SUB $16, OUT, OUT              // subi 5,5,16
   359  	MOVW R9, -16(INP)              // stw 9, -16(3)
   360  	MOVW R10, -12(INP)             // stw 10,-12(3)
   361  	MOVW R11, -8(INP)              // stw 11,-8(3)
   362  	MOVW R12, -4(INP)              // stw 12,-4(3)
   363  	BC 0x10, 0, dec_key            // bdnz .Ldeckey
   364  
   365  	XOR R3, R3, R3                 // xor 3,3,3      Clean R3 (return 0; R3 was used as INP above)
   366  
        // When reached by the branch above, R3 still holds the non-zero exit code
        // from doEncryptKeyAsm.
   367  dec_key_abort:
   368  	MOVD R3, ret+24(FP)            // Put return value into the FP
   369  	RET                            // blr
   370  
   371  
   372  // func encryptBlockAsm(dst, src *byte, enc *uint32)
        // Encrypts the 16-byte block at src with the key schedule enc and writes
        // the result to dst. The round count is read from the word at byte offset
        // 240 of enc. Nothing is returned.
        // src, dst and enc are accessed with lvx/stvx, and values that straddle an
        // alignment boundary are assembled with lvsl/lvsr + vperm. The output is
        // merged (vsel) into the two vectors loaded from dst and dst+15, which are
        // then stored back, so bytes around dst that share those vectors are read
        // and rewritten unchanged.
        // Registers written: R3-R7, R11, CTR and V0-V5.
   373  TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
   374  	// Load the arguments into the registers
   375  	MOVD dst+0(FP), BLK_OUT
   376  	MOVD src+8(FP), BLK_INP
   377  	MOVD enc+16(FP), BLK_KEY
   378  
   379  	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)   Round count stored after the schedule
   380  	MOVD $15, BLK_IDX              // li 7,15
   381  
        	// Load the input block into ZERO (used here as the cipher state, not as
        	// a zero vector) and the first round key into IN0.
   382  	LVX (BLK_INP)(R0), ZERO        // lvx 0,0,3
   383  	NEG BLK_OUT, R11               // neg 11,4
   384  	LVX (BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   385  	LVSL (BLK_INP)(R0), IN1        // lvsl 2,0,3
   386  	VSPLTISB $0x0f, RCON           // vspltisb 4,0x0f
   387  	LVSR (R11)(R0), KEY            // lvsr 3,0,11    Permute control for the output, used after the rounds
   388  	VXOR IN1, RCON, IN1            // vxor 2,2,4
   389  	MOVD $16, BLK_IDX              // li 7,16
   390  	VPERM ZERO, IN0, IN1, ZERO     // vperm 0,0,1,2
   391  	LVX (BLK_KEY)(R0), IN0         // lvx 1,0,5
   392  	LVSR (BLK_KEY)(R0), MASK       // lvsr 5,0,5     Permute control for aligning round keys
   393  	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
   394  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   395  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   396  	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1     Loop count = rounds/2 - 1
   397  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   398  
   399  	VXOR ZERO, IN0, ZERO           // vxor 0,0,1     XOR the state with the first round key
   400  	LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   401  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   402  	MOVD BLK_ROUNDS, CTR           // mtctr 6
   403  
        // Two cipher rounds per iteration. IN0 and IN1 alternate between holding
        // the vector just loaded from the schedule and the aligned round key.
   404  loop_enc:
   405  	VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   406  	VCIPHER ZERO, IN1, ZERO        // vcipher 0,0,2
   407  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   408  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   409  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   410  	VCIPHER ZERO, IN0, ZERO        // vcipher 0,0,1
   411  	LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   412  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   413  	BC 0x10, 0, loop_enc           // bdnz .Loop_enc
   414  
        	// Final two rounds: one more vcipher, then vcipherlast.
   415  	VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   416  	VCIPHER ZERO, IN1, ZERO        // vcipher 0,0,2
   417  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   418  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   419  	VCIPHERLAST ZERO, IN0, ZERO    // vcipherlast 0,0,1
   420  
        	// Write the result: build a select mask from KEY, rotate the state into
        	// place, merge it with the vectors loaded from dst and dst+15, and store
        	// both back.
   421  	VSPLTISB $-1, IN1              // vspltisb 2,-1
   422  	VXOR IN0, IN0, IN0             // vxor 1,1,1
   423  	MOVD $15, BLK_IDX              // li 7,15
   424  	VPERM IN1, IN0, KEY, IN1       // vperm 2,2,1,3
   425  	VXOR KEY, RCON, KEY            // vxor 3,3,4
   426  	LVX (BLK_OUT)(R0), IN0         // lvx 1,0,4
   427  	VPERM ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3
   428  	VSEL IN0, ZERO, IN1, IN0       // vsel 1,1,0,2
   429  	LVX (BLK_OUT)(BLK_IDX), RCON   // lvx 4,7,4
   430  	STVX IN0, (BLK_OUT+R0)         // stvx 1,0,4
   431  	VSEL ZERO, RCON, IN1, ZERO     // vsel 0,0,4,2
   432  	STVX ZERO, (BLK_OUT+BLK_IDX)   // stvx 0,7,4
   433  
   434  	RET                            // blr
   435  
   436  
   437  // func decryptBlockAsm(dst, src *byte, dec *uint32)
        // Decrypts the 16-byte block at src with the key schedule dec and writes
        // the result to dst. The instruction sequence mirrors encryptBlockAsm with
        // vncipher/vncipherlast in place of vcipher/vcipherlast; the round count is
        // read from the word at byte offset 240 of dec. Nothing is returned.
        // The output is merged (vsel) into the two vectors loaded from dst and
        // dst+15, which are then stored back, so bytes around dst that share those
        // vectors are read and rewritten unchanged.
        // Registers written: R3-R7, R11, CTR and V0-V5.
   438  TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
   439  	// Load the arguments into the registers
   440  	MOVD dst+0(FP), BLK_OUT
   441  	MOVD src+8(FP), BLK_INP
   442  	MOVD dec+16(FP), BLK_KEY
   443  
   444  	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)   Round count stored after the schedule
   445  	MOVD $15, BLK_IDX              // li 7,15
   446  
        	// Load the input block into ZERO (used here as the cipher state, not as
        	// a zero vector) and the first round key into IN0.
   447  	LVX (BLK_INP)(R0), ZERO        // lvx 0,0,3
   448  	NEG BLK_OUT, R11               // neg 11,4
   449  	LVX (BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   450  	LVSL (BLK_INP)(R0), IN1        // lvsl 2,0,3
   451  	VSPLTISB $0x0f, RCON           // vspltisb 4,0x0f
   452  	LVSR (R11)(R0), KEY            // lvsr 3,0,11    Permute control for the output, used after the rounds
   453  	VXOR IN1, RCON, IN1            // vxor 2,2,4
   454  	MOVD $16, BLK_IDX              // li 7,16
   455  	VPERM ZERO, IN0, IN1, ZERO     // vperm 0,0,1,2
   456  	LVX (BLK_KEY)(R0), IN0         // lvx 1,0,5
   457  	LVSR (BLK_KEY)(R0), MASK       // lvsr 5,0,5     Permute control for aligning round keys
   458  	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
   459  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   460  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   461  	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1     Loop count = rounds/2 - 1
   462  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   463  
   464  	VXOR ZERO, IN0, ZERO           // vxor 0,0,1     XOR the state with the first round key
   465  	LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   466  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   467  	MOVD BLK_ROUNDS, CTR           // mtctr 6
   468  
        // Two inverse-cipher rounds per iteration. IN0 and IN1 alternate between
        // holding the vector just loaded from the schedule and the aligned round key.
   469  loop_dec:
   470  	VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   471  	VNCIPHER ZERO, IN1, ZERO       // vncipher 0,0,2
   472  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   473  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   474  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   475  	VNCIPHER ZERO, IN0, ZERO       // vncipher 0,0,1
   476  	LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   477  	ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   478  	BC 0x10, 0, loop_dec           // bdnz .Loop_dec
   479  
        	// Final two rounds: one more vncipher, then vncipherlast.
   480  	VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   481  	VNCIPHER ZERO, IN1, ZERO       // vncipher 0,0,2
   482  	LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   483  	VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   484  	VNCIPHERLAST ZERO, IN0, ZERO   // vncipherlast 0,0,1
   485  
        	// Write the result: build a select mask from KEY, rotate the state into
        	// place, merge it with the vectors loaded from dst and dst+15, and store
        	// both back.
   486  	VSPLTISB $-1, IN1              // vspltisb 2,-1
   487  	VXOR IN0, IN0, IN0             // vxor 1,1,1
   488  	MOVD $15, BLK_IDX              // li 7,15
   489  	VPERM IN1, IN0, KEY, IN1       // vperm 2,2,1,3
   490  	VXOR KEY, RCON, KEY            // vxor 3,3,4
   491  	LVX (BLK_OUT)(R0), IN0         // lvx 1,0,4
   492  	VPERM ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3
   493  	VSEL IN0, ZERO, IN1, IN0       // vsel 1,1,0,2
   494  	LVX (BLK_OUT)(BLK_IDX), RCON   // lvx 4,7,4
   495  	STVX IN0, (BLK_OUT+R0)         // stvx 1,0,4
   496  	VSEL ZERO, RCON, IN1, ZERO     // vsel 0,0,4,2
   497  	STVX ZERO, (BLK_OUT+BLK_IDX)   // stvx 0,7,4
   498  
   499  	RET                            // blr