github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/crypto/aes/asm_amd64.s (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func hasAsm() bool
     8  // hasAsm reports whether CPUID advertises AES-NI (leaf 1, ECX bit 25).
     9  TEXT ·hasAsm(SB),NOSPLIT,$0
    10  	MOVL $1, AX // EAX = 1 selects the leaf queried below
    11  	CPUID // overwrites AX, BX, CX and DX
    12  	MOVL CX, AX // work on a copy of the ECX result
    13  	SHRL $25, AX // move bit 25 down to bit 0
    14  	ANDL $1, AX // keep only that bit: result is 0 or 1
    15  	MOVB AX, ret+0(FP)
    16  	RET
    17  
    18  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
    19  TEXT ·encryptBlockAsm(SB),NOSPLIT,$0 // encrypts the 16-byte block at src into dst with the 16-byte round keys at xk
    20  	MOVQ src+24(FP), DX
    21  	MOVQ dst+16(FP), DI
    22  	MOVQ xk+8(FP), SI
    23  	MOVQ nr+0(FP), CX
    24  	MOVUPS (DX), X0 // X0 = block state
    25  	MOVUPS (SI), X2 // X2 = round key 0
    26  	PXOR X2, X0 // initial key addition
    27  	LEAQ 16(SI), SI // SI -> round key 1
    28  	CMPQ CX, $12 // nr == 12: 12 rounds; nr < 12: 10 rounds; otherwise 14 rounds
    29  	JE Lenc_nr12
    30  	JB Lenc_nr10
    31  Lenc_nr14:
    32  	MOVUPS (SI), X2
    33  	AESENC X2, X0
    34  	MOVUPS 16(SI), X2
    35  	AESENC X2, X0
    36  	LEAQ 32(SI), SI // two extra rounds done; fall through
    37  Lenc_nr12:
    38  	MOVUPS (SI), X2
    39  	AESENC X2, X0
    40  	MOVUPS 16(SI), X2
    41  	AESENC X2, X0
    42  	LEAQ 32(SI), SI // two extra rounds done; fall through
    43  Lenc_nr10:
    44  	MOVUPS (SI), X2 // common tail: nine AESENC rounds plus AESENCLAST
    45  	AESENC X2, X0
    46  	MOVUPS 16(SI), X2
    47  	AESENC X2, X0
    48  	MOVUPS 32(SI), X2
    49  	AESENC X2, X0
    50  	MOVUPS 48(SI), X2
    51  	AESENC X2, X0
    52  	MOVUPS 64(SI), X2
    53  	AESENC X2, X0
    54  	MOVUPS 80(SI), X2
    55  	AESENC X2, X0
    56  	MOVUPS 96(SI), X2
    57  	AESENC X2, X0
    58  	MOVUPS 112(SI), X2
    59  	AESENC X2, X0
    60  	MOVUPS 128(SI), X2
    61  	AESENC X2, X0
    62  	MOVUPS 144(SI), X2
    63  	AESENCLAST X2, X0
    64  	MOVUPS X0, (DI) // write the result block
    65  	RET
    66  
    67  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
    68  TEXT ·decryptBlockAsm(SB),NOSPLIT,$0 // decrypts the 16-byte block at src into dst with the 16-byte round keys at xk
    69  	MOVQ src+24(FP), DX
    70  	MOVQ dst+16(FP), DI
    71  	MOVQ xk+8(FP), SI
    72  	MOVQ nr+0(FP), CX
    73  	MOVUPS (DX), X0 // X0 = block state
    74  	MOVUPS (SI), X2 // X2 = first round key of the schedule at xk
    75  	PXOR X2, X0 // initial key addition
    76  	LEAQ 16(SI), SI // SI -> second round key
    77  	CMPQ CX, $12 // nr == 12: 12 rounds; nr < 12: 10 rounds; otherwise 14 rounds
    78  	JE Ldec_nr12
    79  	JB Ldec_nr10
    80  Ldec_nr14:
    81  	MOVUPS (SI), X2
    82  	AESDEC X2, X0
    83  	MOVUPS 16(SI), X2
    84  	AESDEC X2, X0
    85  	LEAQ 32(SI), SI // two extra rounds done; fall through
    86  Ldec_nr12:
    87  	MOVUPS (SI), X2
    88  	AESDEC X2, X0
    89  	MOVUPS 16(SI), X2
    90  	AESDEC X2, X0
    91  	LEAQ 32(SI), SI // two extra rounds done; fall through
    92  Ldec_nr10:
    93  	MOVUPS (SI), X2 // common tail: nine AESDEC rounds plus AESDECLAST
    94  	AESDEC X2, X0
    95  	MOVUPS 16(SI), X2
    96  	AESDEC X2, X0
    97  	MOVUPS 32(SI), X2
    98  	AESDEC X2, X0
    99  	MOVUPS 48(SI), X2
   100  	AESDEC X2, X0
   101  	MOVUPS 64(SI), X2
   102  	AESDEC X2, X0
   103  	MOVUPS 80(SI), X2
   104  	AESDEC X2, X0
   105  	MOVUPS 96(SI), X2
   106  	AESDEC X2, X0
   107  	MOVUPS 112(SI), X2
   108  	AESDEC X2, X0
   109  	MOVUPS 128(SI), X2
   110  	AESDEC X2, X0
   111  	MOVUPS 144(SI), X2
   112  	AESDECLAST X2, X0
   113  	MOVUPS X0, (DI) // write the result block
   114  	RET
   115  
   116  // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   117  // Writes the encryption key schedule to enc and the decryption key schedule to dec. Round keys are stored as 16-byte (uint128) values, not as individual uint32 words.
   118  TEXT ·expandKeyAsm(SB),NOSPLIT,$0
   119  	MOVQ nr+0(FP), CX // CX = nr: picks the expansion path below and later counts the decryption loop
   120  	MOVQ key+8(FP), AX // AX = key (16, 24 or 32 bytes are read, depending on the path taken)
   121  	MOVQ enc+16(FP), BX // BX = write cursor into enc; every _expand_key_* helper advances it
   122  	MOVQ dec+24(FP), DX // DX = write cursor into dec
   123  	MOVUPS (AX), X0 // X0 = key bytes 0..15
   124  	// enc: build the encryption schedule
   125  	MOVUPS X0, (BX) // round key 0 is key bytes 0..15 unchanged
   126  	ADDQ $16, BX
   127  	PXOR X4, X4 // _expand_key_* expect X4 to be zero
   128  	CMPL CX, $12 // nr == 12 takes the 192-bit path, nr < 12 the 128-bit path, anything else falls through to 256-bit
   129  	JE Lexp_enc196
   130  	JB Lexp_enc128
   131  Lexp_enc256:
   132  	MOVUPS 16(AX), X2 // X2 = key bytes 16..31
   133  	MOVUPS X2, (BX) // stored unchanged as round key 1
   134  	ADDQ $16, BX
   135  	AESKEYGENASSIST $0x01, X2, X1 // X1 = key-generation assist of X2; the immediate is the round constant
   136  	CALL _expand_key_256a<>(SB) // derives the next X0 from X1 and appends it at (BX)
   137  	AESKEYGENASSIST $0x01, X0, X1
   138  	CALL _expand_key_256b<>(SB) // derives the next X2 from X1 and appends it at (BX)
   139  	AESKEYGENASSIST $0x02, X2, X1
   140  	CALL _expand_key_256a<>(SB)
   141  	AESKEYGENASSIST $0x02, X0, X1
   142  	CALL _expand_key_256b<>(SB)
   143  	AESKEYGENASSIST $0x04, X2, X1
   144  	CALL _expand_key_256a<>(SB)
   145  	AESKEYGENASSIST $0x04, X0, X1
   146  	CALL _expand_key_256b<>(SB)
   147  	AESKEYGENASSIST $0x08, X2, X1
   148  	CALL _expand_key_256a<>(SB)
   149  	AESKEYGENASSIST $0x08, X0, X1
   150  	CALL _expand_key_256b<>(SB)
   151  	AESKEYGENASSIST $0x10, X2, X1
   152  	CALL _expand_key_256a<>(SB)
   153  	AESKEYGENASSIST $0x10, X0, X1
   154  	CALL _expand_key_256b<>(SB)
   155  	AESKEYGENASSIST $0x20, X2, X1
   156  	CALL _expand_key_256a<>(SB)
   157  	AESKEYGENASSIST $0x20, X0, X1
   158  	CALL _expand_key_256b<>(SB)
   159  	AESKEYGENASSIST $0x40, X2, X1
   160  	CALL _expand_key_256a<>(SB) // total written on this path: 2 key blocks + 7 (a) + 6 (b) = 15 round keys
   161  	JMP Lexp_dec
   162  Lexp_enc196: // 192-bit key path (the label name says 196)
   163  	MOVQ 16(AX), X2 // X2 low 8 bytes = key bytes 16..23
   164  	AESKEYGENASSIST $0x01, X2, X1
   165  	CALL _expand_key_192a<>(SB) // appends 32 bytes at (BX)
   166  	AESKEYGENASSIST $0x02, X2, X1
   167  	CALL _expand_key_192b<>(SB) // appends 16 bytes at (BX)
   168  	AESKEYGENASSIST $0x04, X2, X1
   169  	CALL _expand_key_192a<>(SB)
   170  	AESKEYGENASSIST $0x08, X2, X1
   171  	CALL _expand_key_192b<>(SB)
   172  	AESKEYGENASSIST $0x10, X2, X1
   173  	CALL _expand_key_192a<>(SB)
   174  	AESKEYGENASSIST $0x20, X2, X1
   175  	CALL _expand_key_192b<>(SB)
   176  	AESKEYGENASSIST $0x40, X2, X1
   177  	CALL _expand_key_192a<>(SB)
   178  	AESKEYGENASSIST $0x80, X2, X1
   179  	CALL _expand_key_192b<>(SB) // total written on this path: 16 + 4*(32+16) bytes = 13 round keys
   180  	JMP Lexp_dec
   181  Lexp_enc128:
   182  	AESKEYGENASSIST $0x01, X0, X1 // each step derives the next round key from the previous one held in X0
   183  	CALL _expand_key_128<>(SB) // appends 16 bytes at (BX)
   184  	AESKEYGENASSIST $0x02, X0, X1
   185  	CALL _expand_key_128<>(SB)
   186  	AESKEYGENASSIST $0x04, X0, X1
   187  	CALL _expand_key_128<>(SB)
   188  	AESKEYGENASSIST $0x08, X0, X1
   189  	CALL _expand_key_128<>(SB)
   190  	AESKEYGENASSIST $0x10, X0, X1
   191  	CALL _expand_key_128<>(SB)
   192  	AESKEYGENASSIST $0x20, X0, X1
   193  	CALL _expand_key_128<>(SB)
   194  	AESKEYGENASSIST $0x40, X0, X1
   195  	CALL _expand_key_128<>(SB)
   196  	AESKEYGENASSIST $0x80, X0, X1
   197  	CALL _expand_key_128<>(SB)
   198  	AESKEYGENASSIST $0x1b, X0, X1
   199  	CALL _expand_key_128<>(SB)
   200  	AESKEYGENASSIST $0x36, X0, X1
   201  	CALL _expand_key_128<>(SB) // total written on this path: 1 + 10 = 11 round keys
   202  Lexp_dec:
   203  	// dec: copy the encryption round keys in reverse order, passing all but the first and last through AESIMC
   204  	SUBQ $16, BX // BX was one past the end of enc; now it points at the last round key written
   205  	MOVUPS (BX), X1
   206  	MOVUPS X1, (DX) // first decryption round key = last encryption round key, unchanged
   207  	DECQ CX // the loop below runs nr-1 times
   208  Lexp_dec_loop:
   209  	MOVUPS -16(BX), X1 // previous encryption round key (walking backwards)
   210  	AESIMC X1, X0 // X0 = AESIMC(X1)
   211  	MOVUPS X0, 16(DX) // store as the next decryption round key
   212  	SUBQ $16, BX
   213  	ADDQ $16, DX
   214  	DECQ CX
   215  	JNZ Lexp_dec_loop
   216  	MOVUPS -16(BX), X0 // remaining encryption round key (round key 0 when nr matches the path taken above)
   217  	MOVUPS X0, 16(DX) // copied unchanged as the final decryption round key
   218  	RET
   219  
   220  #define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
   221  #define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
   222  TEXT _expand_key_128<>(SB),NOSPLIT,$0 // In: X0 = previous round key, X1 = AESKEYGENASSIST result, X4 dword 0 = 0, BX = output cursor. Out: X0 = next round key, stored at (BX); BX += 16.
   223  	PSHUFD $0xff, X1, X1 // broadcast dword 3 of X1 to all four lanes (mnemonic used directly, not the PSHUFD_* byte macros above)
   224  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   225  	PXOR X4, X0
   226  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}; dword 0 of X4 is carried over, so it stays zero for the next call
   227  	PXOR X4, X0 // with X4[0] == 0 on entry, dword i of X0 is now the XOR of its original dwords 0..i
   228  	PXOR X1, X0 // fold the broadcast assist word into every dword: X0 = next round key
   229  	MOVUPS X0, (BX) // append it to the schedule
   230  	ADDQ $16, BX
   231  	RET
   232  
   233  #define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
   234  #define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
   235  TEXT _expand_key_192a<>(SB),NOSPLIT,$0 // In: X0 and low half of X2 = current key words, X1 = AESKEYGENASSIST of X2, X4 dword 0 = 0, BX = output cursor. Out: X0/X2 advanced, 32 bytes stored at (BX), BX += 32. Overwrites X1, X3, X4, X5, X6.
   236  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four lanes
   237  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   238  	PXOR X4, X0
   239  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
   240  	PXOR X4, X0 // with X4[0] == 0 on entry, dword i of X0 is now the XOR of its original dwords 0..i
   241  	PXOR X1, X0 // fold in the broadcast assist word: X0 = next four key words
   242  
   243  	MOVAPS X2, X5
   244  	MOVAPS X2, X6 // X6 keeps the incoming X2 for the first store below
   245  	PSLLDQ_X5_; BYTE $0x4 // hand-encoded byte shift of X5 (macro bytes followed by the imm8): X5 = X2 shifted left by 4 bytes
   246  	PSHUFD $0xff, X0, X3 // X3 = dword 3 of the new X0 in every lane
   247  	PXOR X3, X2
   248  	PXOR X5, X2 // X2[0] = old X2[0] ^ X0[3]; X2[1] = old X2[1] ^ old X2[0] ^ X0[3]
   249  
   250  	MOVAPS X0, X1
   251  	SHUFPS $0x44, X0, X6 // X6 = {old X2[0], old X2[1], new X0[0], new X0[1]}
   252  	MOVUPS X6, (BX) // first 16 bytes of output
   253  	SHUFPS $0x4e, X2, X1 // X1 = {new X0[2], new X0[3], new X2[0], new X2[1]}
   254  	MOVUPS X1, 16(BX) // second 16 bytes of output
   255  	ADDQ $32, BX
   256  	RET
   257  
   258  TEXT _expand_key_192b<>(SB),NOSPLIT,$0 // In: X0 and low half of X2 = current key words, X1 = AESKEYGENASSIST of X2, X4 dword 0 = 0, BX = output cursor. Out: X0/X2 advanced, new X0 stored at (BX), BX += 16. Overwrites X1, X3, X4, X5.
   259  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four lanes
   260  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   261  	PXOR X4, X0
   262  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
   263  	PXOR X4, X0 // with X4[0] == 0 on entry, dword i of X0 is now the XOR of its original dwords 0..i
   264  	PXOR X1, X0 // fold in the broadcast assist word: X0 = next four key words
   265  
   266  	MOVAPS X2, X5
   267  	PSLLDQ_X5_; BYTE $0x4 // hand-encoded byte shift of X5 (macro bytes followed by the imm8): X5 = X2 shifted left by 4 bytes
   268  	PSHUFD $0xff, X0, X3 // X3 = dword 3 of the new X0 in every lane
   269  	PXOR X3, X2
   270  	PXOR X5, X2 // X2[0] = old X2[0] ^ X0[3]; X2[1] = old X2[1] ^ old X2[0] ^ X0[3]
   271  
   272  	MOVUPS X0, (BX) // only X0 is stored here; the updated X2 stays in the register
   273  	ADDQ $16, BX
   274  	RET
   275  
   276  TEXT _expand_key_256a<>(SB),NOSPLIT,$0 // same register contract as _expand_key_128: updates X0 from X1 and appends it at (BX)
   277  	JMP _expand_key_128<>(SB) // tail jump: _expand_key_128's RET returns straight to this routine's caller
   278  
   279  TEXT _expand_key_256b<>(SB),NOSPLIT,$0 // In: X2 = round key from two steps back, X1 = AESKEYGENASSIST of X0, X4 dword 0 = 0, BX = output cursor. Out: X2 = next round key, stored at (BX); BX += 16.
   280  	PSHUFD $0xaa, X1, X1 // broadcast dword 2 of X1 to all four lanes
   281  	SHUFPS $0x10, X2, X4 // X4 = {X4[0], X4[0], X2[1], X2[0]}
   282  	PXOR X4, X2
   283  	SHUFPS $0x8c, X2, X4 // X4 = {X4[0], X4[3], X2[0], X2[2]}; dword 0 of X4 is carried over, so it stays zero for the next call
   284  	PXOR X4, X2 // with X4[0] == 0 on entry, dword i of X2 is now the XOR of its original dwords 0..i
   285  	PXOR X1, X2 // fold the broadcast assist word into every dword: X2 = next round key
   286  
   287  	MOVUPS X2, (BX) // append it to the schedule
   288  	ADDQ $16, BX
   289  	RET