github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/crypto/aes/asm_amd64.s (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // func hasAsm() bool
     6  // Reports whether CPUID leaf 1 sets ECX bit 25, the AES-NI feature flag.
     7  TEXT ·hasAsm(SB),7,$0
     8  	MOVL $1, AX // request CPUID leaf 1
    10  	CPUID
    11  	SHRL $25, CX // bring feature bit 25 down to bit 0
    12  	ANDL $1, CX // drop every other feature bit
    13  	MOVB CX, ret+0(FP) // low byte is now 0 or 1: the bool result
    14  	RET
    15  
    16  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        // Encrypts one 16-byte block with the AES-NI round instructions.
        //   nr:  round count. 12 takes the 12-round path, a smaller value the
        //        10-round path and a larger value the 14-round path.
        //   xk:  round keys, read as nr+1 consecutive 16-byte values.
        //   src: 16 bytes of input, read with an unaligned load.
        //   dst: 16 bytes of output, written with an unaligned store.
        // AX walks the key schedule, X0 carries the state, X1 the current round key.
    17  TEXT ·encryptBlockAsm(SB),7,$0
    18  	MOVQ nr+0(FP), CX
    19  	MOVQ xk+8(FP), AX
    20  	MOVQ dst+16(FP), DX
    21  	MOVQ src+24(FP), BX
    22  	MOVUPS 0(AX), X1 // round key 0
    23  	MOVUPS 0(BX), X0 // input block
    24  	ADDQ $16, AX // AX -> round key 1
    25  	PXOR X1, X0 // initial key addition
    26  	SUBQ $12, CX // flags select the path by round count
    27  	JE Lenc192 // nr == 12
    28  	JB Lenc128 // nr < 12
    29  Lenc256: // nr > 12: two rounds, then fall into the 12-round path
    30  	MOVUPS 0(AX), X1
    31  	AESENC X1, X0
    32  	MOVUPS 16(AX), X1
    33  	AESENC X1, X0
    34  	ADDQ $32, AX
    35  Lenc192: // two rounds, then fall into the 10-round path
    36  	MOVUPS 0(AX), X1
    37  	AESENC X1, X0
    38  	MOVUPS 16(AX), X1
    39  	AESENC X1, X0
    40  	ADDQ $32, AX
    41  Lenc128: // nine AESENC rounds followed by the final AESENCLAST round
    42  	MOVUPS 0(AX), X1
    43  	AESENC X1, X0
    44  	MOVUPS 16(AX), X1
    45  	AESENC X1, X0
    46  	MOVUPS 32(AX), X1
    47  	AESENC X1, X0
    48  	MOVUPS 48(AX), X1
    49  	AESENC X1, X0
    50  	MOVUPS 64(AX), X1
    51  	AESENC X1, X0
    52  	MOVUPS 80(AX), X1
    53  	AESENC X1, X0
    54  	MOVUPS 96(AX), X1
    55  	AESENC X1, X0
    56  	MOVUPS 112(AX), X1
    57  	AESENC X1, X0
    58  	MOVUPS 128(AX), X1
    59  	AESENC X1, X0
    60  	MOVUPS 144(AX), X1
    61  	AESENCLAST X1, X0
    62  	MOVUPS X0, 0(DX) // write the encrypted block to dst
    63  	RET
    64  
    65  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        // Decrypts one 16-byte block with the AES-NI round instructions.
        //   nr:  round count. 12 takes the 12-round path, a smaller value the
        //        10-round path and a larger value the 14-round path.
        //   xk:  round keys, read front to back as nr+1 consecutive 16-byte
        //        values; expandKeyAsm in this file writes such a schedule to its
        //        dec argument.
        //   src: 16 bytes of input, read with an unaligned load.
        //   dst: 16 bytes of output, written with an unaligned store.
        // AX walks the key schedule, X0 carries the state, X1 the current round key.
    66  TEXT ·decryptBlockAsm(SB),7,$0
    67  	MOVQ nr+0(FP), CX
    68  	MOVQ xk+8(FP), AX
    69  	MOVQ dst+16(FP), DX
    70  	MOVQ src+24(FP), BX
    71  	MOVUPS 0(AX), X1 // first key of the schedule
    72  	MOVUPS 0(BX), X0 // input block
    73  	ADDQ $16, AX // AX -> second key
    74  	PXOR X1, X0 // initial key addition
    75  	SUBQ $12, CX // flags select the path by round count
    76  	JE Ldec192 // nr == 12
    77  	JB Ldec128 // nr < 12
    78  Ldec256: // nr > 12: two rounds, then fall into the 12-round path
    79  	MOVUPS 0(AX), X1
    80  	AESDEC X1, X0
    81  	MOVUPS 16(AX), X1
    82  	AESDEC X1, X0
    83  	ADDQ $32, AX
    84  Ldec192: // two rounds, then fall into the 10-round path
    85  	MOVUPS 0(AX), X1
    86  	AESDEC X1, X0
    87  	MOVUPS 16(AX), X1
    88  	AESDEC X1, X0
    89  	ADDQ $32, AX
    90  Ldec128: // nine AESDEC rounds followed by the final AESDECLAST round
    91  	MOVUPS 0(AX), X1
    92  	AESDEC X1, X0
    93  	MOVUPS 16(AX), X1
    94  	AESDEC X1, X0
    95  	MOVUPS 32(AX), X1
    96  	AESDEC X1, X0
    97  	MOVUPS 48(AX), X1
    98  	AESDEC X1, X0
    99  	MOVUPS 64(AX), X1
   100  	AESDEC X1, X0
   101  	MOVUPS 80(AX), X1
   102  	AESDEC X1, X0
   103  	MOVUPS 96(AX), X1
   104  	AESDEC X1, X0
   105  	MOVUPS 112(AX), X1
   106  	AESDEC X1, X0
   107  	MOVUPS 128(AX), X1
   108  	AESDEC X1, X0
   109  	MOVUPS 144(AX), X1
   110  	AESDECLAST X1, X0
   111  	MOVUPS X0, 0(DX) // write the decrypted block to dst
   112  	RET
   113  
   114  // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   115  // Note that round keys are stored in uint128 format, not uint32
        // Expands key into an encryption schedule at enc and a decryption schedule
        // at dec. Each schedule is nr+1 consecutive 16-byte round keys.
        //   nr:  round count. 12 takes the 24-byte-key path, a smaller value the
        //        16-byte-key path and a larger value the 32-byte-key path.
        //   key: 16, 24 or 32 key bytes, matching the path selected by nr.
        //   enc: receives round keys 0..nr in generation order.
        //   dec: receives the same keys in reverse order; every key except the
        //        first and last one written is passed through AESIMC.
        // BX is the enc write cursor that the _expand_key_* helpers advance.
   116  TEXT ·expandKeyAsm(SB),7,$0
   117  	MOVQ nr+0(FP), CX
   118  	MOVQ key+8(FP), AX
   119  	MOVQ enc+16(FP), BX
   120  	MOVQ dec+24(FP), DX
   121  	MOVUPS (AX), X0 // first 16 key bytes
   122  	// enc
   123  	MOVUPS X0, (BX) // round key 0 is the start of the key itself
   124  	ADDQ $16, BX
   125  	PXOR X4, X4 // _expand_key_* expect X4 to be zero
   126  	CMPL CX, $12
   127  	JE Lexp_enc192 // nr == 12
   128  	JB Lexp_enc128 // nr < 12
   129  Lexp_enc256: // nr > 12: X0/X2 hold the two most recent round keys
   130  	MOVUPS 16(AX), X2 // second 16 key bytes
   131  	MOVUPS X2, (BX) // stored unchanged as round key 1
   132  	ADDQ $16, BX
   133  	AESKEYGENASSIST $0x01, X2, X1
   134  	CALL _expand_key_256a<>(SB)
   135  	AESKEYGENASSIST $0x01, X0, X1
   136  	CALL _expand_key_256b<>(SB)
   137  	AESKEYGENASSIST $0x02, X2, X1
   138  	CALL _expand_key_256a<>(SB)
   139  	AESKEYGENASSIST $0x02, X0, X1
   140  	CALL _expand_key_256b<>(SB)
   141  	AESKEYGENASSIST $0x04, X2, X1
   142  	CALL _expand_key_256a<>(SB)
   143  	AESKEYGENASSIST $0x04, X0, X1
   144  	CALL _expand_key_256b<>(SB)
   145  	AESKEYGENASSIST $0x08, X2, X1
   146  	CALL _expand_key_256a<>(SB)
   147  	AESKEYGENASSIST $0x08, X0, X1
   148  	CALL _expand_key_256b<>(SB)
   149  	AESKEYGENASSIST $0x10, X2, X1
   150  	CALL _expand_key_256a<>(SB)
   151  	AESKEYGENASSIST $0x10, X0, X1
   152  	CALL _expand_key_256b<>(SB)
   153  	AESKEYGENASSIST $0x20, X2, X1
   154  	CALL _expand_key_256a<>(SB)
   155  	AESKEYGENASSIST $0x20, X0, X1
   156  	CALL _expand_key_256b<>(SB)
   157  	AESKEYGENASSIST $0x40, X2, X1
   158  	CALL _expand_key_256a<>(SB)
   159  	JMP Lexp_dec
   160  Lexp_enc192: // nr == 12: the a/b helpers alternate 32- and 16-byte stores
   161  	MOVQ 16(AX), X2 // remaining 8 key bytes go into the low half of X2
   162  	AESKEYGENASSIST $0x01, X2, X1
   163  	CALL _expand_key_192a<>(SB)
   164  	AESKEYGENASSIST $0x02, X2, X1
   165  	CALL _expand_key_192b<>(SB)
   166  	AESKEYGENASSIST $0x04, X2, X1
   167  	CALL _expand_key_192a<>(SB)
   168  	AESKEYGENASSIST $0x08, X2, X1
   169  	CALL _expand_key_192b<>(SB)
   170  	AESKEYGENASSIST $0x10, X2, X1
   171  	CALL _expand_key_192a<>(SB)
   172  	AESKEYGENASSIST $0x20, X2, X1
   173  	CALL _expand_key_192b<>(SB)
   174  	AESKEYGENASSIST $0x40, X2, X1
   175  	CALL _expand_key_192a<>(SB)
   176  	AESKEYGENASSIST $0x80, X2, X1
   177  	CALL _expand_key_192b<>(SB)
   178  	JMP Lexp_dec
   179  Lexp_enc128: // nr < 12: ten steps, each producing one round key from X0
   180  	AESKEYGENASSIST $0x01, X0, X1
   181  	CALL _expand_key_128<>(SB)
   182  	AESKEYGENASSIST $0x02, X0, X1
   183  	CALL _expand_key_128<>(SB)
   184  	AESKEYGENASSIST $0x04, X0, X1
   185  	CALL _expand_key_128<>(SB)
   186  	AESKEYGENASSIST $0x08, X0, X1
   187  	CALL _expand_key_128<>(SB)
   188  	AESKEYGENASSIST $0x10, X0, X1
   189  	CALL _expand_key_128<>(SB)
   190  	AESKEYGENASSIST $0x20, X0, X1
   191  	CALL _expand_key_128<>(SB)
   192  	AESKEYGENASSIST $0x40, X0, X1
   193  	CALL _expand_key_128<>(SB)
   194  	AESKEYGENASSIST $0x80, X0, X1
   195  	CALL _expand_key_128<>(SB)
   196  	AESKEYGENASSIST $0x1b, X0, X1
   197  	CALL _expand_key_128<>(SB)
   198  	AESKEYGENASSIST $0x36, X0, X1
   199  	CALL _expand_key_128<>(SB)
   200  Lexp_dec:
   201  	// dec
   202  	SUBQ $16, BX // BX -> last enc round key written
   203  	MOVUPS (BX), X1
   204  	MOVUPS X1, (DX) // it becomes the first dec key, unchanged
   205  	DECQ CX // nr-1 middle keys remain
   206  Lexp_dec_loop: // walk enc backwards and dec forwards
   207  	MOVUPS -16(BX), X1
   208  	AESIMC X1, X0 // inverse-MixColumns form of the enc key
   209  	MOVUPS X0, 16(DX)
   210  	SUBQ $16, BX
   211  	ADDQ $16, DX
   212  	DECQ CX
   213  	JNZ Lexp_dec_loop
   214  	MOVUPS -16(BX), X0 // enc round key 0
   215  	MOVUPS X0, 16(DX) // becomes the last dec key, unchanged
   216  	RET
   217  
   218  #define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
   219  #define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
        // NOTE(review): neither PSHUFD_* macro above is referenced in the visible
        // part of this file; the code below uses the PSHUFD mnemonic directly.
        //
        // _expand_key_128 derives the next 16-byte round key from the one in X0.
        // In:  X0 = previous round key
        //      X1 = AESKEYGENASSIST result prepared by the caller
        //      X4 = scratch whose low dword must be zero
        //      BX = address at which to store the new round key
        // Out: X0 = new round key, also stored at (BX); BX advanced by 16.
        //      X1 and X4 are overwritten; the low dword of X4 is still zero.
   220  TEXT _expand_key_128<>(SB),7,$0
   221  	PSHUFD $0xff, X1, X1 // broadcast dword 3 of X1 to all four dwords
   222  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   223  	PXOR X4, X0
   224  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
   225  	PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
   226  	PXOR X1, X0 // mix in the broadcast key-assist dword
   227  	MOVUPS X0, (BX)
   228  	ADDQ $16, BX
   229  	RET
   230  
   231  #define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
   232  #define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
        // PSLLDQ_X5_ is a byte-encoded PSLLDQ on X5 (per its name); the BYTE that
        // follows each use supplies the shift count in bytes.
        // NOTE(review): PSHUFD_X0_X3_ is not referenced in the visible part of
        // this file; the code below uses the PSHUFD mnemonic directly.
        //
        // _expand_key_192a advances the 24-byte key state by one step and stores
        // 32 bytes of key schedule.
        // In:  X0 = four most recent key dwords, X2 = two further dwords in its
        //      low half, X1 = AESKEYGENASSIST result for X2,
        //      X4 = scratch whose low dword must be zero, BX = store address
        // Out: X0 and X2 hold the updated state; BX advanced by 32.
        //      X1, X3, X4, X5 and X6 are overwritten.
   233  TEXT _expand_key_192a<>(SB),7,$0
   234  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four dwords
   235  	SHUFPS $0x10, X0, X4
   236  	PXOR X4, X0
   237  	SHUFPS $0x8c, X0, X4
   238  	PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
   239  	PXOR X1, X0 // mix in the broadcast key-assist dword
   240  
   241  	MOVAPS X2, X5
   242  	MOVAPS X2, X6 // X6 keeps the not-yet-stored old X2 dwords
   243  	PSLLDQ_X5_; BYTE $0x4 // shift X5 left by 4 bytes (one dword)
   244  	PSHUFD $0xff, X0, X3 // broadcast dword 3 of the updated X0
   245  	PXOR X3, X2
   246  	PXOR X5, X2 // low two dwords of X2 are the next two key dwords
   247  
   248  	MOVAPS X0, X1
   249  	SHUFPS $0x44, X0, X6 // X6 = {old X2[0], old X2[1], X0[0], X0[1]}
   250  	MOVUPS X6, (BX)
   251  	SHUFPS $0x4e, X2, X1 // X1 = {X0[2], X0[3], X2[0], X2[1]}
   252  	MOVUPS X1, 16(BX)
   253  	ADDQ $32, BX
   254  	RET
   255  
   256  TEXT _expand_key_192b<>(SB),7,$0
        // Advances the 24-byte key state by one step and stores 16 bytes.
        // In:  X0 = four most recent key dwords, X2 = two further dwords in its
        //      low half, X1 = AESKEYGENASSIST result for X2,
        //      X4 = scratch whose low dword must be zero, BX = store address
        // Out: X0 (stored at (BX)) and X2 hold the updated state; BX advanced
        //      by 16. X1, X3, X4 and X5 are overwritten. The updated X2 is not
        //      stored by this routine.
   257  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four dwords
   258  	SHUFPS $0x10, X0, X4
   259  	PXOR X4, X0
   260  	SHUFPS $0x8c, X0, X4
   261  	PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
   262  	PXOR X1, X0 // mix in the broadcast key-assist dword
   263  
   264  	MOVAPS X2, X5
   265  	PSLLDQ_X5_; BYTE $0x4 // macro from earlier in this file; shift X5 left by 4 bytes
   266  	PSHUFD $0xff, X0, X3 // broadcast dword 3 of the updated X0
   267  	PXOR X3, X2
   268  	PXOR X5, X2 // low two dwords of X2 are the next two key dwords
   269  
   270  	MOVUPS X0, (BX)
   271  	ADDQ $16, BX
   272  	RET
   273  
   274  TEXT _expand_key_256a<>(SB),7,$0
        // Same computation as _expand_key_128: tail-jump there so that its RET
        // returns directly to this routine's caller.
   275  	JMP _expand_key_128<>(SB)
   276  
   277  TEXT _expand_key_256b<>(SB),7,$0
        // Derives the next 16-byte round key from the one in X2.
        // In:  X2 = round key to update, X1 = AESKEYGENASSIST result prepared by
        //      the caller, X4 = scratch whose low dword must be zero,
        //      BX = store address
        // Out: X2 = new round key, also stored at (BX); BX advanced by 16.
        //      X1 and X4 are overwritten; X0 is not touched.
   278  	PSHUFD $0xaa, X1, X1 // broadcast dword 2 of X1 to all four dwords
   279  	SHUFPS $0x10, X2, X4
   280  	PXOR X4, X2
   281  	SHUFPS $0x8c, X2, X4
   282  	PXOR X4, X2 // each dword of X2 is now XORed with all lower dwords
   283  	PXOR X1, X2 // mix in the broadcast key-assist dword
   284  
   285  	MOVUPS X2, (BX)
   286  	ADDQ $16, BX
   287  	RET