github.com/GitbookIO/syncgroup@v0.0.0-20200915204659-4f0b2961ab10/quickhash/aeshash/aeshash_amd64.s (about)

     1  // Copyright © 2014 Lawrence E. Bakst. All rights reserved.
     2  // Copyright 2009 The Go Authors. All rights reserved.
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file.
     5  //
     6  // Go's hash function used by map on X64 hardware with AESNI
     7  // liberated from go runtime/asm_amd64.s
     8  
     9  #include "textflag.h"
    10  #include "funcdata.h"
    11  
    12  // func Hash(b []byte, seed uint64) uint64 // hashes the len(b) bytes at b's base, mixed with seed
    13  TEXT ·Hash(SB),NOSPLIT,$0-40
    14  	MOVQ	seed+24(FP), X0		// X0 low 64 bits = seed
    15  	MOVQ	b_len+8(FP), CX		// CX = number of bytes to hash
    16  	MOVQ	b_base+0(FP), AX	// AX = address of the first byte
    17  	CALL	·aeshashbody(SB)	// takes AX/CX/X0, leaves the hash state in X0
    18  	MOVQ	X0, ret+32(FP)		// result is the low 64 bits of X0
    19  	RET
    20  
    21  // func HashStr(s string, seed uint64) uint64 // hashes the len(s) bytes of s, mixed with seed
    22  TEXT ·HashStr(SB),NOSPLIT,$0-32
    23  	MOVQ	seed+16(FP), X0		// X0 low 64 bits = seed
    24  	MOVQ	s_len+8(FP), CX		// CX = number of bytes to hash
    25  	MOVQ	s_base+0(FP), AX	// AX = address of the string data
    26  	CALL	·aeshashbody(SB)	// takes AX/CX/X0, leaves the hash state in X0
    27  	MOVQ	X0, ret+24(FP)		// result is the low 64 bits of X0
    28  	RET
    29  
    30  // In:  AX = data pointer, CX = length in bytes, X0 = seed in its low 64 bits.
    31  // Out: X0 = hash state (Hash and HashStr store its low 64 bits as the result).
    32  // Scratch: AX, CX, X1, X2, X3 and the flags. BP (the frame pointer) is never written.
    33  // func aeshashbody()
    34  TEXT ·aeshashbody(SB),NOSPLIT,$0-0
    35  	PINSRQ	$1, CX, X0		// length into the high 64 bits of X0, next to the seed
    36  	MOVO	·aeskeysched+0(SB), X2	// X2 = first 16 bytes of the key schedule
    37  	MOVO	·aeskeysched+16(SB), X3	// X3 = second 16 bytes of the key schedule
    38  	CMPQ	CX, $16
    39  	JB	aessmall		// fewer than 16 bytes: single masked/shifted block
    40  aesloop:
    41  	CMPQ	CX, $16
    42  	JBE	aesloopend		// 16 or fewer bytes left: handle them as the tail block
    43  	MOVOU	(AX), X1		// next full 16-byte block (unaligned load)
    44  	AESENC	X2, X0
    45  	AESENC	X1, X0			// the data block itself is used as a round key
    46  	SUBQ	$16, CX
    47  	ADDQ	$16, AX
    48  	JMP	aesloop
    49  // 1-16 bytes remaining (total input was at least 16 bytes)
    50  aesloopend:
    51  	// Load the final 16 bytes of the input. This may overlap the block
    52  	// loaded by the loop, so some bytes get hashed twice; that is fine.
    53  	MOVOU	-16(AX)(CX*1), X1
    54  	JMP	partial
    55  // 0-15 bytes in total
    56  aessmall:
    57  	TESTQ	CX, CX
    58  	JE	finalize	// 0 bytes: nothing to mix in
    59  	// CMPB looks at the low byte of the address only.
    60  	CMPB	AX, $0xf0
    61  	JA	highpartial	// low byte 0xf1-0xff: a 16-byte load here could run into the next page
    62  	// Low byte is at most 0xf0:
    63  	// 16 bytes loaded at this address won't cross
    64  	// a page boundary, so we can load it directly.
    65  	MOVOU	(AX), X1
    66  	ADDQ	CX, CX			// CX = 2*len, so (AX)(CX*8) below is table base + 16*len
    67  	MOVQ	$masks<>(SB), AX	// AX is dead after the load; reuse it rather than clobbering BP
    68  	PAND	(AX)(CX*8), X1		// zero the bytes beyond the input length
    69  	JMP	partial
    70  highpartial:
    71  	// Address low byte is above 0xf0. Might be up against
    72  	// a page boundary, so load the 16 bytes ending at the last input byte,
    73  	// then shift the input bytes down to the bottom using pshufb.
    74  	MOVOU	-16(AX)(CX*1), X1
    75  	ADDQ	CX, CX			// CX = 2*len, so (AX)(CX*8) below is table base + 16*len
    76  	MOVQ	$shifts<>(SB), AX	// AX is dead after the load; reuse it rather than clobbering BP
    77  	PSHUFB	(AX)(CX*8), X1		// top len bytes move to the bottom, the rest become zero
    78  partial:
    79  	// incorporate the final (possibly partial) block held in X1 into the hash
    80  	AESENC	X3, X0
    81  	AESENC	X1, X0
    82  finalize:
    83  	// finalize hash: three more rounds alternating the two schedule keys
    84  	AESENC	X2, X0
    85  	AESENC	X3, X0
    86  	AESENC	X2, X0
    87  aesret:
    88  	RET
    89  
    90  
    91  // func Hash64(k uint64, seed uint64) uint64
    92  // Builds X0 with seed in the low 64 bits and k in the high 64 bits, then
    93  // applies three AESENC rounds, alternating the two 16-byte round keys
    94  // stored at aeskeysched+0 and aeskeysched+16.
    95  TEXT ·Hash64(SB),NOSPLIT,$0-24
    96  	MOVQ	seed+8(FP), X0			// low 64 bits  <- seed
    97  	PINSRQ	$1, k+0(FP), X0			// high 64 bits <- k, read straight from the argument slot
    98  	AESENC	·aeskeysched+0(SB), X0		// round 1: first key
    99  	AESENC	·aeskeysched+16(SB), X0		// round 2: second key
   100  	AESENC	·aeskeysched+0(SB), X0		// round 3: first key again
   101  	// only the low 64 bits of the 128-bit state are returned
   102  	MOVQ	X0, ret+16(FP)
   103  	RET
   104  
   105  // func Hash32(k uint32, seed uint64) uint64 // seed in the low half of X0, k twice in the high half, 3 AES rounds
   106  TEXT ·Hash32(SB),NOSPLIT,$0-24
   107  	MOVQ	seed+8(FP), X0	// seed into the low 64 bits of X0
   108  	MOVL	k+0(FP), AX		// k is a 4-byte argument: load exactly 4 bytes, not the padding that follows it
   109  	PINSRD	$2, AX, X0		// k to the low order 32 bits of the high order 64 bits
   110  	PINSRD	$3, AX, X0		// k again to the high order 32 bits of the high order 64 bits
   111  	AESENC	·aeskeysched+0(SB), X0	// round 1: first key
   112  	AESENC	·aeskeysched+16(SB), X0	// round 2: second key
   113  	AESENC	·aeskeysched+0(SB), X0	// round 3: first key again
   114  	MOVQ	X0, ret+16(FP)		// result is the low 64 bits of X0
   115  	RET
   116  
   117  
   118  // PAND masks: entry n (n = 0..15) keeps the low n bytes of a 16-byte register and zeroes the high part.
   119  // var masks [32]uint64 // 16 entries of two qwords (low qword first); entry n is at byte offset 16*n
   120  DATA masks<>+0x00(SB)/8, $0x0000000000000000	// n=0
   121  DATA masks<>+0x08(SB)/8, $0x0000000000000000
   122  DATA masks<>+0x10(SB)/8, $0x00000000000000ff	// n=1
   123  DATA masks<>+0x18(SB)/8, $0x0000000000000000
   124  DATA masks<>+0x20(SB)/8, $0x000000000000ffff	// n=2
   125  DATA masks<>+0x28(SB)/8, $0x0000000000000000
   126  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff	// n=3
   127  DATA masks<>+0x38(SB)/8, $0x0000000000000000
   128  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff	// n=4
   129  DATA masks<>+0x48(SB)/8, $0x0000000000000000
   130  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff	// n=5
   131  DATA masks<>+0x58(SB)/8, $0x0000000000000000
   132  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff	// n=6
   133  DATA masks<>+0x68(SB)/8, $0x0000000000000000
   134  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff	// n=7
   135  DATA masks<>+0x78(SB)/8, $0x0000000000000000
   136  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff	// n=8
   137  DATA masks<>+0x88(SB)/8, $0x0000000000000000
   138  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff	// n=9
   139  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
   140  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff	// n=10
   141  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
   142  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff	// n=11
   143  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
   144  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff	// n=12
   145  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
   146  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff	// n=13
   147  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
   148  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff	// n=14
   149  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
   150  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff	// n=15
   151  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
   152  GLOBL masks<>(SB), RODATA, $256	// 16 entries * 16 bytes, read-only
   153  
   154  // These are arguments to pshufb. Entry n (n = 1..15) moves the top n bytes of a
   155  // 16-byte register down to bytes 0..n-1; all other selector bytes are 0xff, which pshufb turns into zero.
   156  // The index is how many bytes to move; entry n is at byte offset 16*n. Entry 0 is not read: aeshashbody skips zero-length input.
   157  // var shifts [32]uint64 // 16 entries of two qwords (low qword first)
   158  DATA shifts<>+0x00(SB)/8, $0x0000000000000000	// n=0
   159  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
   160  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f	// n=1
   161  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
   162  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e	// n=2
   163  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
   164  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d	// n=3
   165  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
   166  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c	// n=4
   167  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
   168  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b	// n=5
   169  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
   170  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a	// n=6
   171  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
   172  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09	// n=7
   173  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
   174  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908	// n=8
   175  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
   176  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807	// n=9
   177  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
   178  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706	// n=10
   179  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
   180  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605	// n=11
   181  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
   182  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504	// n=12
   183  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
   184  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403	// n=13
   185  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
   186  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302	// n=14
   187  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
   188  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201	// n=15
   189  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
   190  GLOBL shifts<>(SB), RODATA, $256	// 16 entries * 16 bytes, read-only