// Source: github.com/GitbookIO/syncgroup@v0.0.0-20200915204659-4f0b2961ab10/quickhash/aeshash/aeshash_amd64.s
// Copyright © 2014 Lawrence E. Bakst. All rights reserved.
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Go's hash function used by map on X64 hardware with AESNI
// liberated from go runtime/asm_amd64.s

#include "textflag.h"
#include "funcdata.h"

// func Hash(b []byte, seed uint64) uint64
//
// Hashes the bytes of b, mixed with seed, and returns the low 64 bits
// of the resulting 128-bit AES state.
// Argument layout: b (ptr, len, cap) at 0..23, seed at 24, result at 32.
TEXT ·Hash(SB),NOSPLIT,$0-40
	MOVQ	b_base+0(FP), AX	// ptr to bytes
	MOVQ	b_len+8(FP), CX		// length of slice
	MOVQ	seed+24(FP), X0		// seed to low 64 bits of xmm0
	CALL	·aeshashbody(SB)
	MOVQ	X0, ret+32(FP)		// low 64 bits of the state are the hash
	RET

// func HashStr(s string, seed uint64) uint64
//
// Same as Hash, but for a string argument.
// Argument layout: s (ptr, len) at 0..15, seed at 16, result at 24.
TEXT ·HashStr(SB),NOSPLIT,$0-32
	MOVQ	s_base+0(FP), AX	// ptr to string data
	MOVQ	s_len+8(FP), CX		// length of string
	MOVQ	seed+16(FP), X0		// seed to low 64 bits of xmm0
	CALL	·aeshashbody(SB)
	MOVQ	X0, ret+24(FP)		// low 64 bits of the state are the hash
	RET

// Shared hashing core. Uses a register-based calling convention:
//
// AX: data
// CX: length
// X0: seed (in), hash state (out)
//
// Clobbers AX, CX, X1, X2, X3. BP is deliberately left untouched: this
// function has no frame of its own, and BP is the frame pointer on amd64,
// so using it as scratch would corrupt the caller's frame pointer.
//
// NOTE(review): MOVO requires a 16-byte aligned memory operand.
// aeskeysched is defined outside this file — confirm its alignment.
//
// func aeshashbody()
TEXT ·aeshashbody(SB),NOSPLIT,$0-0
	PINSRQ	$1, CX, X0		// size to high 64 bits of xmm0
	MOVO	·aeskeysched+0(SB), X2
	MOVO	·aeskeysched+16(SB), X3
	CMPQ	CX, $16
	JB	aessmall
aesloop:
	// Consume full 16-byte blocks while more than 16 bytes remain.
	CMPQ	CX, $16
	JBE	aesloopend
	MOVOU	(AX), X1
	AESENC	X2, X0
	AESENC	X1, X0
	SUBQ	$16, CX
	ADDQ	$16, AX
	JMP	aesloop
// 1-16 bytes remaining
aesloopend:
	// This load may overlap with the previous load above.
	// We'll hash some bytes twice, but that's ok.
	MOVOU	-16(AX)(CX*1), X1
	JMP	partial
// 0-15 bytes
aessmall:
	TESTQ	CX, CX
	JE	finalize		// 0 bytes

	// Compare the low byte of the address against 0xf0.
	CMPB	AX, $0xf0
	JA	highpartial

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	(AX), X1
	ADDQ	CX, CX			// CX = 2*len, so CX*8 = len*16 = table offset
	// The data is already in X1, so AX is free to hold the table base.
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1		// zero the bytes beyond the input length
	JMP	partial
highpartial:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-16(AX)(CX*1), X1
	ADDQ	CX, CX			// CX = 2*len, so CX*8 = len*16 = table offset
	// The data is already in X1, so AX is free to hold the table base.
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
partial:
	// incorporate partial block into hash
	AESENC	X3, X0
	AESENC	X1, X0
finalize:
	// finalize hash
	AESENC	X2, X0
	AESENC	X3, X0
	AESENC	X2, X0
	RET


// put the seed s into the low 64 bits of xmm0
// put the data v into the high 64 bits of xmm0
// perform 3 AES rounds with 2 alternating round keys
// func Hash64(k uint64, seed uint64) uint64
TEXT ·Hash64(SB),NOSPLIT,$0-24
	MOVQ	seed+8(FP), X0		// seed
	MOVQ	k+0(FP), AX		// data
	PINSRQ	$1, AX, X0		// 64 bit data key to high order 64 bits of X0
	AESENC	·aeskeysched+0(SB), X0
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+0(SB), X0
	MOVQ	X0, ret+16(FP)		// low 64 bits of the state are the hash
	RET

// put the seed into the low 64 bits of xmm0
// put the 32 bit key into both 32 bit halves of the high 64 bits of xmm0
// perform 3 AES rounds with 2 alternating round keys
// func Hash32(k uint32, seed uint64) uint64
TEXT ·Hash32(SB),NOSPLIT,$0-24
	MOVQ	seed+8(FP), X0		// seed
	// k is a 4-byte argument, so load exactly 4 bytes (MOVL, not MOVQ).
	MOVL	k+0(FP), AX		// 32 bit data key
	PINSRD	$2, AX, X0		// data to the low order 32 bits of the high order 64 bits
	PINSRD	$3, AX, X0		// data to the high order 32 bits of the high order 64 bits
	AESENC	·aeskeysched+0(SB), X0
	AESENC	·aeskeysched+16(SB), X0
	AESENC	·aeskeysched+0(SB), X0
	MOVQ	X0, ret+16(FP)		// low 64 bits of the state are the hash
	RET


// simple mask to get rid of data in the high part of the register.
// var masks [32]uint64
//
// Laid out as 16 entries of 16 bytes each (two 64-bit words, low word
// first). Entry n, at byte offset n*16, has its low n bytes set to 0xff
// and the remaining bytes zero, for n = 0..15. aeshashbody ANDs a 16-byte
// load with entry n to keep only the first n bytes of a short input.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB), RODATA, $256

// These are arguments to PSHUFB. They move data down from
// the high bytes of the register to the low bytes of the register.
// The index is how many bytes to move.
// var shifts [32]uint64
//
// Laid out as 16 entries of 16 bytes each (two 64-bit words, low word
// first). Entry n, at byte offset n*16, is a PSHUFB control: destination
// byte i (for i < n) selects source byte 16-n+i, i.e. the top n bytes of
// the register are moved down to the bottom. All other control bytes are
// 0xff; PSHUFB writes zero for a control byte with its high bit set.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB), RODATA, $256