github.com/benz9527/xboot@v0.0.0-20240504061247-c23f15593274/lib/kv/simd/asm.go (about) 1 package main 2 3 import ( 4 . "github.com/mmcloughlin/avo/build" 5 . "github.com/mmcloughlin/avo/operand" 6 ) 7 8 /* 9 SIMD 10 1. Load group (A), uint8 * 16 11 --------------------------------------------- ------------ 12 | 01010111 | 11111111 | 00110110 | 11111111 | ... | 11111111 | 13 --------------------------------------------- ------------ 14 15 2. Set comparable 0b110110 (B), uint8 * 16 16 --------------------------------------------- ------------ 17 | 00110110 | 00110110 | 00110110 | 00110110 | ... | 00110110 | 18 --------------------------------------------- ------------ 19 20 3. Compare A and B, uint8 * 16 21 --------------------------------------------- ------------ 22 | 00000000 | 00000000 | 11111111 | 00000000 | ... | 00000000 | 23 --------------------------------------------- ------------ 24 (success!) 25 26 4. Mask values 27 --------------------------------------------- ------------ 28 | 0 | 0 | 1 | 0 | ... | 0 | 29 --------------------------------------------- ------------ 30 (true) 31 */ 32 33 func main() { 34 ConstraintExpr("amd64") 35 ConstraintExpr("!nosimd") 36 37 TEXT("Fast16WayHashMatch", NOSPLIT, "func(md *[16]int8, hash int8) uint16") 38 Doc("Fast16WayHashMatch performs a 16-way linear probing of short hash (h2, metadata) list by SSE instructions", 39 "short hash list must be an aligned pointer") 40 41 // The AX store the md pointer address. 42 Comment("Move the pointer of md to register AX") 43 mem := Mem{Base: Load(Param("md"), GP64())} 44 45 // Assume that hash is 0b01100110. 46 // After extended into 32 bits, it becomes 0x00 00 00 66 47 Comment("Move the hash value (int8) from mem to register CX then extend the size to int32") 48 h := Load(Param("hash"), GP32()) 49 mask := GP32() 50 51 // XMM 128 bits register 52 x0, x1 := XMM(), XMM() 53 54 // After movd instruction, X0/128bits becomes 0x00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 66. 55 // windows storage order is little endian order. 56 // 0x1234 => low address: 0x32; high address: 0x12. 57 // So the asm read index 0 byte from low address. 58 // In X0, we load the index 0 byte will be 0x66. 59 Comment("Copy hash value from register CX to XMM register X0") 60 Comment("XMM registers are used by SSE or AVX instructions") 61 MOVD(h, x0) 62 63 // Parallel XOR 64 Comment("Clear the XMM register X1") 65 PXOR(x1, x1) 66 67 // https://www.felixcloutier.com/x86/pshufb 68 // PSHUFB xmm1, xmm2/m128 69 // Shuffle bytes in xmm1 according to contents of xmm2/m128. 70 // xmm2 is the shuffle control mask. 71 // If the most significant bit (bit[7]) of each byte of the shuffle 72 // control mask (xmm2) is set, then constant zero is written in the 73 // result byte. 74 // 75 // for i = 0 to 15 { 76 // if (SRC2[(i * 8)+7] = 1) then 77 // DEST[(i*8)+7..(i*8)+0] := 0; 78 // else 79 // index[3..0] := SRC2[(i*8)+3 .. (i*8)+0]; 80 // DEST[(i*8)+7..(i*8)+0] := SRC1[(index*8+7)..(index*8+0)]; 81 // endif 82 // } 83 // DEST[MAXVL-1:128] := 0 84 // 85 // But in go plan9 asm, the xmm2 is the destination register. 86 // x1 all bits are zero, like 0x00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00. 87 // After the pshufb, the x0 will be 0x66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66. 88 Comment("Packed Shuffle Bytes instruction, let hash value in register X0 xor with X1 by byte to generate mask to X0") 89 PSHUFB(x1, x0) 90 91 // Plan9 MOVOU (move vector of oct-words unaligned) 92 // An SSE instruction to load 128-bit data from memory to register. 93 // The mem is AX actually. 94 Comment("Load the metadata from memory to register X1") 95 Comment("(AX) means de-reference of address value in AX") 96 MOVOU(mem, x1) 97 98 Comment("Packed Compare for Equal Byte instruction, compare X1 and X0 by byte then store into X0") 99 Comment("The same byte are 0xFF. Otherwise, they are 0x00") 100 PCMPEQB(x1, x0) 101 102 Comment("Packed Move with Mask Signed Byte, Extract X0 hi part and convert into int16 then store into AX") 103 Comment("The X0 lo part is unused usually") 104 Comment("Now the each bit of AX mapping to the each hash of metadata array whether equals to target") 105 PMOVMSKB(x0, mask) 106 107 Comment("Copy the AX value to mem then return") 108 Store(mask.As16(), ReturnIndex(0)) 109 RET() 110 111 Generate() 112 }