github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/sparse/gather_amd64.go (about) 1 //go:build !purego 2 3 package sparse 4 5 import ( 6 "golang.org/x/sys/cpu" 7 ) 8 9 func gatherBits(dst []byte, src Uint8Array) int { 10 n := min(len(dst)*8, src.Len()) 11 i := 0 12 13 if n >= 8 { 14 i = (n / 8) * 8 15 // Make sure `offset` is at least 4 bytes, otherwise VPGATHERDD may read 16 // data beyond the end of the program memory and trigger a fault. 17 // 18 // If the boolean values do not have enough padding we must fallback to 19 // the scalar algorithm to be able to load single bytes from memory. 20 if src.off >= 4 && cpu.X86.HasAVX2 { 21 gatherBitsAVX2(dst, src.Slice(0, i)) 22 } else { 23 gatherBitsDefault(dst, src.Slice(0, i)) 24 } 25 } 26 27 for i < n { 28 x := i / 8 29 y := i % 8 30 b := src.Index(i) 31 dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y)) 32 i++ 33 } 34 35 return n 36 } 37 38 func gather32(dst []uint32, src Uint32Array) int { 39 n := min(len(dst), src.Len()) 40 i := 0 41 42 if n >= 16 && cpu.X86.HasAVX2 { 43 i = (n / 8) * 8 44 gather32AVX2(dst[:i:i], src) 45 } 46 47 for i < n { 48 dst[i] = src.Index(i) 49 i++ 50 } 51 52 return n 53 } 54 55 func gather64(dst []uint64, src Uint64Array) int { 56 n := min(len(dst), src.Len()) 57 i := 0 58 59 if n >= 8 && cpu.X86.HasAVX2 { 60 i = (n / 4) * 4 61 gather64AVX2(dst[:i:i], src) 62 } 63 64 for i < n { 65 dst[i] = src.Index(i) 66 i++ 67 } 68 69 return n 70 } 71 72 //go:noescape 73 func gatherBitsAVX2(dst []byte, src Uint8Array) 74 75 //go:noescape 76 func gatherBitsDefault(dst []byte, src Uint8Array) 77 78 //go:noescape 79 func gather32AVX2(dst []uint32, src Uint32Array) 80 81 //go:noescape 82 func gather64AVX2(dst []uint64, src Uint64Array) 83 84 //go:noescape 85 func gather128(dst [][16]byte, src Uint128Array) int