github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/sparse/gather_amd64.go (about) 1 //go:build !purego 2 3 package sparse 4 5 import "golang.org/x/sys/cpu" 6 7 func gatherBits(dst []byte, src Uint8Array) int { 8 n := min(len(dst)*8, src.Len()) 9 i := 0 10 11 if n >= 8 { 12 i = (n / 8) * 8 13 // Make sure `offset` is at least 4 bytes, otherwise VPGATHERDD may read 14 // data beyond the end of the program memory and trigger a fault. 15 // 16 // If the boolean values do not have enough padding we must fallback to 17 // the scalar algorithm to be able to load single bytes from memory. 18 if src.off >= 4 && cpu.X86.HasAVX2 { 19 gatherBitsAVX2(dst, src.Slice(0, i)) 20 } else { 21 gatherBitsDefault(dst, src.Slice(0, i)) 22 } 23 } 24 25 for i < n { 26 x := i / 8 27 y := i % 8 28 b := src.Index(i) 29 dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y)) 30 i++ 31 } 32 33 return n 34 } 35 36 func gather32(dst []uint32, src Uint32Array) int { 37 n := min(len(dst), src.Len()) 38 i := 0 39 40 if n >= 16 && cpu.X86.HasAVX2 { 41 i = (n / 8) * 8 42 gather32AVX2(dst[:i:i], src) 43 } 44 45 for i < n { 46 dst[i] = src.Index(i) 47 i++ 48 } 49 50 return n 51 } 52 53 func gather64(dst []uint64, src Uint64Array) int { 54 n := min(len(dst), src.Len()) 55 i := 0 56 57 if n >= 8 && cpu.X86.HasAVX2 { 58 i = (n / 4) * 4 59 gather64AVX2(dst[:i:i], src) 60 } 61 62 for i < n { 63 dst[i] = src.Index(i) 64 i++ 65 } 66 67 return n 68 } 69 70 //go:noescape 71 func gatherBitsAVX2(dst []byte, src Uint8Array) 72 73 //go:noescape 74 func gatherBitsDefault(dst []byte, src Uint8Array) 75 76 //go:noescape 77 func gather32AVX2(dst []uint32, src Uint32Array) 78 79 //go:noescape 80 func gather64AVX2(dst []uint64, src Uint64Array) 81 82 //go:noescape 83 func gather128(dst [][16]byte, src Uint128Array) int