github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/sparse/gather_amd64.go (about)

     1  //go:build !purego
     2  
     3  package sparse
     4  
     5  import (
     6  	"golang.org/x/sys/cpu"
     7  )
     8  
     9  func gatherBits(dst []byte, src Uint8Array) int {
    10  	n := min(len(dst)*8, src.Len())
    11  	i := 0
    12  
    13  	if n >= 8 {
    14  		i = (n / 8) * 8
    15  		// Make sure `offset` is at least 4 bytes, otherwise VPGATHERDD may read
    16  		// data beyond the end of the program memory and trigger a fault.
    17  		//
    18  		// If the boolean values do not have enough padding we must fallback to
    19  		// the scalar algorithm to be able to load single bytes from memory.
    20  		if src.off >= 4 && cpu.X86.HasAVX2 {
    21  			gatherBitsAVX2(dst, src.Slice(0, i))
    22  		} else {
    23  			gatherBitsDefault(dst, src.Slice(0, i))
    24  		}
    25  	}
    26  
    27  	for i < n {
    28  		x := i / 8
    29  		y := i % 8
    30  		b := src.Index(i)
    31  		dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y))
    32  		i++
    33  	}
    34  
    35  	return n
    36  }
    37  
    38  func gather32(dst []uint32, src Uint32Array) int {
    39  	n := min(len(dst), src.Len())
    40  	i := 0
    41  
    42  	if n >= 16 && cpu.X86.HasAVX2 {
    43  		i = (n / 8) * 8
    44  		gather32AVX2(dst[:i:i], src)
    45  	}
    46  
    47  	for i < n {
    48  		dst[i] = src.Index(i)
    49  		i++
    50  	}
    51  
    52  	return n
    53  }
    54  
    55  func gather64(dst []uint64, src Uint64Array) int {
    56  	n := min(len(dst), src.Len())
    57  	i := 0
    58  
    59  	if n >= 8 && cpu.X86.HasAVX2 {
    60  		i = (n / 4) * 4
    61  		gather64AVX2(dst[:i:i], src)
    62  	}
    63  
    64  	for i < n {
    65  		dst[i] = src.Index(i)
    66  		i++
    67  	}
    68  
    69  	return n
    70  }
    71  
        // gatherBitsAVX2 is declared without a body, so its implementation lives
        // outside this Go file (presumably the package's amd64 assembly — confirm).
        //
        // gatherBits calls it with the full dst and a src prefix whose length is
        // a multiple of 8, and only when cpu.X86.HasAVX2 is set and src.off >= 4.
    72  //go:noescape
    73  func gatherBitsAVX2(dst []byte, src Uint8Array)
    74  
        // gatherBitsDefault is declared without a body, so its implementation
        // lives outside this Go file (presumably the package's amd64 assembly —
        // confirm).
        //
        // gatherBits uses it as the fallback for the multiple-of-8 prefix of src
        // when AVX2 is unavailable or src.off is smaller than 4.
    75  //go:noescape
    76  func gatherBitsDefault(dst []byte, src Uint8Array)
    77  
        // gather32AVX2 is declared without a body, so its implementation lives
        // outside this Go file (presumably the package's amd64 assembly — confirm).
        //
        // gather32 calls it with dst truncated (length and capacity) to a
        // multiple of 8 elements and with src unsliced, only when
        // cpu.X86.HasAVX2 is set and at least 16 values are being copied.
    78  //go:noescape
    79  func gather32AVX2(dst []uint32, src Uint32Array)
    80  
        // gather64AVX2 is declared without a body, so its implementation lives
        // outside this Go file (presumably the package's amd64 assembly — confirm).
        //
        // gather64 calls it with dst truncated (length and capacity) to a
        // multiple of 4 elements and with src unsliced, only when
        // cpu.X86.HasAVX2 is set and at least 8 values are being copied.
    81  //go:noescape
    82  func gather64AVX2(dst []uint64, src Uint64Array)
    83  
        // gather128 is declared without a body, so its implementation lives
        // outside this Go file (presumably the package's amd64 assembly — confirm).
        //
        // Unlike gather32 and gather64 there is no Go wrapper in this file: the
        // external routine is the entry point itself. NOTE(review): the int
        // result presumably is the number of 16-byte values copied, mirroring
        // gather32/gather64 — confirm against the implementation.
    84  //go:noescape
    85  func gather128(dst [][16]byte, src Uint128Array) int