github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/sparse/gather_amd64.go (about)

     1  //go:build !purego
     2  
     3  package sparse
     4  
     5  import "golang.org/x/sys/cpu"
     6  
     7  func gatherBits(dst []byte, src Uint8Array) int {
     8  	n := min(len(dst)*8, src.Len())
     9  	i := 0
    10  
    11  	if n >= 8 {
    12  		i = (n / 8) * 8
    13  		// Make sure `offset` is at least 4 bytes, otherwise VPGATHERDD may read
    14  		// data beyond the end of the program memory and trigger a fault.
    15  		//
    16  		// If the boolean values do not have enough padding we must fallback to
    17  		// the scalar algorithm to be able to load single bytes from memory.
    18  		if src.off >= 4 && cpu.X86.HasAVX2 {
    19  			gatherBitsAVX2(dst, src.Slice(0, i))
    20  		} else {
    21  			gatherBitsDefault(dst, src.Slice(0, i))
    22  		}
    23  	}
    24  
    25  	for i < n {
    26  		x := i / 8
    27  		y := i % 8
    28  		b := src.Index(i)
    29  		dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y))
    30  		i++
    31  	}
    32  
    33  	return n
    34  }
    35  
    36  func gather32(dst []uint32, src Uint32Array) int {
    37  	n := min(len(dst), src.Len())
    38  	i := 0
    39  
    40  	if n >= 16 && cpu.X86.HasAVX2 {
    41  		i = (n / 8) * 8
    42  		gather32AVX2(dst[:i:i], src)
    43  	}
    44  
    45  	for i < n {
    46  		dst[i] = src.Index(i)
    47  		i++
    48  	}
    49  
    50  	return n
    51  }
    52  
    53  func gather64(dst []uint64, src Uint64Array) int {
    54  	n := min(len(dst), src.Len())
    55  	i := 0
    56  
    57  	if n >= 8 && cpu.X86.HasAVX2 {
    58  		i = (n / 4) * 4
    59  		gather64AVX2(dst[:i:i], src)
    60  	}
    61  
    62  	for i < n {
    63  		dst[i] = src.Index(i)
    64  		i++
    65  	}
    66  
    67  	return n
    68  }
    69  
// gatherBitsAVX2 is declared without a Go body; its implementation lives
// outside this file (presumably in the package's amd64 assembly).
//
// gatherBits calls it only when cpu.X86.HasAVX2 is set and src.off >= 4, and
// always with src sliced to a length that is a multiple of 8.
//
//go:noescape
func gatherBitsAVX2(dst []byte, src Uint8Array)
    72  
// gatherBitsDefault is declared without a Go body; its implementation lives
// outside this file (presumably in the package's amd64 assembly).
//
// gatherBits uses it as the fallback when the AVX2 routine cannot be used,
// always with src sliced to a length that is a multiple of 8.
//
//go:noescape
func gatherBitsDefault(dst []byte, src Uint8Array)
    75  
// gather32AVX2 is declared without a Go body; its implementation lives
// outside this file (presumably in the package's amd64 assembly).
//
// gather32 calls it only when cpu.X86.HasAVX2 is set, passing a dst whose
// length and capacity are a multiple of 8 (and at least 16) together with the
// full, unsliced src.
//
//go:noescape
func gather32AVX2(dst []uint32, src Uint32Array)
    78  
// gather64AVX2 is declared without a Go body; its implementation lives
// outside this file (presumably in the package's amd64 assembly).
//
// gather64 calls it only when cpu.X86.HasAVX2 is set, passing a dst whose
// length and capacity are a multiple of 4 (and at least 8) together with the
// full, unsliced src.
//
//go:noescape
func gather64AVX2(dst []uint64, src Uint64Array)
    81  
// gather128 is declared without a Go body; its implementation lives outside
// this file (presumably in the package's amd64 assembly).
//
// NOTE(review): by analogy with gather32 and gather64 the int result is
// presumably the number of 16-byte values written to dst — confirm against
// the implementation.
//
//go:noescape
func gather128(dst [][16]byte, src Uint128Array) int