github.com/grailbio/base@v0.0.11/simd/multibyte_amd64.go (about)

     1  // Copyright 2021 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build amd64 && !appengine
     6  // +build amd64,!appengine
     7  
     8  package simd
     9  
    10  import (
    11  	"reflect"
    12  	"unsafe"
    13  )
    14  
    15  // This file contains functions which operate on slices of 2- or 4-byte
    16  // elements (typically small structs or integers) in ways that differ from the
    17  // corresponding operations on single-byte elements.
    18  // In this context, there is little point in making the interface based on
    19  // []byte, since the caller will need to unsafely cast to it.  Instead, most
    20  // functions take unsafe.Pointer(s) and a count, and have names ending in
    21  // 'Raw'; the caller should write safe wrappers around them when appropriate.
    22  // We provide sample wrappers for the int16 and uint16 cases.  (Originally did
    23  // this for int32/uint32, but turns out the compiler has hardcoded
    24  // optimizations for those cases which are currently missing for {u}int16.)
    25  
    26  // *** the following functions are defined in multibyte_amd64.s
        //
        // They are declared here without bodies so that Go code can call them.
        // Each carries the //go:noescape compiler directive, which tells escape
        // analysis that the pointer arguments do not escape into the heap or
        // into return values.
    27  
        // index16SSE2Asm is the assembly search routine behind IndexU16.  In
        // this file it is called only with nElem >= 8, receiving a pointer to
        // the first 2-byte element, the search value widened to int, and the
        // element count; IndexU16 returns its result unchanged.
        // NOTE(review): behavior for nElem < 8 is not established by this
        // file -- confirm against multibyte_amd64.s before calling directly.
    28  //go:noescape
    29  func index16SSE2Asm(main unsafe.Pointer, val, nElem int) int
    30  
        // reverse16InplaceSSSE3Asm is the assembly routine behind
        // Reverse16InplaceRaw.  In this file it is called only with nElem > 8;
        // smaller counts are handled in Go.
        // NOTE(review): behavior for nElem <= 8 is not established by this
        // file -- confirm against multibyte_amd64.s before calling directly.
    31  //go:noescape
    32  func reverse16InplaceSSSE3Asm(main unsafe.Pointer, nElem int)
    33  
        // reverse16SSSE3Asm is the assembly routine behind Reverse16Raw.  In
        // this file it is called only with nElem >= 8; smaller counts are
        // handled in Go.
        // NOTE(review): behavior for nElem < 8, and for overlapping dst/src,
        // is not established by this file -- confirm against
        // multibyte_amd64.s before calling directly.
    34  //go:noescape
    35  func reverse16SSSE3Asm(dst, src unsafe.Pointer, nElem int)
    36  
    37  // *** end assembly function signature(s)
    38  
    39  // Memset16Raw assumes dst points to an array of nElem 2-byte elements, and
    40  // valPtr points to a single 2-byte element.  It fills dst with copies of
    41  // *valPtr.
    42  func Memset16Raw(dst, valPtr unsafe.Pointer, nElem int) {
    43  	// Strictly speaking, it may be slightly more efficient to pass val directly
    44  	// on the stack as e.g. a uint16, but this interface lets us avoid worrying
    45  	// about little-endian vs. big-endian and leads to cleaner struct-filling
    46  	// code.
    47  	val := *((*uint16)(valPtr))
    48  	if nElem < BytesPerWord/2 {
    49  		for idx := 0; idx != nElem; idx++ {
    50  			*((*uint16)(dst)) = val
    51  			dst = unsafe.Add(dst, 2)
    52  		}
    53  		return
    54  	}
    55  	valWord := uintptr(0x1000100010001) * uintptr(val)
    56  	nWordMinus1 := (nElem - 1) >> (Log2BytesPerWord - 1)
    57  	dstWordsIter := dst
    58  	for widx := 0; widx != nWordMinus1; widx++ {
    59  		*((*uintptr)(dstWordsIter)) = valWord
    60  		dstWordsIter = unsafe.Add(dstWordsIter, BytesPerWord)
    61  	}
    62  	dstWordsIter = unsafe.Add(dst, nElem*2-BytesPerWord)
    63  	*((*uintptr)(dstWordsIter)) = valWord
    64  }
    65  
    66  // Memset32Raw assumes dst points to an array of nElem 4-byte elements, and
    67  // valPtr points to a single 4-byte element.  It fills dst with copies of
    68  // *valPtr.
    69  func Memset32Raw(dst, valPtr unsafe.Pointer, nElem int) {
    70  	val := *((*uint32)(valPtr))
    71  	if nElem < BytesPerWord/4 {
    72  		if nElem != 0 {
    73  			*((*uint32)(dst)) = val
    74  		}
    75  		return
    76  	}
    77  	valWord := uintptr(0x100000001) * uintptr(val)
    78  	nWordMinus1 := (nElem - 1) >> (Log2BytesPerWord - 2)
    79  	dstWordsIter := dst
    80  	for widx := 0; widx != nWordMinus1; widx++ {
    81  		*((*uintptr)(dstWordsIter)) = valWord
    82  		dstWordsIter = unsafe.Add(dstWordsIter, BytesPerWord)
    83  	}
    84  	dstWordsIter = unsafe.Add(dst, nElem*4-BytesPerWord)
    85  	*((*uintptr)(dstWordsIter)) = valWord
    86  }
    87  
    88  // RepeatI16 fills dst[] with the given int16.
    89  func RepeatI16(dst []int16, val int16) {
    90  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
    91  	Memset16Raw(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(&val), dstHeader.Len)
    92  }
    93  
    94  // RepeatU16 fills dst[] with the given uint16.
    95  func RepeatU16(dst []uint16, val uint16) {
    96  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
    97  	Memset16Raw(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(&val), dstHeader.Len)
    98  }
    99  
   100  // IndexU16 returns the index of the first instance of val in main, or -1 if
   101  // val is not present in main.
   102  func IndexU16(main []uint16, val uint16) int {
   103  	if len(main) < 8 {
   104  		for i, v := range main {
   105  			if v == val {
   106  				return i
   107  			}
   108  		}
   109  		return -1
   110  	}
   111  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
   112  	return index16SSE2Asm(unsafe.Pointer(mainHeader.Data), int(val), mainHeader.Len)
   113  }
   114  
   115  // (Add a function which has the original little-endian byte-slice semantics if
   116  // we ever need it.)
   117  
   118  // Reverse16InplaceRaw assumes main points to an array of ct 2-byte elements,
   119  // and reverses it in-place.
   120  func Reverse16InplaceRaw(main unsafe.Pointer, nElem int) {
   121  	if nElem <= 8 {
   122  		nElemDiv2 := nElem >> 1
   123  		fwdIter := main
   124  		revIter := unsafe.Add(main, (nElem-1)*2)
   125  		for idx := 0; idx != nElemDiv2; idx++ {
   126  			origLeftVal := *((*uint16)(fwdIter))
   127  			*((*uint16)(fwdIter)) = *((*uint16)(revIter))
   128  			*((*uint16)(revIter)) = origLeftVal
   129  			fwdIter = unsafe.Add(fwdIter, 2)
   130  			revIter = unsafe.Add(revIter, -2)
   131  		}
   132  		return
   133  	}
   134  	reverse16InplaceSSSE3Asm(main, nElem)
   135  }
   136  
   137  // Reverse16Raw assumes dst and src both point to arrays of ct 2-byte elements,
   138  // and sets dst[pos] := src[ct - 1 - pos] for each position.
   139  func Reverse16Raw(dst, src unsafe.Pointer, nElem int) {
   140  	if nElem < 8 {
   141  		srcIter := unsafe.Add(src, (nElem-1)*2)
   142  		dstIter := dst
   143  		for idx := 0; idx != nElem; idx++ {
   144  			*((*uint16)(dstIter)) = *((*uint16)(srcIter))
   145  			srcIter = unsafe.Add(srcIter, -2)
   146  			dstIter = unsafe.Add(dstIter, 2)
   147  		}
   148  		return
   149  	}
   150  	reverse16SSSE3Asm(dst, src, nElem)
   151  }
   152  
   153  // ReverseI16Inplace reverses a []int16 in-place.
   154  func ReverseI16Inplace(main []int16) {
   155  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
   156  	Reverse16InplaceRaw(unsafe.Pointer(mainHeader.Data), mainHeader.Len)
   157  }
   158  
   159  // ReverseU16Inplace reverses a []uint16 in-place.
   160  func ReverseU16Inplace(main []uint16) {
   161  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
   162  	Reverse16InplaceRaw(unsafe.Pointer(mainHeader.Data), mainHeader.Len)
   163  }
   164  
   165  // ReverseI16 sets dst[len(src) - 1 - pos] := src[pos] for each position in
   166  // src.  It panics if len(src) != len(dst).
   167  func ReverseI16(dst, src []int16) {
   168  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
   169  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
   170  	nElem := srcHeader.Len
   171  	if nElem != dstHeader.Len {
   172  		panic("ReverseI16() requires len(src) == len(dst).")
   173  	}
   174  	Reverse16Raw(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), nElem)
   175  }
   176  
   177  // ReverseU16 sets dst[len(src) - 1 - pos] := src[pos] for each position in
   178  // src.  It panics if len(src) != len(dst).
   179  func ReverseU16(dst, src []uint16) {
   180  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
   181  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
   182  	nElem := srcHeader.Len
   183  	if nElem != dstHeader.Len {
   184  		panic("ReverseU16() requires len(src) == len(dst).")
   185  	}
   186  	Reverse16Raw(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), nElem)
   187  }
   188  
   189  // Benchmark results suggest that Reverse32Raw is unimportant.