github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/simd/add_amd64.go (about)

     1  // Copyright 2018 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build amd64,!appengine
     6  
     7  package simd
     8  
     9  import (
    10  	"reflect"
    11  	"unsafe"
    12  )
    13  
    14  // *** the following functions are defined in add_amd64.s
    15  
        // addConst8TinyInplaceSSSE3Asm is the in-place add routine that takes no
        // byte count.  AddConst8UnsafeInplace calls it when len(main) <= 16, passing
        // the byte constant widened to int.
    16  //go:noescape
    17  func addConst8TinyInplaceSSSE3Asm(main unsafe.Pointer, val int)
    18  
        // addConst8OddInplaceSSSE3Asm is the in-place add routine that is given an
        // explicit byte count.  The Go callers visible in this file only invoke it
        // with nByte >= 16.
    19  //go:noescape
    20  func addConst8OddInplaceSSSE3Asm(main unsafe.Pointer, val, nByte int)
    21  
        // addConst8SSSE3Asm is the dst/src add routine behind AddConst8Unsafe, which
        // passes len(src) as nByte without any length guard.  See AddConst8Unsafe
        // for the capacity assumptions its callers must satisfy.
    22  //go:noescape
    23  func addConst8SSSE3Asm(dst, src unsafe.Pointer, val, nByte int)
    24  
        // addConst8OddSSSE3Asm is the dst/src add routine behind AddConst8, which
        // only invokes it after checking len(dst) == len(src) and nByte >= 16.
    25  //go:noescape
    26  func addConst8OddSSSE3Asm(dst, src unsafe.Pointer, val, nByte int)
    27  
        // subtractFromConst8TinyInplaceSSSE3Asm is the in-place (val - byte) routine
        // that takes no byte count.  SubtractFromConst8UnsafeInplace calls it when
        // len(main) <= 16.
    28  //go:noescape
    29  func subtractFromConst8TinyInplaceSSSE3Asm(main unsafe.Pointer, val int)
    30  
        // subtractFromConst8OddInplaceSSSE3Asm is the in-place (val - byte) routine
        // that is given an explicit byte count.  The Go callers visible in this file
        // only invoke it with nByte >= 16.
    31  //go:noescape
    32  func subtractFromConst8OddInplaceSSSE3Asm(main unsafe.Pointer, val, nByte int)
    33  
        // subtractFromConst8SSSE3Asm is the dst/src (val - byte) routine behind
        // SubtractFromConst8Unsafe, which passes len(src) as nByte without any
        // length guard.  See SubtractFromConst8Unsafe for the capacity assumptions.
    34  //go:noescape
    35  func subtractFromConst8SSSE3Asm(dst, src unsafe.Pointer, val, nByte int)
    36  
        // subtractFromConst8OddSSSE3Asm is the dst/src (val - byte) routine behind
        // SubtractFromConst8, which only invokes it after checking
        // len(dst) == len(src) and nByte >= 16.
    37  //go:noescape
    38  func subtractFromConst8OddSSSE3Asm(dst, src unsafe.Pointer, val, nByte int)
    39  
    40  // *** end assembly function signature(s)
    41  
    42  // AddConst8UnsafeInplace adds the given constant to every byte of main[], with
    43  // unsigned overflow.
    44  //
    45  // WARNING: This is a function designed to be used in inner loops, which
    46  // assumes without checking that capacity is at least RoundUpPow2(len(main),
    47  // bytesPerVec).  It also assumes that the caller does not care if a few bytes
    48  // past the end of main[] are changed.  Use the safe version of this function
    49  // if any of these properties are problematic.
    50  // These assumptions are always satisfied when the last
    51  // potentially-size-increasing operation on main[] is {Re}makeUnsafe(),
    52  // ResizeUnsafe(), or XcapUnsafe().
    53  func AddConst8UnsafeInplace(main []byte, val byte) {
    54  	// Note that the word-based algorithm doesn't work so well here, since we'd
    55  	// need to guard against bytes in the middle overflowing and polluting
    56  	// adjacent bytes.
    57  	mainLen := len(main)
    58  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
    59  	if mainLen <= 16 {
    60  		addConst8TinyInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val))
    61  		return
    62  	}
    63  	addConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen)
    64  }
    65  
    66  // AddConst8Inplace adds the given constant to every byte of main[], with
    67  // unsigned overflow.
    68  func AddConst8Inplace(main []byte, val byte) {
    69  	mainLen := len(main)
    70  	if mainLen < 16 {
    71  		for pos, mainByte := range main {
    72  			main[pos] = val + mainByte
    73  		}
    74  		return
    75  	}
    76  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
    77  	addConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen)
    78  }
    79  
    80  // AddConst8Unsafe sets dst[pos] := src[pos] + val for every byte in src (with
    81  // the usual unsigned overflow).
    82  //
    83  // WARNING: This is a function designed to be used in inner loops, which makes
    84  // assumptions about length and capacity which aren't checked at runtime.  Use
    85  // the safe version of this function when that's a problem.
    86  // Assumptions #2-3 are always satisfied when the last
    87  // potentially-size-increasing operation on src[] is {Re}makeUnsafe(),
    88  // ResizeUnsafe() or XcapUnsafe(), and the same is true for dst[].
    89  //
    90  // 1. len(src) and len(dst) are equal.
    91  //
    92  // 2. Capacities are at least RoundUpPow2(len(src) + 1, bytesPerVec).
    93  //
    94  // 3. The caller does not care if a few bytes past the end of dst[] are
    95  // changed.
    96  func AddConst8Unsafe(dst, src []byte, val byte) {
    97  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
    98  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
    99  	addConst8SSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcHeader.Len)
   100  }
   101  
   102  // AddConst8 sets dst[pos] := src[pos] + val for every byte in src (with the
   103  // usual unsigned overflow).  It panics if len(src) != len(dst).
   104  func AddConst8(dst, src []byte, val byte) {
   105  	srcLen := len(src)
   106  	if len(dst) != srcLen {
   107  		panic("AddConst8() requires len(src) == len(dst).")
   108  	}
   109  	if srcLen < 16 {
   110  		for pos, curByte := range src {
   111  			dst[pos] = curByte + val
   112  		}
   113  		return
   114  	}
   115  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
   116  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
   117  	addConst8OddSSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcLen)
   118  }
   119  
   120  // SubtractFromConst8UnsafeInplace subtracts every byte of main[] from the
   121  // given constant, with unsigned underflow.
   122  //
   123  // WARNING: This is a function designed to be used in inner loops, which
   124  // assumes without checking that capacity is at least RoundUpPow2(len(main),
   125  // bytesPerVec).  It also assumes that the caller does not care if a few bytes
   126  // past the end of main[] are changed.  Use the safe version of this function
   127  // if any of these properties are problematic.
   128  // These assumptions are always satisfied when the last
   129  // potentially-size-increasing operation on main[] is {Re}makeUnsafe(),
   130  // ResizeUnsafe(), or XcapUnsafe().
   131  func SubtractFromConst8UnsafeInplace(main []byte, val byte) {
   132  	mainLen := len(main)
   133  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
   134  	if mainLen <= 16 {
   135  		subtractFromConst8TinyInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val))
   136  		return
   137  	}
   138  	subtractFromConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen)
   139  }
   140  
   141  // SubtractFromConst8Inplace subtracts every byte of main[] from the given
   142  // constant, with unsigned underflow.
   143  func SubtractFromConst8Inplace(main []byte, val byte) {
   144  	mainLen := len(main)
   145  	if mainLen < 16 {
   146  		for pos, mainByte := range main {
   147  			main[pos] = val - mainByte
   148  		}
   149  		return
   150  	}
   151  	mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main))
   152  	subtractFromConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen)
   153  }
   154  
   155  // SubtractFromConst8Unsafe sets dst[pos] := val - src[pos] for every byte in
   156  // src (with the usual unsigned overflow).
   157  //
   158  // WARNING: This is a function designed to be used in inner loops, which makes
   159  // assumptions about length and capacity which aren't checked at runtime.  Use
   160  // the safe version of this function when that's a problem.
   161  // Assumptions #2-3 are always satisfied when the last
   162  // potentially-size-increasing operation on src[] is {Re}makeUnsafe(),
   163  // ResizeUnsafe() or XcapUnsafe(), and the same is true for dst[].
   164  //
   165  // 1. len(src) and len(dst) are equal.
   166  //
   167  // 2. Capacities are at least RoundUpPow2(len(src) + 1, bytesPerVec).
   168  //
   169  // 3. The caller does not care if a few bytes past the end of dst[] are
   170  // changed.
   171  func SubtractFromConst8Unsafe(dst, src []byte, val byte) {
   172  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
   173  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
   174  	subtractFromConst8SSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcHeader.Len)
   175  }
   176  
   177  // SubtractFromConst8 sets dst[pos] := val - src[pos] for every byte in src
   178  // (with the usual unsigned overflow).  It panics if len(src) != len(dst).
   179  func SubtractFromConst8(dst, src []byte, val byte) {
   180  	srcLen := len(src)
   181  	if len(dst) != srcLen {
   182  		panic("SubtractFromConst8() requires len(src) == len(dst).")
   183  	}
   184  	if srcLen < 16 {
   185  		for pos, curByte := range src {
   186  			dst[pos] = val - curByte
   187  		}
   188  		return
   189  	}
   190  	srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src))
   191  	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
   192  	subtractFromConst8OddSSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcLen)
   193  }