github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/simd/add_amd64.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 // +build amd64,!appengine 6 7 package simd 8 9 import ( 10 "reflect" 11 "unsafe" 12 ) 13 14 // *** the following functions are defined in add_amd64.s 15 16 //go:noescape 17 func addConst8TinyInplaceSSSE3Asm(main unsafe.Pointer, val int) 18 19 //go:noescape 20 func addConst8OddInplaceSSSE3Asm(main unsafe.Pointer, val, nByte int) 21 22 //go:noescape 23 func addConst8SSSE3Asm(dst, src unsafe.Pointer, val, nByte int) 24 25 //go:noescape 26 func addConst8OddSSSE3Asm(dst, src unsafe.Pointer, val, nByte int) 27 28 //go:noescape 29 func subtractFromConst8TinyInplaceSSSE3Asm(main unsafe.Pointer, val int) 30 31 //go:noescape 32 func subtractFromConst8OddInplaceSSSE3Asm(main unsafe.Pointer, val, nByte int) 33 34 //go:noescape 35 func subtractFromConst8SSSE3Asm(dst, src unsafe.Pointer, val, nByte int) 36 37 //go:noescape 38 func subtractFromConst8OddSSSE3Asm(dst, src unsafe.Pointer, val, nByte int) 39 40 // *** end assembly function signature(s) 41 42 // AddConst8UnsafeInplace adds the given constant to every byte of main[], with 43 // unsigned overflow. 44 // 45 // WARNING: This is a function designed to be used in inner loops, which 46 // assumes without checking that capacity is at least RoundUpPow2(len(main), 47 // bytesPerVec). It also assumes that the caller does not care if a few bytes 48 // past the end of main[] are changed. Use the safe version of this function 49 // if any of these properties are problematic. 50 // These assumptions are always satisfied when the last 51 // potentially-size-increasing operation on main[] is {Re}makeUnsafe(), 52 // ResizeUnsafe(), or XcapUnsafe(). 53 func AddConst8UnsafeInplace(main []byte, val byte) { 54 // Note that the word-based algorithm doesn't work so well here, since we'd 55 // need to guard against bytes in the middle overflowing and polluting 56 // adjacent bytes. 57 mainLen := len(main) 58 mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main)) 59 if mainLen <= 16 { 60 addConst8TinyInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val)) 61 return 62 } 63 addConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen) 64 } 65 66 // AddConst8Inplace adds the given constant to every byte of main[], with 67 // unsigned overflow. 68 func AddConst8Inplace(main []byte, val byte) { 69 mainLen := len(main) 70 if mainLen < 16 { 71 for pos, mainByte := range main { 72 main[pos] = val + mainByte 73 } 74 return 75 } 76 mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main)) 77 addConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen) 78 } 79 80 // AddConst8Unsafe sets dst[pos] := src[pos] + val for every byte in src (with 81 // the usual unsigned overflow). 82 // 83 // WARNING: This is a function designed to be used in inner loops, which makes 84 // assumptions about length and capacity which aren't checked at runtime. Use 85 // the safe version of this function when that's a problem. 86 // Assumptions #2-3 are always satisfied when the last 87 // potentially-size-increasing operation on src[] is {Re}makeUnsafe(), 88 // ResizeUnsafe() or XcapUnsafe(), and the same is true for dst[]. 89 // 90 // 1. len(src) and len(dst) are equal. 91 // 92 // 2. Capacities are at least RoundUpPow2(len(src) + 1, bytesPerVec). 93 // 94 // 3. The caller does not care if a few bytes past the end of dst[] are 95 // changed. 96 func AddConst8Unsafe(dst, src []byte, val byte) { 97 srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src)) 98 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 99 addConst8SSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcHeader.Len) 100 } 101 102 // AddConst8 sets dst[pos] := src[pos] + val for every byte in src (with the 103 // usual unsigned overflow). It panics if len(src) != len(dst). 104 func AddConst8(dst, src []byte, val byte) { 105 srcLen := len(src) 106 if len(dst) != srcLen { 107 panic("AddConst8() requires len(src) == len(dst).") 108 } 109 if srcLen < 16 { 110 for pos, curByte := range src { 111 dst[pos] = curByte + val 112 } 113 return 114 } 115 srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src)) 116 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 117 addConst8OddSSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcLen) 118 } 119 120 // SubtractFromConst8UnsafeInplace subtracts every byte of main[] from the 121 // given constant, with unsigned underflow. 122 // 123 // WARNING: This is a function designed to be used in inner loops, which 124 // assumes without checking that capacity is at least RoundUpPow2(len(main), 125 // bytesPerVec). It also assumes that the caller does not care if a few bytes 126 // past the end of main[] are changed. Use the safe version of this function 127 // if any of these properties are problematic. 128 // These assumptions are always satisfied when the last 129 // potentially-size-increasing operation on main[] is {Re}makeUnsafe(), 130 // ResizeUnsafe(), or XcapUnsafe(). 131 func SubtractFromConst8UnsafeInplace(main []byte, val byte) { 132 mainLen := len(main) 133 mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main)) 134 if mainLen <= 16 { 135 subtractFromConst8TinyInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val)) 136 return 137 } 138 subtractFromConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen) 139 } 140 141 // SubtractFromConst8Inplace subtracts every byte of main[] from the given 142 // constant, with unsigned underflow. 143 func SubtractFromConst8Inplace(main []byte, val byte) { 144 mainLen := len(main) 145 if mainLen < 16 { 146 for pos, mainByte := range main { 147 main[pos] = val - mainByte 148 } 149 return 150 } 151 mainHeader := (*reflect.SliceHeader)(unsafe.Pointer(&main)) 152 subtractFromConst8OddInplaceSSSE3Asm(unsafe.Pointer(mainHeader.Data), int(val), mainLen) 153 } 154 155 // SubtractFromConst8Unsafe sets dst[pos] := val - src[pos] for every byte in 156 // src (with the usual unsigned overflow). 157 // 158 // WARNING: This is a function designed to be used in inner loops, which makes 159 // assumptions about length and capacity which aren't checked at runtime. Use 160 // the safe version of this function when that's a problem. 161 // Assumptions #2-3 are always satisfied when the last 162 // potentially-size-increasing operation on src[] is {Re}makeUnsafe(), 163 // ResizeUnsafe() or XcapUnsafe(), and the same is true for dst[]. 164 // 165 // 1. len(src) and len(dst) are equal. 166 // 167 // 2. Capacities are at least RoundUpPow2(len(src) + 1, bytesPerVec). 168 // 169 // 3. The caller does not care if a few bytes past the end of dst[] are 170 // changed. 171 func SubtractFromConst8Unsafe(dst, src []byte, val byte) { 172 srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src)) 173 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 174 subtractFromConst8SSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcHeader.Len) 175 } 176 177 // SubtractFromConst8 sets dst[pos] := val - src[pos] for every byte in src 178 // (with the usual unsigned overflow). It panics if len(src) != len(dst). 179 func SubtractFromConst8(dst, src []byte, val byte) { 180 srcLen := len(src) 181 if len(dst) != srcLen { 182 panic("SubtractFromConst8() requires len(src) == len(dst).") 183 } 184 if srcLen < 16 { 185 for pos, curByte := range src { 186 dst[pos] = val - curByte 187 } 188 return 189 } 190 srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src)) 191 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 192 subtractFromConst8OddSSSE3Asm(unsafe.Pointer(dstHeader.Data), unsafe.Pointer(srcHeader.Data), int(val), srcLen) 193 }