github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/simd/xor_amd64.go (about) 1 // Code generated by "../gtl/generate.py --prefix=Xor -DOPCHAR=^ --package=simd --output=xor_amd64.go bitwise_amd64.go.tpl". DO NOT EDIT. 2 3 // Copyright 2021 GRAIL, Inc. All rights reserved. 4 // Use of this source code is governed by the Apache-2.0 5 // license that can be found in the LICENSE file. 6 7 //go:build amd64 && !appengine 8 // +build amd64,!appengine 9 10 package simd 11 12 import ( 13 "reflect" 14 "unsafe" 15 ) 16 17 // XorUnsafeInplace sets main[pos] := main[pos] ^ arg[pos] for every position 18 // in main[]. 19 // 20 // WARNING: This is a function designed to be used in inner loops, which makes 21 // assumptions about length and capacity which aren't checked at runtime. Use 22 // the safe version of this function when that's a problem. 23 // Assumptions #2-3 are always satisfied when the last 24 // potentially-size-increasing operation on arg[] is {Re}makeUnsafe(), 25 // ResizeUnsafe(), or XcapUnsafe(), and the same is true for main[]. 26 // 27 // 1. len(arg) and len(main) must be equal. 28 // 29 // 2. Capacities are at least RoundUpPow2(len(main) + 1, bytesPerVec). 30 // 31 // 3. The caller does not care if a few bytes past the end of main[] are 32 // changed. 33 func XorUnsafeInplace(main, arg []byte) { 34 mainLen := len(main) 35 argData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&arg)).Data) 36 mainData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&main)).Data) 37 argWordsIter := argData 38 mainWordsIter := mainData 39 if mainLen > 2*BytesPerWord { 40 nWordMinus2 := (mainLen - BytesPerWord - 1) >> Log2BytesPerWord 41 for widx := 0; widx < nWordMinus2; widx++ { 42 mainWord := *((*uintptr)(mainWordsIter)) 43 argWord := *((*uintptr)(argWordsIter)) 44 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 45 mainWordsIter = unsafe.Add(mainWordsIter, BytesPerWord) 46 argWordsIter = unsafe.Add(argWordsIter, BytesPerWord) 47 } 48 } else if mainLen <= BytesPerWord { 49 mainWord := *((*uintptr)(mainWordsIter)) 50 argWord := *((*uintptr)(argWordsIter)) 51 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 52 return 53 } 54 // The last two read-and-writes to main[] usually overlap. To avoid a 55 // store-to-load forwarding slowdown, we read both words before writing 56 // either. 57 // shuffleLookupOddInplaceSSSE3Asm() uses the same strategy. 58 mainWord1 := *((*uintptr)(mainWordsIter)) 59 argWord1 := *((*uintptr)(argWordsIter)) 60 finalOffset := uintptr(mainLen - BytesPerWord) 61 mainFinalWordPtr := unsafe.Add(mainData, finalOffset) 62 argFinalWordPtr := unsafe.Add(argData, finalOffset) 63 mainWord2 := *((*uintptr)(mainFinalWordPtr)) 64 argWord2 := *((*uintptr)(argFinalWordPtr)) 65 *((*uintptr)(mainWordsIter)) = mainWord1 ^ argWord1 66 *((*uintptr)(mainFinalWordPtr)) = mainWord2 ^ argWord2 67 } 68 69 // XorInplace sets main[pos] := arg[pos] ^ main[pos] for every position in 70 // main[]. It panics if slice lengths don't match. 71 func XorInplace(main, arg []byte) { 72 // This takes ~6-8% longer than XorUnsafeInplace on the short-array benchmark 73 // on my Mac. 74 mainLen := len(main) 75 if len(arg) != mainLen { 76 panic("XorInplace() requires len(arg) == len(main).") 77 } 78 if mainLen < BytesPerWord { 79 // It's probably possible to do better here (e.g. when mainLen is in 4..7, 80 // operate on uint32s), but I won't worry about it unless/until that's 81 // actually a common case. 82 for pos, argByte := range arg { 83 main[pos] = main[pos] ^ argByte 84 } 85 return 86 } 87 argData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&arg)).Data) 88 mainData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&main)).Data) 89 argWordsIter := argData 90 mainWordsIter := mainData 91 if mainLen > 2*BytesPerWord { 92 nWordMinus2 := (mainLen - BytesPerWord - 1) >> Log2BytesPerWord 93 for widx := 0; widx < nWordMinus2; widx++ { 94 mainWord := *((*uintptr)(mainWordsIter)) 95 argWord := *((*uintptr)(argWordsIter)) 96 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 97 mainWordsIter = unsafe.Add(mainWordsIter, BytesPerWord) 98 argWordsIter = unsafe.Add(argWordsIter, BytesPerWord) 99 } 100 } 101 mainWord1 := *((*uintptr)(mainWordsIter)) 102 argWord1 := *((*uintptr)(argWordsIter)) 103 finalOffset := uintptr(mainLen - BytesPerWord) 104 mainFinalWordPtr := unsafe.Add(mainData, finalOffset) 105 argFinalWordPtr := unsafe.Add(argData, finalOffset) 106 mainWord2 := *((*uintptr)(mainFinalWordPtr)) 107 argWord2 := *((*uintptr)(argFinalWordPtr)) 108 *((*uintptr)(mainWordsIter)) = mainWord1 ^ argWord1 109 *((*uintptr)(mainFinalWordPtr)) = mainWord2 ^ argWord2 110 } 111 112 // XorUnsafe sets dst[pos] := src1[pos] ^ src2[pos] for every position in dst. 113 // 114 // WARNING: This is a function designed to be used in inner loops, which makes 115 // assumptions about length and capacity which aren't checked at runtime. Use 116 // the safe version of this function when that's a problem. 117 // Assumptions #2-3 are always satisfied when the last 118 // potentially-size-increasing operation on src1[] is {Re}makeUnsafe(), 119 // ResizeUnsafe(), or XcapUnsafe(), and the same is true for src2[] and dst[]. 120 // 121 // 1. len(src1), len(src2), and len(dst) must be equal. 122 // 123 // 2. Capacities are at least RoundUpPow2(len(dst) + 1, bytesPerVec). 124 // 125 // 3. The caller does not care if a few bytes past the end of dst[] are 126 // changed. 127 func XorUnsafe(dst, src1, src2 []byte) { 128 src1Header := (*reflect.SliceHeader)(unsafe.Pointer(&src1)) 129 src2Header := (*reflect.SliceHeader)(unsafe.Pointer(&src2)) 130 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 131 nWord := DivUpPow2(len(dst), BytesPerWord, Log2BytesPerWord) 132 133 src1Iter := unsafe.Pointer(src1Header.Data) 134 src2Iter := unsafe.Pointer(src2Header.Data) 135 dstIter := unsafe.Pointer(dstHeader.Data) 136 for widx := 0; widx < nWord; widx++ { 137 src1Word := *((*uintptr)(src1Iter)) 138 src2Word := *((*uintptr)(src2Iter)) 139 *((*uintptr)(dstIter)) = src1Word ^ src2Word 140 src1Iter = unsafe.Add(src1Iter, BytesPerWord) 141 src2Iter = unsafe.Add(src2Iter, BytesPerWord) 142 dstIter = unsafe.Add(dstIter, BytesPerWord) 143 } 144 } 145 146 // Xor sets dst[pos] := src1[pos] ^ src2[pos] for every position in dst. It 147 // panics if slice lengths don't match. 148 func Xor(dst, src1, src2 []byte) { 149 dstLen := len(dst) 150 if (len(src1) != dstLen) || (len(src2) != dstLen) { 151 panic("Xor() requires len(src1) == len(src2) == len(dst).") 152 } 153 if dstLen < BytesPerWord { 154 for pos, src1Byte := range src1 { 155 dst[pos] = src1Byte ^ src2[pos] 156 } 157 return 158 } 159 src1Data := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&src1)).Data) 160 src2Data := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&src2)).Data) 161 dstData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&dst)).Data) 162 nWordMinus1 := (dstLen - 1) >> Log2BytesPerWord 163 164 src1Iter := src1Data 165 src2Iter := src2Data 166 dstIter := dstData 167 for widx := 0; widx < nWordMinus1; widx++ { 168 src1Word := *((*uintptr)(src1Iter)) 169 src2Word := *((*uintptr)(src2Iter)) 170 *((*uintptr)(dstIter)) = src1Word ^ src2Word 171 src1Iter = unsafe.Add(src1Iter, BytesPerWord) 172 src2Iter = unsafe.Add(src2Iter, BytesPerWord) 173 dstIter = unsafe.Add(dstIter, BytesPerWord) 174 } 175 // No store-forwarding problem here. 176 finalOffset := uintptr(dstLen - BytesPerWord) 177 src1Iter = unsafe.Add(src1Data, finalOffset) 178 src2Iter = unsafe.Add(src2Data, finalOffset) 179 dstIter = unsafe.Add(dstData, finalOffset) 180 src1Word := *((*uintptr)(src1Iter)) 181 src2Word := *((*uintptr)(src2Iter)) 182 *((*uintptr)(dstIter)) = src1Word ^ src2Word 183 } 184 185 // XorConst8UnsafeInplace sets main[pos] := main[pos] ^ val for every position 186 // in main[]. 187 // 188 // WARNING: This is a function designed to be used in inner loops, which makes 189 // assumptions about length and capacity which aren't checked at runtime. Use 190 // the safe version of this function when that's a problem. 191 // These assumptions are always satisfied when the last 192 // potentially-size-increasing operation on main[] is {Re}makeUnsafe(), 193 // ResizeUnsafe(), or XcapUnsafe(). 194 // 195 // 1. cap(main) is at least RoundUpPow2(len(main) + 1, bytesPerVec). 196 // 197 // 2. The caller does not care if a few bytes past the end of main[] are 198 // changed. 199 func XorConst8UnsafeInplace(main []byte, val byte) { 200 mainLen := len(main) 201 argWord := 0x101010101010101 * uintptr(val) 202 mainData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&main)).Data) 203 mainWordsIter := mainData 204 if mainLen > 2*BytesPerWord { 205 nWordMinus2 := (mainLen - BytesPerWord - 1) >> Log2BytesPerWord 206 for widx := 0; widx < nWordMinus2; widx++ { 207 mainWord := *((*uintptr)(mainWordsIter)) 208 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 209 mainWordsIter = unsafe.Add(mainWordsIter, BytesPerWord) 210 } 211 } else if mainLen <= BytesPerWord { 212 mainWord := *((*uintptr)(mainWordsIter)) 213 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 214 return 215 } 216 mainWord1 := *((*uintptr)(mainWordsIter)) 217 finalOffset := uintptr(mainLen - BytesPerWord) 218 mainFinalWordPtr := unsafe.Add(mainData, finalOffset) 219 mainWord2 := *((*uintptr)(mainFinalWordPtr)) 220 *((*uintptr)(mainWordsIter)) = mainWord1 ^ argWord 221 *((*uintptr)(mainFinalWordPtr)) = mainWord2 ^ argWord 222 } 223 224 // XorConst8Inplace sets main[pos] := main[pos] ^ val for every position in 225 // main[]. 226 func XorConst8Inplace(main []byte, val byte) { 227 mainLen := len(main) 228 if mainLen < BytesPerWord { 229 for pos, mainByte := range main { 230 main[pos] = mainByte ^ val 231 } 232 return 233 } 234 argWord := 0x101010101010101 * uintptr(val) 235 mainData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&main)).Data) 236 mainWordsIter := mainData 237 if mainLen > 2*BytesPerWord { 238 nWordMinus2 := (mainLen - BytesPerWord - 1) >> Log2BytesPerWord 239 for widx := 0; widx < nWordMinus2; widx++ { 240 mainWord := *((*uintptr)(mainWordsIter)) 241 *((*uintptr)(mainWordsIter)) = mainWord ^ argWord 242 mainWordsIter = unsafe.Add(mainWordsIter, BytesPerWord) 243 } 244 } 245 mainWord1 := *((*uintptr)(mainWordsIter)) 246 finalOffset := uintptr(mainLen - BytesPerWord) 247 mainFinalWordPtr := unsafe.Add(mainData, finalOffset) 248 mainWord2 := *((*uintptr)(mainFinalWordPtr)) 249 *((*uintptr)(mainWordsIter)) = mainWord1 ^ argWord 250 *((*uintptr)(mainFinalWordPtr)) = mainWord2 ^ argWord 251 } 252 253 // XorConst8Unsafe sets dst[pos] := src[pos] ^ val for every position in dst. 254 // 255 // WARNING: This is a function designed to be used in inner loops, which makes 256 // assumptions about length and capacity which aren't checked at runtime. Use 257 // the safe version of this function when that's a problem. 258 // Assumptions #2-3 are always satisfied when the last 259 // potentially-size-increasing operation on src[] is {Re}makeUnsafe(), 260 // ResizeUnsafe(), or XcapUnsafe(), and the same is true for dst[]. 261 // 262 // 1. len(src) and len(dst) must be equal. 263 // 264 // 2. Capacities are at least RoundUpPow2(len(dst) + 1, bytesPerVec). 265 // 266 // 3. The caller does not care if a few bytes past the end of dst[] are 267 // changed. 268 func XorConst8Unsafe(dst, src []byte, val byte) { 269 srcHeader := (*reflect.SliceHeader)(unsafe.Pointer(&src)) 270 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst)) 271 nWord := DivUpPow2(len(dst), BytesPerWord, Log2BytesPerWord) 272 argWord := 0x101010101010101 * uintptr(val) 273 274 srcIter := unsafe.Pointer(srcHeader.Data) 275 dstIter := unsafe.Pointer(dstHeader.Data) 276 for widx := 0; widx < nWord; widx++ { 277 srcWord := *((*uintptr)(srcIter)) 278 *((*uintptr)(dstIter)) = srcWord ^ argWord 279 srcIter = unsafe.Add(srcIter, BytesPerWord) 280 dstIter = unsafe.Add(dstIter, BytesPerWord) 281 } 282 } 283 284 // XorConst8 sets dst[pos] := src[pos] ^ val for every position in dst. It 285 // panics if slice lengths don't match. 286 func XorConst8(dst, src []byte, val byte) { 287 dstLen := len(dst) 288 if len(src) != dstLen { 289 panic("XorConst8() requires len(src) == len(dst).") 290 } 291 if dstLen < BytesPerWord { 292 for pos, srcByte := range src { 293 dst[pos] = srcByte ^ val 294 } 295 return 296 } 297 srcData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&src)).Data) 298 dstData := unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&dst)).Data) 299 nWordMinus1 := (dstLen - 1) >> Log2BytesPerWord 300 argWord := 0x101010101010101 * uintptr(val) 301 302 srcIter := unsafe.Pointer(srcData) 303 dstIter := unsafe.Pointer(dstData) 304 for widx := 0; widx < nWordMinus1; widx++ { 305 srcWord := *((*uintptr)(srcIter)) 306 *((*uintptr)(dstIter)) = srcWord ^ argWord 307 srcIter = unsafe.Add(srcIter, BytesPerWord) 308 dstIter = unsafe.Add(dstIter, BytesPerWord) 309 } 310 finalOffset := uintptr(dstLen - BytesPerWord) 311 srcIter = unsafe.Add(srcData, finalOffset) 312 dstIter = unsafe.Add(dstData, finalOffset) 313 srcWord := *((*uintptr)(srcIter)) 314 *((*uintptr)(dstIter)) = srcWord ^ argWord 315 }