// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// For more details about how various memory models are
// enforced on POWER, the following paper provides more
// details about how they enforce C/C++ like models. This
// gives context about why the strange looking code
// sequences below work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html

TEXT ·PublicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
	// LWSYNC is the "export" barrier recommended by Power ISA
	// v2.07 book II, appendix B.2.2.2.
	// LWSYNC is a load/load, load/store, and store/store barrier.
	LWSYNC
	RET

//
// Store
//
// Sequentially consistent stores: a full SYNC before the plain
// store orders it against all prior loads and stores.

TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVB	val+8(FP), R4
	SYNC
	MOVB	R4, 0(R3)
	RET

TEXT ·Store32(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	SYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	SYNC
	MOVD	R4, 0(R3)
	RET

TEXT ·StoreUintptr(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·StorePointer(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·StoreInt32(SB), NOSPLIT, $0-12
	// BUGFIX: was `BR ·Store(SB)`, but no ·Store is defined in this
	// file — the 32-bit store above is named ·Store32 (upstream Go
	// calls it ·Store; the rename was not propagated here). Target
	// the 32-bit store, matching StoreInt64 -> Store64 below.
	BR	·Store32(SB)

TEXT ·StoreInt64(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

//
// StoreRel
//
// Release stores: LWSYNC (cheaper than SYNC) orders all prior
// loads/stores before the store itself.

TEXT ·StoreRel32(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
	MOVD	R4, 0(R3)
	RET

TEXT ·StoreRelUintptr(SB), NOSPLIT, $0-16
	BR	·StoreRel64(SB)

//
// Load
//
// Sequentially consistent loads use the SYNC; load; compare-with-self;
// branch; ISYNC sequence from the paper cited above: the always-false
// conditional branch creates a control dependency on the loaded value,
// and ISYNC then prevents any later instruction from starting before
// the load completes.

// uint8 ·Load8(uint8 volatile* ptr)
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3
	SYNC
	MOVBZ	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVB	R3, ret+8(FP)
	RET

// uint32 ·Load32(uint32 volatile* ptr)
TEXT ·Load32(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	SYNC
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint64 ·Load64(uint64 volatile* ptr)
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

TEXT ·LoadUintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

// void *·LoadPointer(void *volatile *ptr)
TEXT ·LoadPointer(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

TEXT ·LoadUint(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

TEXT ·LoadInt32(SB), NOSPLIT, $0-12
	BR	·Load32(SB)

TEXT ·LoadInt64(SB), NOSPLIT, $0-16
	BR	·Load64(SB)

//
// LoadAcq
//
// Acquire loads: same compare/branch/ISYNC control-dependency trick
// as the loads above, but without the leading SYNC — later accesses
// cannot be performed before the load, while prior accesses are
// unordered relative to it.

// uint32 ·LoadAcq32(uint32 volatile* ptr)
TEXT ·LoadAcq32(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint64 ·LoadAcq64(uint64 volatile* ptr)
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

TEXT ·LoadAcqUintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·LoadAcq64(SB)

//
// bitwise
//
// RMW ops are built from load-reserve (LBAR/LWAR) / store-conditional
// (STBCCC/STWCCC) loops; the store-conditional sets CR0, and BNE
// retries when the reservation was lost.

// void ·Or8(byte volatile*, byte);
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	OR	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// func Or32(addr *uint32, v uint32)
TEXT ·Or32(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	OR	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET

// void ·And8(byte volatile*, byte);
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	AND	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// func And32(addr *uint32, v uint32)
TEXT ·And32(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	AND	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET

//
// Swap
//

// uint32 Swap32(ptr *uint32, new uint32)
TEXT ·Swap32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	STWCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVW	R3, ret+16(FP)
	RET

// uint64 Swap64(ptr *uint64, new uint64)
TEXT ·Swap64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	STDCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVD	R3, ret+16(FP)
	RET

TEXT ·SwapUintptr(SB), NOSPLIT, $0-24
	BR	·Swap64(SB)

TEXT ·SwapInt32(SB), NOSPLIT, $0-20
	BR	·Swap32(SB)

TEXT ·SwapInt64(SB), NOSPLIT, $0-24
	BR	·Swap64(SB)

//
// Add
//

// uint32 Add32(uint32 volatile *ptr, int32 delta)
TEXT ·Add32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	ADD	R5, R3
	STWCCC	R3, (R4)
	BNE	-3(PC)
	MOVW	R3, ret+16(FP)
	RET

// uint64 Add64(uint64 volatile *val, int64 delta)
TEXT ·Add64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	ADD	R5, R3
	STDCCC	R3, (R4)
	BNE	-3(PC)
	MOVD	R3, ret+16(FP)
	RET

TEXT ·AddUintptr(SB), NOSPLIT, $0-24
	BR	·Add64(SB)

TEXT ·AddInt32(SB), NOSPLIT, $0-20
	BR	·Add32(SB)

TEXT ·AddInt64(SB), NOSPLIT, $0-24
	BR	·Add64(SB)

//
// Compare and swap
//
// Returns 1 in the byte result on success, 0 on mismatch. The
// trailing LWSYNC on the success path makes the CAS a full barrier.

// bool Cas32(uint32 *ptr, uint32 old, uint32 new)
TEXT ·Cas32(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), R6
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)
	RET

// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC
cas64_again:
	LDAR	(R3), R6
	CMP	R6, R4
	BNE	cas64_fail
	STDCCC	R5, (R3)
	BNE	cas64_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+24(FP)
	RET
cas64_fail:
	MOVB	R0, ret+24(FP)
	RET

TEXT ·CasUintptr(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// bool CasUnsafePointer(void **val, void *old, void *new)
TEXT ·CasUnsafePointer(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

TEXT ·CasInt32(SB), NOSPLIT, $0-17
	BR	·Cas32(SB)

TEXT ·CasInt64(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

//
// CasRel
//
// Release CAS: like Cas32 but without the trailing barrier on
// success — only prior accesses are ordered before the operation.

TEXT ·CasRel32(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), $0, R6 // 0 = Mutex release hint
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)
	RET