github.com/primecitizens/pcz/std@v0.2.1/core/atomic/atomic_ppc64x.s

// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// For details about how various memory models are enforced on
// POWER, the following paper describes how C/C++-like models are
// mapped onto Power instruction sequences. It gives context for
// why the strange-looking code sequences below work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html

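// The code below uses the following mappings (a summary of the
// sequences in this file, not a normative specification):
//
//	fully fenced store (Store*):  SYNC;   store
//	fully fenced load  (Load*):   SYNC;   load; CMP; BC; ISYNC
//	release store (StoreRel*):    LWSYNC; store
//	acquire load (LoadAcq*):      load; CMP; BC; ISYNC
//
// The CMP/BC pair compares the loaded value against itself and falls
// through, creating a control dependency on the load so that the
// following ISYNC keeps later memory accesses from moving before it.
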
TEXT ·PublicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
	// LWSYNC is the "export" barrier recommended by Power ISA
	// v2.07 book II, appendix B.2.2.2.
	// LWSYNC is a load/load, load/store, and store/store barrier.
	LWSYNC
	RET

//
// Store
//

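// Store8/Store32/Store64 are fully fenced stores: the SYNC executed
// before the plain store orders the store after all earlier loads and
// stores. Roughly:
//
//	SYNC
//	*ptr = val
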
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD ptr+0(FP), R3
	MOVB val+8(FP), R4
	SYNC
	MOVB R4, 0(R3)
	RET

TEXT ·Store32(SB), NOSPLIT, $0-12
	MOVD ptr+0(FP), R3
	MOVW val+8(FP), R4
	SYNC
	MOVW R4, 0(R3)
	RET

TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD ptr+0(FP), R3
	MOVD val+8(FP), R4
	SYNC
	MOVD R4, 0(R3)
	RET

TEXT ·StoreUintptr(SB), NOSPLIT, $0-16
	BR ·Store64(SB)

TEXT ·StorePointer(SB), NOSPLIT, $0-16
	BR ·Store64(SB)

TEXT ·StoreInt32(SB), NOSPLIT, $0-12
	BR ·Store32(SB)

TEXT ·StoreInt64(SB), NOSPLIT, $0-16
	BR ·Store64(SB)

//
// StoreRel
//

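// StoreRel* are store-release operations: LWSYNC orders the store
// after all earlier loads and stores, but no full SYNC is issued.
// Roughly:
//
//	LWSYNC
//	*ptr = val
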
TEXT ·StoreRel32(SB), NOSPLIT, $0-12
	MOVD ptr+0(FP), R3
	MOVW val+8(FP), R4
	LWSYNC
	MOVW R4, 0(R3)
	RET

TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD ptr+0(FP), R3
	MOVD val+8(FP), R4
	LWSYNC
	MOVD R4, 0(R3)
	RET

TEXT ·StoreRelUintptr(SB), NOSPLIT, $0-16
	BR ·StoreRel64(SB)

//
// Load
//

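// Load8/Load32/Load64 are fully fenced loads using the sequence
//
//	SYNC
//	r = *ptr
//	CMP r, r            // always equal
//	BC 4, 30, 1(PC)     // never-taken branch dependent on r
//	ISYNC
//
// The compare and branch create a control dependency on the loaded
// value, so the ISYNC prevents later memory accesses from being
// reordered before the load.
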
// uint8 ·Load8(uint8 volatile* ptr)
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD ptr+0(FP), R3
	SYNC
	MOVBZ 0(R3), R3
	CMP R3, R3, CR7
	BC 4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVB R3, ret+8(FP)
	RET

// uint32 ·Load32(uint32 volatile* ptr)
TEXT ·Load32(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD ptr+0(FP), R3
	SYNC
	MOVWZ 0(R3), R3
	CMPW R3, R3, CR7
	BC 4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW R3, ret+8(FP)
	RET

// uint64 ·Load64(uint64 volatile* ptr)
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD ptr+0(FP), R3
	SYNC
	MOVD 0(R3), R3
	CMP R3, R3, CR7
	BC 4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD R3, ret+8(FP)
	RET

TEXT ·LoadUintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR ·Load64(SB)

// void *·LoadPointer(void *volatile *ptr)
TEXT ·LoadPointer(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD ptr+0(FP), R3
	SYNC
	MOVD 0(R3), R3
	CMP R3, R3, CR7
	BC 4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD R3, ret+8(FP)
	RET

TEXT ·LoadUint(SB), NOSPLIT|NOFRAME, $0-16
	BR ·Load64(SB)

TEXT ·LoadInt32(SB), NOSPLIT, $0-12
	BR ·Load32(SB)

TEXT ·LoadInt64(SB), NOSPLIT, $0-16
	BR ·Load64(SB)

//
// LoadAcq
//

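// LoadAcq* are load-acquire operations: the same load/CMP/BC/ISYNC
// sequence as Load*, but without the leading SYNC, so earlier
// accesses are not ordered against the load.
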
// uint32 ·LoadAcq32(uint32 volatile* ptr)
TEXT ·LoadAcq32(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD   ptr+0(FP), R3
	MOVWZ  0(R3), R3
	CMPW   R3, R3, CR7
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVW   R3, ret+8(FP)
	RET

// uint64 ·LoadAcq64(uint64 volatile* ptr)
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD   ptr+0(FP), R3
	MOVD   0(R3), R3
	CMP    R3, R3, CR7
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVD   R3, ret+8(FP)
	RET

TEXT ·LoadAcqUintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR ·LoadAcq64(SB)

//
// bitwise
//

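// The bitwise operations are load-reserve/store-conditional retry
// loops (LBAR/STBCCC for bytes, LWAR/STWCCC for words). The LWSYNC
// before the loop orders the update after earlier accesses. Roughly:
//
//	LWSYNC
//	for {
//		old := load-reserve(ptr)
//		if store-conditional(ptr, old OP val) { break }
//	}
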
// void ·Or8(byte volatile*, byte);
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD ptr+0(FP), R3
	MOVBZ val+8(FP), R4
	LWSYNC
again:
	LBAR (R3), R6
	OR R4, R6
	STBCCC R6, (R3)
	BNE again
	RET

// func Or32(addr *uint32, v uint32)
TEXT ·Or32(SB), NOSPLIT, $0-12
	MOVD ptr+0(FP), R3
	MOVW val+8(FP), R4
	LWSYNC
again:
	LWAR (R3), R6
	OR R4, R6
	STWCCC R6, (R3)
	BNE again
	RET

// void ·And8(byte volatile*, byte);
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD ptr+0(FP), R3
	MOVBZ val+8(FP), R4
	LWSYNC
again:
	LBAR (R3), R6
	AND R4, R6
	STBCCC R6, (R3)
	BNE again
	RET

// func And32(addr *uint32, v uint32)
TEXT ·And32(SB), NOSPLIT, $0-12
	MOVD ptr+0(FP), R3
	MOVW val+8(FP), R4
	LWSYNC
again:
	LWAR (R3), R6
	AND R4, R6
	STWCCC R6, (R3)
	BNE again
	RET

//
// Swap
//

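// Swap32/Swap64 atomically exchange a value and return the old one:
//
//	LWSYNC
//	old = *ptr
//	*ptr = new
//	ISYNC
//	return old
//
// LWSYNC before and ISYNC after the reserve/store-conditional loop
// give release ordering before the exchange and acquire-style
// ordering after it.
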
// uint32 Swap32(ptr *uint32, new uint32)
TEXT ·Swap32(SB), NOSPLIT, $0-20
	MOVD ptr+0(FP), R4
	MOVW new+8(FP), R5
	LWSYNC
	LWAR (R4), R3
	STWCCC R5, (R4)
	BNE -2(PC)
	ISYNC
	MOVW R3, ret+16(FP)
	RET

// uint64 Swap64(ptr *uint64, new uint64)
TEXT ·Swap64(SB), NOSPLIT, $0-24
	MOVD ptr+0(FP), R4
	MOVD new+8(FP), R5
	LWSYNC
	LDAR (R4), R3
	STDCCC R5, (R4)
	BNE -2(PC)
	ISYNC
	MOVD R3, ret+16(FP)
	RET

TEXT ·SwapUintptr(SB), NOSPLIT, $0-24
	BR ·Swap64(SB)

TEXT ·SwapInt32(SB), NOSPLIT, $0-20
	BR ·Swap32(SB)

TEXT ·SwapInt64(SB), NOSPLIT, $0-24
	BR ·Swap64(SB)

//
// Add
//

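// Add32/Add64 atomically add delta and return the new value:
//
//	LWSYNC
//	*ptr += delta
//	return *ptr
//
// LWSYNC before the loop orders the update after earlier accesses.
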
// uint32 Add32(uint32 volatile *ptr, int32 delta)
TEXT ·Add32(SB), NOSPLIT, $0-20
	MOVD ptr+0(FP), R4
	MOVW delta+8(FP), R5
	LWSYNC
	LWAR (R4), R3
	ADD R5, R3
	STWCCC R3, (R4)
	BNE -3(PC)
	MOVW R3, ret+16(FP)
	RET

// uint64 Add64(uint64 volatile *ptr, int64 delta)
TEXT ·Add64(SB), NOSPLIT, $0-24
	MOVD ptr+0(FP), R4
	MOVD delta+8(FP), R5
	LWSYNC
	LDAR (R4), R3
	ADD R5, R3
	STDCCC R3, (R4)
	BNE -3(PC)
	MOVD R3, ret+16(FP)
	RET

TEXT ·AddUintptr(SB), NOSPLIT, $0-24
	BR ·Add64(SB)

TEXT ·AddInt32(SB), NOSPLIT, $0-20
	BR ·Add32(SB)

TEXT ·AddInt64(SB), NOSPLIT, $0-24
	BR ·Add64(SB)

//
// Compare and swap
//

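// Cas32/Cas64 are compare-and-swap. Atomically:
//
//	if *ptr == old {
//		*ptr = new
//		return true
//	}
//	return false
//
// LWSYNC is executed before the loop and again on the success path,
// ordering the CAS against surrounding accesses.
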
// bool Cas32(uint32 *ptr, uint32 old, uint32 new)
TEXT ·Cas32(SB), NOSPLIT, $0-17
	MOVD ptr+0(FP), R3
	MOVWZ old+8(FP), R4
	MOVWZ new+12(FP), R5
	LWSYNC
cas_again:
	LWAR (R3), R6
	CMPW R6, R4
	BNE cas_fail
	STWCCC R5, (R3)
	BNE cas_again
	MOVD $1, R3
	LWSYNC
	MOVB R3, ret+16(FP)
	RET
cas_fail:
	MOVB R0, ret+16(FP)
	RET

// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD ptr+0(FP), R3
	MOVD old+8(FP), R4
	MOVD new+16(FP), R5
	LWSYNC
cas64_again:
	LDAR (R3), R6
	CMP R6, R4
	BNE cas64_fail
	STDCCC R5, (R3)
	BNE cas64_again
	MOVD $1, R3
	LWSYNC
	MOVB R3, ret+24(FP)
	RET
cas64_fail:
	MOVB R0, ret+24(FP)
	RET

TEXT ·CasUintptr(SB), NOSPLIT, $0-25
	BR ·Cas64(SB)

// bool CasUnsafePointer(void **val, void *old, void *new)
TEXT ·CasUnsafePointer(SB), NOSPLIT, $0-25
	BR ·Cas64(SB)

TEXT ·CasInt32(SB), NOSPLIT, $0-17
	BR ·Cas32(SB)

TEXT ·CasInt64(SB), NOSPLIT, $0-25
	BR ·Cas64(SB)

//
// CasRel
//

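// CasRel32 is a compare-and-swap with release-only ordering: LWSYNC
// is executed before the loop, but no barrier follows the store, so
// it does not provide acquire semantics.
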
TEXT ·CasRel32(SB), NOSPLIT, $0-17
	MOVD    ptr+0(FP), R3
	MOVWZ   old+8(FP), R4
	MOVWZ   new+12(FP), R5
	LWSYNC
cas_again:
	LWAR    (R3), $0, R6        // 0 = Mutex release hint
	CMPW    R6, R4
	BNE     cas_fail
	STWCCC  R5, (R3)
	BNE     cas_again
	MOVD    $1, R3
	MOVB    R3, ret+16(FP)
	RET
cas_fail:
	MOVB    R0, ret+16(FP)
	RET