github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/race_ppc64le.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build race
     6  
     7  #include "go_asm.h"
     8  #include "go_tls.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "asm_ppc64x.h"
    12  
    13  // The following functions allow calling the clang-compiled race runtime directly
    14  // from Go code without going all the way through cgo.
    15  // First, it's much faster (up to 50% speedup for real Go programs).
    16  // Second, it eliminates race-related special cases from cgocall and scheduler.
    17  // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
    18  
    19  // A brief recap of the ppc64le calling convention.
    20  // Arguments are passed in R3, R4, R5 ...
    21  // SP must be 16-byte aligned.
    22  
    23  // Note that for ppc64x, LLVM follows the standard ABI and
    24  // expects arguments in registers, so these functions move
    25  // the arguments from storage to the registers expected
    26  // by the ABI.
    27  
    28  // When calling from Go to Clang tsan code:
    29  // R3 is the 1st argument and is usually the ThreadState*
    30  // R4-? are the 2nd, 3rd, 4th, etc. arguments
    31  
    32  // When calling racecalladdr:
    33  // R8 is the call target address
    34  
    35  // The race ctx is passed in R3 and loaded in
    36  // racecalladdr.
    37  //
    38  // The sequence used to get the race ctx:
    39  //    MOVD    runtime·tls_g(SB), R10	// offset to TLS
    40  //    MOVD    0(R13)(R10*1), g		// R13=TLS for this thread, g = R30
    41  //    MOVD    g_racectx(g), R3		// racectx == ThreadState
    42  
    43  // func runtime·RaceRead(addr uintptr)
    44  // Called from instrumented Go code; reports a read of addr to tsan.
    45  TEXT	runtime·raceread(SB), NOSPLIT, $0-8
    46  	// Target: void __tsan_read(ThreadState *thr, void *addr, void *pc);
    47  	MOVD	$__tsan_read(SB), R8	// R8 = function racecalladdr<> will invoke
    48  	MOVD	LR, R5			// pc argument: our return address
    49  	MOVD	addr+0(FP), R4		// addr argument
    50  	BR	racecalladdr<>(SB)	// tail jump; racecalladdr<> loads R3
    51  
    52  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
    53  	BR	runtime·raceread(SB)	// exported name shares the implementation above
    54  
    55  // void runtime·racereadpc(void *addr, void *callpc, void *pc)
    56  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
    57  	// Target: void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    58  	MOVD	$__tsan_read_pc(SB), R8
    59  	MOVD	pc+16(FP), R6		// 4th tsan argument
    60  	MOVD	callpc+8(FP), R5	// 3rd tsan argument
    61  	MOVD	addr+0(FP), R4		// 2nd tsan argument; range-checked in racecalladdr<>
    62  	BR	racecalladdr<>(SB)
    63  
    64  // func runtime·RaceWrite(addr uintptr)
    65  // Called from instrumented Go code; reports a write of addr to tsan.
    66  TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
    67  	// Target: void __tsan_write(ThreadState *thr, void *addr, void *pc);
    68  	MOVD	$__tsan_write(SB), R8	// R8 = function racecalladdr<> will invoke
    69  	MOVD	LR, R5			// pc argument: LR as set by the caller's BL
    70  	MOVD	addr+0(FP), R4		// addr argument
    71  	BR	racecalladdr<>(SB)	// tail jump; racecalladdr<> loads R3
    72  
    73  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
    74  	BR	runtime·racewrite(SB)	// exported name shares the implementation above
    75  
    76  // void runtime·racewritepc(void *addr, void *callpc, void *pc)
    77  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
    78  	// Target: void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    79  	MOVD	$__tsan_write_pc(SB), R8
    80  	MOVD	pc+16(FP), R6		// 4th tsan argument
    81  	MOVD	callpc+8(FP), R5	// 3rd tsan argument
    82  	MOVD	addr+0(FP), R4		// 2nd tsan argument; range-checked in racecalladdr<>
    83  	BR	racecalladdr<>(SB)
    84  
    85  // func runtime·RaceReadRange(addr, size uintptr)
    86  // Called from instrumented Go code; reports a read of [addr, addr+size) to tsan.
    87  TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
    88  	// Target: void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    89  	MOVD	$__tsan_read_range(SB), R8
    90  	MOVD	LR, R6			// pc argument: our return address
    91  	MOVD	size+8(FP), R5		// size argument
    92  	MOVD	addr+0(FP), R4		// addr argument
    93  	BR	racecalladdr<>(SB)
    94  
    95  // void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
    96  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
    97  	// Target: void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    98  	MOVD	$__tsan_read_range(SB), R8
    99  	MOVD	pc+16(FP), R6
   100  	ADD	$4, R6, R6		// tsan wants a return address, so step past one instruction
   101  	MOVD	size+8(FP), R5
   102  	MOVD	addr+0(FP), R4
   103  	BR	racecalladdr<>(SB)
   104  
   105  TEXT    runtime·RaceReadRange(SB), NOSPLIT, $0-16	// exported name for racereadrange
   106  	BR	runtime·racereadrange(SB)	// tail jump; arguments and LR are left untouched
   107  
   108  // func runtime·RaceWriteRange(addr, size uintptr)
   109  // Called from instrumented Go code; reports a write of [addr, addr+size) to tsan.
   110  TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
   111  	// Target: void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   112  	MOVD	$__tsan_write_range(SB), R8
   113  	MOVD	LR, R6			// pc argument: our return address
   114  	MOVD	size+8(FP), R5		// size argument
   115  	MOVD	addr+0(FP), R4		// addr argument
   116  	BR	racecalladdr<>(SB)
   117  
   118  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
   119  	BR	runtime·racewriterange(SB)	// exported name shares the implementation above
   120  
   121  // void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
   122  // Called from instrumented Go code
   123  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
   124  	// Target: void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   125  	MOVD	$__tsan_write_range(SB), R8
   126  	MOVD	pc+16(FP), R6
   127  	ADD	$4, R6, R6		// same +4 adjustment as racereadrangepc1
   128  	MOVD	size+8(FP), R5
   129  	MOVD	addr+0(FP), R4
   130  	BR	racecalladdr<>(SB)
   131  
   132  // racecalladdr<> calls a __tsan function from Go code, but only when the
   133  // address argument lies in memory tracked by the race detector.
   134  //   R8 = tsan function address
   135  //   R4 = addr passed to the __tsan function (R5 and up: remaining args)
   136  //   R3 = *ThreadState a.k.a. g_racectx, loaded here from g
   137  // If addr is in neither tracked range, return without calling tsan.
   138  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   139  	MOVD    runtime·tls_g(SB), R10
   140  	MOVD	0(R13)(R10*1), g
   141  	MOVD	g_racectx(g), R3	// goroutine context
   142  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   143  	MOVD	runtime·racearenastart(SB), R9
   144  	CMP	R4, R9
   145  	BLT	data			// addr < arenastart: try the data range
   146  	MOVD	runtime·racearenaend(SB), R9
   147  	CMP	R4, R9
   148  	BLT	call			// arenastart <= addr < arenaend
   149  data:
   150  	MOVD	runtime·racedatastart(SB), R9
   151  	CMP	R4, R9
   152  	BLT	ret			// addr < racedatastart
   153  	MOVD	runtime·racedataend(SB), R9
   154  	CMP	R4, R9
   155  	BGE	ret			// racedataend is exclusive, so addr == racedataend is out of range
   156  call:
   157  	// Careful!! racecall will save LR on its
   158  	// stack, which is OK as long as racecalladdr
   159  	// doesn't change in a way that generates a stack.
   160  	// racecall should return to the caller of
   161  	// racecalladdr.
   162  	BR	racecall<>(SB)
   163  ret:
   164  	RET
   165  
   166  // func runtime·racefuncenterfp()
   167  // Called from instrumented Go code.
   168  // Like racefuncenter but doesn't pass an arg, uses the caller pc
   169  // from the first slot on the stack.
   170  TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
   171  	MOVD	0(R1), R8		// R8 = word at 0(R1), used as the caller pc by racefuncenter<>
   172  	BR	racefuncenter<>(SB)	// tail jump into the shared code
   173  
   174  // func runtime·racefuncenter(pc uintptr)
   175  // Called from instrumented Go code.
   176  // Not used now since gc/racewalk.go doesn't pass the
   177  // correct caller pc and racefuncenterfp can do it.
   178  TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
   179  	MOVD	callpc+0(FP), R8	// R8 = pc argument, consumed by racefuncenter<>
   180  	BR	racefuncenter<>(SB)	// tail jump into the shared code
   181  
   182  // Common code for racefuncenter/racefuncenterfp
   183  // R8 = caller's return address (set by the two entry points above)
   184  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
   185  	MOVD    runtime·tls_g(SB), R10
   186  	MOVD    0(R13)(R10*1), g
   187  	MOVD    g_racectx(g), R3        // goroutine racectx aka *ThreadState
   188  	MOVD	R8, R4			// caller pc becomes the 2nd tsan argument
   189  	// void __tsan_func_enter(ThreadState *thr, void *pc);
   190  	MOVD	$__tsan_func_enter(SB), R8	// R8 is reused for the call target
   191  	BR	racecall<>(SB)		// tail jump; racecall<> returns to our caller
   193  
   194  // func runtime·racefuncexit()
   195  // Called from Go instrumented code.
   196  TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
   197  	// Target: void __tsan_func_exit(ThreadState *thr);
   198  	MOVD	$__tsan_func_exit(SB), R8
   199  	MOVD	runtime·tls_g(SB), R10	// offset of g in TLS
   200  	MOVD	0(R13)(R10*1), g
   201  	MOVD	g_racectx(g), R3	// only argument: goroutine racectx aka *ThreadState
   202  	BR	racecall<>(SB)
   203  
   204  // Atomic operations for the sync/atomic package.
   205  // Each wrapper below forwards to the matching __tsan_go_atomic* function.
   206  // The wrapper sets R8 = tsan function and R6 = address of its own argument list;
   207  // R3, R4 and R5 are filled in by racecallatomic.
   208  
   209  // Load atomic in tsan
   210  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
   211  	// void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   212  	MOVD	$__tsan_go_atomic32_load(SB), R8
   213  	ADD	$32, R1, R6	// addr of caller's 1st arg
   214  	BR	racecallatomic<>(SB)	// tail jump; racecallatomic<> returns to our caller
   216  
   217  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
   218  	// void __tsan_go_atomic64_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   219  	MOVD	$__tsan_go_atomic64_load(SB), R8
   220  	ADD	$32, R1, R6	// addr of caller's 1st arg
   221  	BR	racecallatomic<>(SB)	// tail jump; racecallatomic<> returns to our caller
   223  
   224  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int32 load
   225  	BR	sync∕atomic·LoadInt32(SB)
   226  
   227  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 load
   228  	BR	sync∕atomic·LoadInt64(SB)
   229  
   230  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 load
   231  	BR	sync∕atomic·LoadInt64(SB)
   232  
   233  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 load
   234  	BR	sync∕atomic·LoadInt64(SB)
   235  
   236  // Store atomic in tsan
   237  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
   238  	// Target: void __tsan_go_atomic32_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   239  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   240  	MOVD	$__tsan_go_atomic32_store(SB), R8
   241  	BR	racecallatomic<>(SB)
   242  
   243  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
   244  	// Target: void __tsan_go_atomic64_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   245  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   246  	MOVD	$__tsan_go_atomic64_store(SB), R8
   247  	BR	racecallatomic<>(SB)
   248  
   249  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int32 store
   250  	BR	sync∕atomic·StoreInt32(SB)
   251  
   252  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 store
   253  	BR	sync∕atomic·StoreInt64(SB)
   254  
   255  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 store
   256  	BR	sync∕atomic·StoreInt64(SB)
   257  
   258  // Swap in tsan
   259  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
   260  	// Target: void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   261  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   262  	MOVD	$__tsan_go_atomic32_exchange(SB), R8
   263  	BR	racecallatomic<>(SB)
   264  
   265  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
   266  	// Target: void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   267  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   268  	MOVD	$__tsan_go_atomic64_exchange(SB), R8
   269  	BR	racecallatomic<>(SB)
   270  
   271  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int32 swap
   272  	BR	sync∕atomic·SwapInt32(SB)
   273  
   274  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 swap
   275  	BR	sync∕atomic·SwapInt64(SB)
   276  
   277  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 swap
   278  	BR	sync∕atomic·SwapInt64(SB)
   279  
   280  // Add atomic in tsan
   281  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
   282  	// Target: void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   283  	ADD	$64, R1, R6	// R6 = addr of caller's 1st arg
   284  	MOVD	$__tsan_go_atomic32_fetch_add(SB), R8
   285  	BL	racecallatomic<>(SB)
   286  	// tsan's fetch_add does not leave the value Go expects in the result
   287  	// slot, so add the 'add' operand to it before returning.
   288  	MOVW	ret+16(FP), R4	// value stored by tsan
   289  	MOVW	add+8(FP), R3	// the delta
   290  	ADD	R4, R3		// R3 = delta + tsan result
   291  	MOVW	R3, ret+16(FP)
   292  	RET
   293  
   294  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
   295  	// Target: void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
   296  	ADD	$64, R1, R6	// R6 = addr of caller's 1st arg
   297  	MOVD	$__tsan_go_atomic64_fetch_add(SB), R8
   298  	BL	racecallatomic<>(SB)
   299  	// tsan's fetch_add does not leave the value Go expects in the result
   300  	// slot, so add the 'add' operand to it before returning.
   301  	MOVD	ret+16(FP), R4	// value stored by tsan
   302  	MOVD	add+8(FP), R3	// the delta
   303  	ADD	R4, R3		// R3 = delta + tsan result
   304  	MOVD	R3, ret+16(FP)
   305  	RET
   306  
   307  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int32 add
   308  	BR	sync∕atomic·AddInt32(SB)
   309  
   310  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 add
   311  	BR	sync∕atomic·AddInt64(SB)
   312  
   313  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 add
   314  	BR	sync∕atomic·AddInt64(SB)
   315  
   316  // CompareAndSwap in tsan
   317  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
   318  	// Target: void __tsan_go_atomic32_compare_exchange(
   319  	//   ThreadState *thr, uptr cpc, uptr pc, u8 *a)
   320  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   321  	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R8
   322  	BR	racecallatomic<>(SB)
   323  
   324  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
   325  	// Target: void __tsan_go_atomic64_compare_exchange(
   326  	//   ThreadState *thr, uptr cpc, uptr pc, u8 *a)
   327  	ADD	$32, R1, R6	// R6 = addr of caller's 1st arg
   328  	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R8
   329  	BR	racecallatomic<>(SB)
   330  
   331  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int32 CAS
   332  	BR	sync∕atomic·CompareAndSwapInt32(SB)
   333  
   334  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 CAS
   335  	BR	sync∕atomic·CompareAndSwapInt64(SB)
   336  
   337  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0	// alias: tail-jumps to the Int64 CAS
   338  	BR	sync∕atomic·CompareAndSwapInt64(SB)
   339  
   340  // Common function used to call tsan's atomic functions.
   341  // R3 = *ThreadState (loaded here from g_racectx)
   342  // R4 = cpc: caller pc, read here from (R1)
   343  // R5 = pc: set here to the tsan function address taken from R8
   344  // R6 = addr of incoming arg list (set by the caller of this function)
   345  // R8 contains addr of target function.
   346  TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
   347  	// Trigger SIGSEGV early if address passed to atomic function is bad.
   348  	MOVD	(R6), R7	// 1st arg is addr
   349  	MOVBZ	(R7), R9	// segv here if addr is bad; 1-byte probe never reads past a 32-bit operand
   350  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   351  	MOVD	runtime·racearenastart(SB), R9
   352  	CMP	R7, R9
   353  	BLT	racecallatomic_data
   354  	MOVD	runtime·racearenaend(SB), R9
   355  	CMP	R7, R9
   356  	BLT	racecallatomic_ok
   357  racecallatomic_data:
   358  	MOVD	runtime·racedatastart(SB), R9
   359  	CMP	R7, R9
   360  	BLT	racecallatomic_ignore
   361  	MOVD	runtime·racedataend(SB), R9
   362  	CMP	R7, R9
   363  	BGE	racecallatomic_ignore
   364  racecallatomic_ok:
   365  	// Addr is within the good range, call the atomic function.
   366  	MOVD    runtime·tls_g(SB), R10
   367  	MOVD    0(R13)(R10*1), g
   368  	MOVD    g_racectx(g), R3        // goroutine racectx aka *ThreadState
   369  	MOVD	R8, R5			// pc is the function called
   370  	MOVD	(R1), R4		// caller pc from stack
   371  	BL	racecall<>(SB)		// BL needed to maintain stack consistency
   372  	RET				//
   373  racecallatomic_ignore:
   374  	// Addr is outside the good range.
   375  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
   376  	// An attempt to synchronize on the address would cause crash.
   377  	MOVD	R8, R15	// save the original function
   378  	MOVD	R6, R17 // save the original arg list addr
   379  	MOVD	$__tsan_go_ignore_sync_begin(SB), R8 // func addr to call
   380  	MOVD    runtime·tls_g(SB), R10
   381  	MOVD    0(R13)(R10*1), g
   382  	MOVD    g_racectx(g), R3        // goroutine context
   383  	BL	racecall<>(SB)
   384  	MOVD	R15, R8	// restore the original function
   385  	MOVD	R17, R6 // restore arg list addr
   386  	// Call the atomic function.
   387  	// racecall will call LLVM race code which might clobber r30 (g)
   388  	MOVD	runtime·tls_g(SB), R10
   389  	MOVD	0(R13)(R10*1), g
   390  
   391  	MOVD	g_racectx(g), R3
   392  	MOVD	R8, R5		// pc is the function called, same as the ok path
   393  	MOVD	(R1), R4	// caller pc from stack, same as the ok path
   394  	BL	racecall<>(SB)
   395  	// Call __tsan_go_ignore_sync_end.
   396  	MOVD	$__tsan_go_ignore_sync_end(SB), R8
   397  	MOVD	g_racectx(g), R3	// goroutine context; racecall<> reloads g from TLS before returning
   398  	BL	racecall<>(SB)
   399  	RET
   400  
   401  // void runtime·racecall(void(*f)(...), ...)
   402  // Invokes the race-runtime C function f, forwarding up to 4 arguments.
   403  // None of the arguments keeps a heap object alive, so the frame is declared as having no arguments.
   404  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
   405  	MOVD	arg3+32(FP), R6		// 4th C argument
   406  	MOVD	arg2+24(FP), R5		// 3rd C argument
   407  	MOVD	arg1+16(FP), R4		// 2nd C argument
   408  	MOVD	arg0+8(FP), R3		// 1st C argument
   409  	MOVD	fn+0(FP), R8		// call target expected by racecall<>
   410  	BR	racecall<>(SB)
   411  
   412  // racecall<> calls the C function whose address is in R8, running it on the
   413  // g0 stack. Arguments were already loaded into R3... by the Go-side caller.
   414  TEXT	racecall<>(SB), NOSPLIT, $0-0
   415  	// Set the LR slot for the ppc64 ABI
   416  	MOVD	LR, R10
   417  	MOVD	R10, 0(R1)	// Go expectation
   418  	MOVD	R10, 16(R1)	// C ABI
   419  	// Get info from the current goroutine
   420  	MOVD    runtime·tls_g(SB), R10	// g offset in TLS
   421  	MOVD    0(R13)(R10*1), g	// R13 = current TLS
   422  	MOVD	g_m(g), R7		// m for g
   423  	MOVD	R1, R16			// keep the incoming SP in R16; restored from it after the call
   424  	MOVD	m_g0(R7), R10		// g0 for m
   425  	CMP	R10, g			// is the current g already g0?
   426  	BEQ	call			// already on g0
   427  	MOVD	(g_sched+gobuf_sp)(R10), R1 // switch R1 to the SP saved in g0's sched
   428  call:
   429  	MOVD	R8, CTR			// R8 = address of the function to call
   430  	MOVD	R8, R12			// expected by PPC64 ABI
   431  	BL	(CTR)
   432  	XOR     R0, R0			// clear R0 on return from Clang
   433  	MOVD	R16, R1			// restore R1; R16 nonvol in Clang
   434  	MOVD    runtime·tls_g(SB), R10	// reload g from TLS after the C call
   435  	MOVD    0(R13)(R10*1), g
   436  	MOVD	16(R1), R10		// LR was saved away, restore for return
   437  	MOVD	R10, LR
   438  	RET
   439  
   440  // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
   441  // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
   442  // The overall effect of Go->C->Go call chain is similar to that of mcall.
   443  // R3 contains command code. R4 contains command-specific context.
   444  // See racecallback for command codes.
   445  TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $-8
   446  	// Handle command raceGetProcCmd (0) here.
   447  	// First, code below assumes that we are on curg, while raceGetProcCmd
   448  	// can be executed on g0. Second, it is called frequently, so will
   449  	// benefit from this fast path.
   450  	XOR	R0, R0		// clear R0 since we came from C code
   451  	CMP	R3, $0
   452  	BNE	rest
   453  	// g0 TODO: Don't modify g here since R30 is nonvolatile
   454  	MOVD	g, R9		// stash the incoming R30 so it can be put back below
   455  	MOVD    runtime·tls_g(SB), R10
   456  	MOVD    0(R13)(R10*1), g
   457  	MOVD	g_m(g), R3
   458  	MOVD	m_p(R3), R3
   459  	MOVD	p_raceprocctx(R3), R3
   460  	MOVD	R3, (R4)	// result: store g.m.p.raceprocctx through the pointer in R4
   461  	MOVD	R9, g		// put back the R30 value stashed above
   462  	RET
   463  
   464  	// This is all similar to what cgo does
   465  	// Save registers according to the ppc64 ABI
   466  rest:
   467  	MOVD	LR, R10	// save link register
   468  	MOVD	R10, 16(R1)
   469  	MOVW	CR, R10	// save condition register
   470  	MOVW	R10, 8(R1)
   471  	MOVDU   R1, -336(R1) // Allocate frame needed for outargs and register save area
   472  
   473  	MOVD    R14, 328(R1)	// R14 is kept at the top of the frame, away from R15-R31
   474  	MOVD    R15, 48(R1)
   475  	MOVD    R16, 56(R1)
   476  	MOVD    R17, 64(R1)
   477  	MOVD    R18, 72(R1)
   478  	MOVD    R19, 80(R1)
   479  	MOVD    R20, 88(R1)
   480  	MOVD    R21, 96(R1)
   481  	MOVD    R22, 104(R1)
   482  	MOVD    R23, 112(R1)
   483  	MOVD    R24, 120(R1)
   484  	MOVD    R25, 128(R1)
   485  	MOVD    R26, 136(R1)
   486  	MOVD    R27, 144(R1)
   487  	MOVD    R28, 152(R1)
   488  	MOVD    R29, 160(R1)
   489  	MOVD    g, 168(R1) // R30
   490  	MOVD    R31, 176(R1)
   491  	FMOVD   F14, 184(R1)
   492  	FMOVD   F15, 192(R1)
   493  	FMOVD   F16, 200(R1)
   494  	FMOVD   F17, 208(R1)
   495  	FMOVD   F18, 216(R1)
   496  	FMOVD   F19, 224(R1)
   497  	FMOVD   F20, 232(R1)
   498  	FMOVD   F21, 240(R1)
   499  	FMOVD   F22, 248(R1)
   500  	FMOVD   F23, 256(R1)
   501  	FMOVD   F24, 264(R1)
   502  	FMOVD   F25, 272(R1)
   503  	FMOVD   F26, 280(R1)
   504  	FMOVD   F27, 288(R1)
   505  	FMOVD   F28, 296(R1)
   506  	FMOVD   F29, 304(R1)
   507  	FMOVD   F30, 312(R1)
   508  	FMOVD   F31, 320(R1)
   509  
   510  	MOVD	R3, FIXED_FRAME+0(R1)	// command code becomes the 1st stack argument of racecallback
   511  	MOVD	R4, FIXED_FRAME+8(R1)	// context becomes the 2nd stack argument
   512  
   513  	MOVD    runtime·tls_g(SB), R10
   514  	MOVD    0(R13)(R10*1), g
   515  
   516  	MOVD	g_m(g), R7
   517  	MOVD	m_g0(R7), R8
   518  	CMP	g, R8
   519  	BEQ	noswitch	// already on g0: call racecallback without changing g
   520  
   521  	MOVD	R8, g // set g = m-> g0
   522  
   523  	BL	runtime·racecallback(SB)
   524  
   525  	// All registers are clobbered after Go code, reload.
   526  	MOVD    runtime·tls_g(SB), R10
   527  	MOVD    0(R13)(R10*1), g
   528  
   529  	MOVD	g_m(g), R7
   530  	MOVD	m_curg(R7), g // restore g = m->curg
   531  
   532  ret:
   533  	MOVD    328(R1), R14
   534  	MOVD    48(R1), R15
   535  	MOVD    56(R1), R16
   536  	MOVD    64(R1), R17
   537  	MOVD    72(R1), R18
   538  	MOVD    80(R1), R19
   539  	MOVD    88(R1), R20
   540  	MOVD    96(R1), R21
   541  	MOVD    104(R1), R22
   542  	MOVD    112(R1), R23
   543  	MOVD    120(R1), R24
   544  	MOVD    128(R1), R25
   545  	MOVD    136(R1), R26
   546  	MOVD    144(R1), R27
   547  	MOVD    152(R1), R28
   548  	MOVD    160(R1), R29
   549  	MOVD    168(R1), g // R30
   550  	MOVD    176(R1), R31
   551  	FMOVD   184(R1), F14
   552  	FMOVD   192(R1), F15
   553  	FMOVD   200(R1), F16
   554  	FMOVD   208(R1), F17
   555  	FMOVD   216(R1), F18
   556  	FMOVD   224(R1), F19
   557  	FMOVD   232(R1), F20
   558  	FMOVD   240(R1), F21
   559  	FMOVD   248(R1), F22
   560  	FMOVD   256(R1), F23
   561  	FMOVD   264(R1), F24
   562  	FMOVD   272(R1), F25
   563  	FMOVD   280(R1), F26
   564  	FMOVD   288(R1), F27
   565  	FMOVD   296(R1), F28
   566  	FMOVD   304(R1), F29
   567  	FMOVD   312(R1), F30
   568  	FMOVD   320(R1), F31
   569  
   570  	ADD     $336, R1	// pop the frame allocated at rest:
   571  	MOVD    8(R1), R10
   572  	MOVFL   R10, $0xff // Restore of CR
   573  	MOVD    16(R1), R10	// reload the LR value saved at rest:
   574  	MOVD    R10, LR
   575  	RET
   576  
   577  noswitch:
   578  	BL      runtime·racecallback(SB)
   579  	JMP     ret	// shared restore-and-return path
   580  
   581  // tls_g: 8-byte TLSBSS symbol (DUPOK). Code in this file loads it into R10 and indexes off R13 with it to fetch g for the current thread.
   582  GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8