github.com/rafaeltorres324/go/src@v0.0.0-20210519164414-9fdf653a9838/runtime/race_arm64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

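// For example, the raceread thunk below follows this convention: thr in R0,
// addr in R1, the caller pc in R2, and the tsan target address in R9 before
// tail-calling racecalladdr.
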
// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

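// For example, AND $~7 clears the low three bits: a raw thread pointer of
// 0x...1007 is rounded down to the 8-byte-aligned 0x...1000.
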
// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD    runtime·tls_g(SB), R11 \
	MOVD    (R0)(R11), g

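// In rough C-like pseudocode, load_g does (a sketch):
//   tp = TPIDR_EL0;      // thread pointer
//   tp &= ~7;            // Darwin only (TP_ALIGN)
//   g = *(tp + tls_g);   // tls_g resolves to the g slot's TLS offset
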
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

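// For example, the compiler instruments a load `v = *p` roughly as (sketch):
//   runtime.raceread(uintptr(unsafe.Pointer(p)))
//   v = *p
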
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret
call:
	JMP	racecall<>(SB)
ret:
	RET

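// In C-like pseudocode, the check above is roughly (note that the BGT makes
// the data-range test inclusive of racedataend, slightly wider than the
// half-open interval stated in the comment):
//   if ((racearenastart <= addr && addr < racearenaend) ||
//       (racedatastart <= addr && addr <= racedataend))
//           goto call;   // tail call (R9) via racecall
//   return;
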
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but doesn't pass an arg; it uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

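// The compiler brackets instrumented function bodies with these, roughly
// (sketch): a racefuncenterfp/racefuncenter call on entry and a racefuncexit
// call before every return.
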
// Atomic operations for the sync/atomic package.
// R3 = address of the argument list passed to this function; racecallatomic
// fetches it at 40(RSP), after the two intervening BLs.
// R0, R1, R2 are set in racecallatomic.

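// Each wrapper below loads the matching tsan entry point into R9 and lets
// racecallatomic marshal the Go argument frame, ending up roughly as
// (sketch): __tsan_go_atomic32_load(thr, callerpc, pc, &args).
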
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

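// Example: with *addr == 5, AddInt32(addr, 3) gets the old value 5 back from
// the tsan fetch_add; the fixup above rewrites ret to 5+3 = 8, matching
// sync/atomic's return-the-new-value semantics.
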
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call the LLVM race code, which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

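// Control-flow sketch (pseudocode) of racecallatomic:
//   if addr is in a race-mapped range:
//       tsan_fn(thr, caller_pc, pc, &args)     // tail call via racecall
//   else:
//       __tsan_go_ignore_sync_begin(thr)
//       tsan_fn(thr, caller_pc, pc, &args)
//       __tsan_go_ignore_sync_end(thr)
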
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

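// Typical use from Go (a sketch; see runtime/race.go for the real call sites):
//   racecall(&__tsan_init, uintptr(unsafe.Pointer(&racectx)), ...)
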
// Switches SP to the g0 stack and calls (R9). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET

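// Go-like pseudocode of the switch (a sketch):
//   sp := RSP
//   if g != g.m.g0 { RSP = g.m.g0.sched.sp }
//   f()                  // the C function in R9
//   RSP = sp
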
// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through to racecallback.
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27,   g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0; save it.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27,   g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif