github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/runtime/race_arm64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7, the rest is on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

#define load_g \
	MRS_TPIDR_R0 \
	MOVD    runtime·tls_g(SB), R11 \
	ADD     R11, R0 \
	MOVD    0(R0), g

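// In rough Go-flavored pseudocode, load_g computes (a sketch; tls_g is
// assumed to hold the offset of the g slot from the thread pointer
// TPIDR_EL0, which MRS_TPIDR_R0 reads into R0):
//
//	g = *(*g)(unsafe.Pointer(uintptr(tpidr_el0) + tls_g))
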
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

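// Each thunk below follows the same shape: load its operands into
// R1..R3, pass LR (the caller's return address) as the pc reported to
// tsan, and tail-jump to racecalladdr, which supplies R0. The effect is
// roughly (a sketch):
//
//	__tsan_read(racectx, addr, caller_pc)
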
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret
call:
	JMP	racecall<>(SB)
ret:
	RET

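// The range check above, in Go-flavored pseudocode (a sketch; note the
// BGT, so addr == racedataend itself passes the data check here):
//
//	if (addr < racearenastart || addr >= racearenaend) &&
//		(addr < racedatastart || addr > racedataend) {
//		return // address is not tracked by the race runtime
//	}
//	racecall(fn, racectx, addr, ...)
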
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but doesn't pass an arg; it uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

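// With -race the compiler brackets every instrumented function body
// with this pair, conceptually (a sketch):
//
//	racefuncenter(getcallerpc())
//	// ... instrumented loads and stores ...
//	racefuncexit()
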
// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after the two BLs below.
// R0, R1, R2 set in racecallatomic.

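// From the Go side these are the ordinary sync/atomic entry points;
// each forwards its whole argument frame to the matching tsan callback.
// For example (a sketch, using the register roles listed above):
//
//	__tsan_go_atomic32_load(racectx, caller_pc, pc, &args)
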
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

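// Note on the Add thunks above: __tsan_go_atomic*_fetch_add returns the
// old value, while Go's Add* must return the new one, hence the fixup
// after the call. In effect (a sketch):
//
//	ret = fetch_add(addr, delta) + delta
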
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

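// Pseudocode for the out-of-range (ignore) path above (a sketch):
//
//	__tsan_go_ignore_sync_begin(racectx)
//	atomicFn(racectx, caller_pc, pc, &args)
//	__tsan_go_ignore_sync_end(racectx)
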
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

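// This is how the Go race glue in runtime/race.go drives tsan; for
// example, shutdown looks roughly like (a sketch):
//
//	racecall(&__tsan_fini, 0, 0, 0, 0)
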
// Switches SP to the g0 stack and calls (R9). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET

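// Roughly, in Go-flavored pseudocode (a sketch):
//
//	sp := RSP
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // switch to the system stack
//	}
//	fn() // the C function whose address is in R9
//	RSP = sp
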
// C->Go callback thunk that allows C code to call runtime·racecallback.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// R0 contains command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMP	$0, R0
	BNE	rest
	MOVD	g, R13
	load_g
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_racectx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$96, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	MOVD	R27, 88(RSP)
	// Set g = g0.
	// load_g will clobber R0; save it first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	MOVD	88(RSP), R27
	ADD	$96, RSP
	JMP	(LR)

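// In Go terms the thunk behaves like (a sketch):
//
//	func racecallbackthunk(cmd uintptr, ctx unsafe.Pointer) {
//		if cmd == raceGetProcCmd {
//			*(*uintptr)(ctx) = getg().m.p.ptr().racectx // fast path
//			return
//		}
//		// save callee-saved registers, set g = g0, then:
//		racecallback(cmd, ctx)
//		// restore g = m.curg and the saved registers
//	}
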
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8