github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
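
// For example, the raceread thunk below performs the C call
//	__tsan_read(thr, addr, pc);
// which passes thr in RARG0, addr in RARG1, and pc in RARG2:
// DI/SI/DX on SysV targets, CX/DX/R8 on Windows.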

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)
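
// For illustration, a rough Go-level sketch (not actual compiler output)
// of what -race instrumentation inserts around a load:
//
//	raceread(uintptr(unsafe.Pointer(&x)))	// report the read to tsan
//	v := x					// the original load
//
// With a zero-size NOSPLIT frame, (SP) holds the thunk's return address,
// so RARG2 becomes the pc of the instrumented access; this is why the
// thunk must be reached by a direct CALL from instrumented code.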

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// without this, 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
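
// The checks above are equivalent to this Go sketch (illustrative only):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(...)	// report the access
//	}
//	// otherwise: not a shadowed address, do nothing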

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter, but passes FP, not PC.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET
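
// Note: DX is the closure context register in the Go amd64 ABI, and the C
// calling convention does not preserve it, so it is parked in R15, which
// both the SysV and Windows ABIs treat as callee-saved (see the recap above).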

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)
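
// LoadUintptr and LoadPointer can alias LoadInt64 because both types are
// pointer-sized (64 bits) on amd64; the Uintptr variants of Store, Swap,
// Add and CompareAndSwap below alias their Int64 versions for the same reason.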

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET
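
// Worked example: __tsan_go_atomic32_fetch_add writes the old value to
// ret+16(FP). If *addr was 5 and add is 3, tsan leaves 5 in the result
// slot; adding add again stores 8, the new value that sync/atomic's
// AddInt32 is documented to return.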

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
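
// Rough Go-level equivalent of the control flow above (a sketch;
// inShadowRange and atomicFn are illustrative names, not real symbols):
//
//	if inShadowRange(addr) {
//		atomicFn(racectx, callerpc, pc, &args)	// tail call, does not return here
//	} else {
//		__tsan_go_ignore_sync_begin(racectx)
//		atomicFn(racectx, callerpc, pc, &args)
//		__tsan_go_ignore_sync_end(racectx)
//	}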

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)
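
// On the Go side, race.go declares each __tsan_* entry point as a byte
// variable and passes its address here; schematically (a hedged sketch,
// not the exact race.go source):
//
//	//go:linkname __tsan_init __tsan_init
//	var __tsan_init byte
//
//	racecall(&__tsan_init, arg0, arg1, arg2, arg3)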

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
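
// Worked example of the ANDQ $~15 adjustment: SP = 0x1038 becomes 0x1030,
// a 16-byte boundary, satisfying the alignment the C ABI requires at the
// CALL; the original SP is restored from R12 afterwards.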

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code, RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't preserve them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R14
	MOVQ	m_g0(R14), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
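
// On the Go side (see runtime/race.go), racecallback(cmd, ctx) dispatches
// on the command code; cmd 0 (raceGetProcCmd) never reaches it because of
// the fast path at the top of the thunk.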