github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
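//
// Each __tsan_* function referenced below is an entry point of the
// gcc-compiled race runtime. On the Go side those symbols are made visible
// to the linker roughly like this (a sketch of the pattern used in
// runtime/race.go, not a verbatim copy):
//	//go:cgo_import_static __tsan_read
//	//go:linkname __tsan_read __tsan_read
//	var __tsan_read byte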

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
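// Under -race the compiler instruments memory accesses, so a load such as
//	x = *p
// is preceded by, roughly, the equivalent of
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
// (a sketch of the instrumentation, not its exact shape).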
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is the function start; tsan wants a return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is the function start; tsan wants a return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is the function start; tsan wants a return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is the function start; tsan wants a return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
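// In Go-like pseudocode, the range check below is roughly (a sketch):
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}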
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// without this, 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
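// The caller's return address is stored one word below fp, so it is loaded
// from -8(fp) before jumping to the common path.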
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address.
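// Note: DX is the closure context register at function entry; the C call
// below may clobber it, so it is stashed in R15 (which racecall<> preserves)
// and restored before returning.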
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.

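// Each thunk below loads the matching __tsan_go_atomic* entry point into AX
// and calls racecallatomic<>, which passes a pointer to the thunk's Go
// argument block. In rough C terms the callee looks like (a sketch):
//	void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
// where a points at the caller's (addr, ret) argument slots.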
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
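// tsan's fetch_add returns the old value, while Go's Add functions return
// the new value, so after the call the delta is added into the result slot;
// in effect (a sketch): ret = fetch_add(addr, delta) + delta.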
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
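// At entry, (SP) holds the return address into the sync/atomic thunk (used
// as pc), 8(SP) holds the thunk's own return address (the caller pc), and
// the thunk's Go arguments start at 16(SP), because the thunks above have
// zero-sized frames.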
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger a SIGSEGV early, before entering the C runtime, if addr is bad.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range; call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
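	// In outline (a sketch): ignore_sync_begin(racectx); call the original
	// atomic function; ignore_sync_end(racectx).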
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
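// On the Go side this is declared roughly as (a sketch; see runtime/race.go):
//	//go:noescape
//	func racecall(*byte, uintptr, uintptr, uintptr, uintptr)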
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
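// In outline (a sketch): save SP in callee-saved R12; if g != m->g0, point
// SP at g0's saved stack pointer; align SP to 16 bytes for the C ABI;
// CALL AX; restore SP.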
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to the g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for the gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows C code to call runtime·racesymbolize.
// A direct Go->C race call has only switched SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code; RARG1 contains command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
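	// In Go terms the fast path is roughly (a sketch):
	//	*(*uintptr)(unsafe.Pointer(arg)) = getg().m.p.ptr().racectx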
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_racectx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code will not preserve them).
	// This is a superset of the darwin/linux/windows callee-saved sets.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12	// discard the two pushed argument words
	POPQ	R12
	// All registers are smashed after the Go call; reload them.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET