github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the
// 4 register arguments: http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped
// functions. The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
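
// In Go terms, the raceread/racewrite thunks above are what the compiler
// inserts before every instrumented memory access; a rough sketch:
//
//	v := *p // compiler emits: raceread(uintptr(unsafe.Pointer(p)))
//	*p = v  // compiler emits: racewrite(uintptr(unsafe.Pointer(p)))
//
// Because the thunks are NOSPLIT with zero-size frames, (SP) holds the return
// address into the instrumented function, which is what gets passed to the
// race runtime as the access pc.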

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall.
// The other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// without this, 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	DX, R15	// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)
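
// In Go terms, the compiler brackets every instrumented function with the two
// thunks above; a rough sketch (the real calls are emitted by the compiler,
// and callerpc stands for f's return address):
//
//	func f() {
//		racefuncenter(callerpc)
//		// ... function body, with raceread/racewrite calls ...
//		racefuncexit()
//	}
//
// racefuncenter saves DX in R15 because DX carries the closure context at
// function entry and must survive the call into the race runtime.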

// Atomic operations for the sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
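
// Note the fetch_add -> add_fetch conversion in the Add thunks above:
// __tsan_go_atomic32/64_fetch_add writes the old value into the result slot,
// while sync/atomic.Add* must return the new value. In Go terms, roughly
// (fetchAdd is a hypothetical helper, for illustration only):
//
//	old := fetchAdd(addr, delta) // what the tsan runtime puts in ret+16(FP)
//	return old + delta           // what AddInt32/AddInt64 must return
//
// hence the ADDL/ADDQ of the addend into ret+16(FP) after the call.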

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range; call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for the gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by
// setting the correct g. The overall effect of the Go->C->Go call chain is
// similar to that of mcall.
TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code won't preserve them).
	// This is a superset of the darwin/linux/windows callee-saved sets.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	MOVQ	RARG0, 0(SP)	// func arg
	CALL	runtime·racesymbolize(SB)
	// All registers are smashed after Go code; reload them.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET
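
// In Go terms, the g juggling in racesymbolizethunk above is roughly
// (a sketch, not the actual implementation):
//
//	g = m.g0           // match the g0 stack the C side is already running on
//	racesymbolize(ctx) // Go code; may smash the C callee-saved registers
//	g = m.curg         // switch back to the goroutine that entered C
//
// The PUSHQ/POPQ pairs save and restore the C callee-saved registers around
// the Go call, since Go code does not preserve them.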