// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// Gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	DX, R15	// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
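//
// Note (descriptive, inferred from the thunks and racecallatomic<> below): each
// entry point loads the matching __tsan_go_atomic32_*/__tsan_go_atomic64_* function
// into AX and calls racecallatomic<>, which passes the goroutine race context, the
// caller pc, the pc, and a pointer to the Go argument frame. The tsan runtime reads
// the operands and writes the result back through that frame pointer, so most
// wrappers need no code of their own, and the unsigned/uintptr/Pointer variants
// simply tail-jump to the signed implementations of the same size.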

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
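	// Addresses outside these ranges are not shadowed by the race detector
	// (e.g. runtime-internal memory); they take the racecallatomic_ignore path
	// below, so the operation is still performed but creates no synchronization.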
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows C code to call runtime·racesymbolize.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code will not preserve them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	PUSHQ	RARG0	// func arg
	CALL	runtime·racesymbolize(SB)
	POPQ	R12
	// All registers are smashed after Go code; reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET