github.com/lzhfromustc/gofuzz@v0.0.0-20211116160056-151b3108bbd1/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// Gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
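
// As a quick illustration of the mapping above (a sketch, not code that is
// assembled): for a call such as __tsan_read(thr, addr, pc), thr travels in
// RARG0 (DI on SysV, CX on Windows), addr in RARG1 (SI / DX) and pc in
// RARG2 (DX / R8); the thunks below load the target function into AX and
// jump to racecall<> to perform the actual C call.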

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

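// To illustrate how raceread is reached (a sketch; the exact sequence depends
// on the compiler version), a load compiled with -race is instrumented
// roughly as:
//
//	raceread(uintptr(unsafe.Pointer(p))) // inserted by the compiler
//	y := *p                              // the original access
//
// so raceread receives the accessed address, and (SP) holds the caller's PC.
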
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
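
// Expressed as Go-like pseudocode, the check above is roughly (a sketch, not
// code from this file):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		// address is in instrumented memory: report the access to tsan
//	}
//	// otherwise the access is silently ignored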

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
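// When built with -race these definitions provide the sync/atomic entry
// points: each stub loads AX with the matching __tsan_go_atomic* function and
// calls racecallatomic<>, which hands tsan a pointer to the Go argument and
// result slots, so the operation is both performed and recorded for the
// happens-before analysis.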

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
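	// __tsan_go_atomic32_fetch_add stores the old value into the ret slot,
	// while Go's AddInt32 must return the new value, hence the fix-up below.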
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
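	// On entry the stack looks like this (a sketch, relying on the
	// zero-sized frames of the sync/atomic wrappers above):
	//	0(SP)	- return address into the wrapper ("pc")
	//	8(SP)	- the wrapper's own return address ("caller pc")
	//	16(SP)	- the wrapper's first argument, the address being operated on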
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)
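
// On the Go side this thunk is typically reached through a declaration along
// the lines of (a sketch; the exact signature lives in the runtime's race.go):
//
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
//
// which lets runtime code invoke an arbitrary tsan entry point with up to four
// machine-word arguments and without a cgo transition.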

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
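
// In Go-flavored pseudocode the switch above is roughly (a sketch, not code
// from this file):
//
//	sp := SP
//	if getg() != getg().m.g0 {
//		SP = getg().m.g0.sched.sp // borrow the system stack
//	}
//	SP &^= 15 // 16-byte alignment required by the C ABI
//	call(AX)  // the gcc-compiled tsan function
//	SP = sp
//
// The switch is needed because the race runtime is C code that expects a
// large, contiguous stack, which goroutine stacks do not guarantee.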

// C->Go callback thunk that allows C code to call runtime·racesymbolize.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows callee-saved sets.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R14
	MOVQ	m_g0(R14), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
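	// Pop the two argument words pushed above; racecallback takes its
	// arguments on the stack, so the values popped into R12 are discarded.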
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret