// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// Gcc-compiled race runtime does not try to use that space.

// RARG0-RARG3 name the first four C-ABI integer argument registers for the
// host platform, so the thunks below can be written once for both the
// System V and the Windows calling conventions.
#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	// addr arrives in AX (first ABIInternal integer argument register);
	// the caller's return address sits at (SP) since this is NOSPLIT $0.
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
// Exported variant for the race API; forwards to raceread.
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
// Like raceread, but the caller supplies the pc pair explicitly.
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	// Mirror of raceread, dispatching to __tsan_write instead.
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
// Exported variant for the race API; forwards to racewrite.
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
// Like racewrite, but the caller supplies the pc pair explicitly.
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	// Range read: args on the stack (ABI0); caller pc at (SP).
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
// Exported variant for the race API; forwards to racereadrange.
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Like racereadrange, but with an explicitly supplied pc.
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	// addr in AX, size in BX (first two ABIInternal argument registers);
	// caller pc at (SP).
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
// Exported variant for the race API; forwards to racewriterange.
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Like racewriterange, but with an explicitly supplied pc.
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// Each thunk loads the matching __tsan_go_atomic* function into AX and
// lets racecallatomic<> marshal the Go arguments through to it.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	// tsan returns the old value (fetch_add); Go's Add returns the new
	// value (add_fetch), so add the delta to the result slot in place.
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	// 16(SP) is the first Go argument of the atomic thunk's caller
	// (skipping our return pc at (SP) and the thunk's return pc at
	// 8(SP)); dereference it so a bad address faults here, with a
	// useful Go traceback, rather than inside the C race runtime.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET

// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// RARG0 contains command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	// NOTE(review): this compares m (R13) against m->g0 (R15), which
	// presumably can never be equal — the "already on g0" check looks
	// like it was meant to compare the current g (R14) with g0; verify
	// against upstream before relying on the noswitch path being taken.
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12	// discard the two pushed args
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret