// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.
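
// As a rough illustration only (not the compiler's exact lowering), with -race
// a function such as
//	func f(p *int) { *p = 1 }
// is instrumented approximately as
//	racefuncenterfp(fp)
//	racewrite(uintptr(unsafe.Pointer(p)))
//	*p = 1
//	racefuncexit()
// so the thunks below sit on the hot path of every instrumented memory access.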

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_read(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_read_pc(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_write(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_write_pc(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
TEXT racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ RARG1, runtime·racearenastart(SB)
	JB data
	CMPQ RARG1, runtime·racearenaend(SB)
	JB call
data:
	CMPQ RARG1, runtime·racedatastart(SB)
	JB ret
	CMPQ RARG1, runtime·racedataend(SB)
	JAE ret
call:
	MOVQ AX, AX // w/o this 6a miscompiles this function
	JMP racecall<>(SB)
ret:
	RET
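
// Note on the range check in racecalladdr above: runtime·racearenastart/end
// bracket the heap arena and runtime·racedatastart/end bracket the data+bss
// sections (both are maintained by the race runtime setup code in race.go).
// The race runtime has shadow memory only for those regions, so accesses to
// any other address are simply skipped rather than passed to tsan.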

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC
TEXT runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ fp+0(FP), R11
	MOVQ -8(R11), R11
	JMP racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ callpc+0(FP), R11
	JMP racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp
// R11 = caller's return address
TEXT racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ $__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL racecall<>(SB)
	MOVQ R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ $__tsan_func_exit(SB), AX
	JMP racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ $__tsan_go_atomic32_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ $__tsan_go_atomic64_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP sync∕atomic·LoadInt32(SB)

TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP sync∕atomic·LoadInt64(SB)

// Store
TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ $__tsan_go_atomic32_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ $__tsan_go_atomic64_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP sync∕atomic·StoreInt32(SB)

TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP sync∕atomic·StoreInt64(SB)

TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP sync∕atomic·StoreInt64(SB)

// Swap
TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ $__tsan_go_atomic32_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ $__tsan_go_atomic64_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP sync∕atomic·SwapInt32(SB)

TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP sync∕atomic·SwapInt64(SB)

TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP sync∕atomic·SwapInt64(SB)
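
// Note on the Add thunks below: __tsan_go_atomic32_fetch_add and
// __tsan_go_atomic64_fetch_add return the value the location held before the
// addition, while sync/atomic.Add* must return the new value, hence the extra
// ADD of the delta into the result slot after racecallatomic returns
// ("convert fetch_add to add_fetch").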

// Add
TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ $__tsan_go_atomic32_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVL add+8(FP), AX // convert fetch_add to add_fetch
	ADDL AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ $__tsan_go_atomic64_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVQ add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP sync∕atomic·AddInt32(SB)

TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP sync∕atomic·AddInt64(SB)

TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP sync∕atomic·CompareAndSwapInt32(SB)

TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP sync∕atomic·CompareAndSwapInt64(SB)

TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP sync∕atomic·CompareAndSwapInt64(SB)
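
// Explanatory note (a sketch of the mechanism, not authoritative): every
// sync∕atomic thunk above is NOSPLIT with a zero-size frame, so when it CALLs
// racecallatomic<> below, (SP) holds the return address inside the thunk,
// 8(SP) holds the thunk's own caller's return address, and the thunk's Go
// arguments/results start at 16(SP). The __tsan_go_atomic* entry points take
// (ThreadState *thr, caller_pc, pc, args); they perform the atomic operation
// themselves, reading the operands and writing the result through args.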

// Generic atomic operation implementation.
// AX already contains target function.
TEXT racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ 16(SP), R12
	MOVL (R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ R12, runtime·racearenastart(SB)
	JB racecallatomic_data
	CMPQ R12, runtime·racearenaend(SB)
	JB racecallatomic_ok
racecallatomic_data:
	CMPQ R12, runtime·racedatastart(SB)
	JB racecallatomic_ignore
	CMPQ R12, runtime·racedataend(SB)
	JAE racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	JMP racecall<>(SB) // does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ AX, R15 // remember the original function
	MOVQ $__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	CALL racecall<>(SB)
	MOVQ R15, AX // restore the original function
	// Call the atomic function.
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	CALL racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ $__tsan_go_ignore_sync_end(SB), AX
	MOVQ g_racectx(R14), RARG0 // goroutine context
	JMP racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ fn+0(FP), AX
	MOVQ arg0+8(FP), RARG0
	MOVQ arg1+16(FP), RARG1
	MOVQ arg2+24(FP), RARG2
	MOVQ arg3+32(FP), RARG3
	JMP racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_m(R14), R13
	// Switch to g0 stack.
	MOVQ SP, R12 // callee-saved, preserved across the CALL
	MOVQ m_g0(R13), R10
	CMPQ R10, R14
	JE call // already on g0
	MOVQ (g_sched+gobuf_sp)(R10), SP
call:
	ANDQ $~15, SP // alignment for gcc ABI
	CALL AX
	MOVQ R12, SP
	RET

// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call only switches SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ RARG0, $0
	JNE rest
	get_tls(RARG0)
	MOVQ g(RARG0), RARG0
	MOVQ g_m(RARG0), RARG0
	MOVQ m_p(RARG0), RARG0
	MOVQ p_raceprocctx(RARG0), RARG0
	MOVQ RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't preserve them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ BX
	PUSHQ BP
	PUSHQ DI
	PUSHQ SI
	PUSHQ R12
	PUSHQ R13
	PUSHQ R14
	PUSHQ R15
	// Set g = g0.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R14
	MOVQ m_g0(R14), R15
	CMPQ R13, R15
	JEQ noswitch // branch if already on g0
	MOVQ R15, g(R12) // g = m->g0
	PUSHQ RARG1 // func arg
	PUSHQ RARG0 // func arg
	CALL runtime·racecallback(SB)
	POPQ R12
	POPQ R12
	// All registers are smashed after Go code; reload them.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R13
	MOVQ m_curg(R13), R14
	MOVQ R14, g(R12) // g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ R15
	POPQ R14
	POPQ R13
	POPQ R12
	POPQ SI
	POPQ DI
	POPQ BP
	POPQ BX
	RET

noswitch:
	// already on g0
	PUSHQ RARG1 // func arg
	PUSHQ RARG0 // func arg
	CALL runtime·racecallback(SB)
	POPQ R12
	POPQ R12
	JMP ret
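
// Note: racecallbackthunk above is the entry point that the Go runtime hands
// to the C race runtime at startup (it is passed to __tsan_init from
// runtime·raceinit in race.go), which is how tsan calls back into Go for
// symbolization and for fetching per-P proc contexts.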