github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2 // caller pc: with a zero-size frame, (SP) holds our return address
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_read(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_read_pc(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2 // caller pc
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_write(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_write_pc(SB), AX
	JMP racecalladdr<>(SB)
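
// For orientation, a rough sketch of what -race instrumentation makes of
// ordinary Go code (conceptual only, not literal compiler output; the
// inserted calls land on the thunks in this file):
//
//	func store(p *int, v int) {
//		// inserted: runtime.racefuncenterfp(fp)
//		// inserted: runtime.racewrite(uintptr(unsafe.Pointer(p)))
//		*p = v
//		// inserted: runtime.racefuncexit()
//	}
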
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3 // caller pc
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3 // caller pc
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	ADDQ $1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
TEXT racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ RARG1, runtime·racearenastart(SB)
	JB data
	CMPQ RARG1, runtime·racearenaend(SB)
	JB call
data:
	CMPQ RARG1, runtime·racedatastart(SB)
	JB ret
	CMPQ RARG1, runtime·racedataend(SB)
	JAE ret
call:
	MOVQ AX, AX // w/o this 6a miscompiles this function
	JMP racecall<>(SB)
ret:
	RET
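
// The filter in racecalladdr, restated as a Go-level sketch (hypothetical
// helper for clarity; the real check is the CMPQ/JB sequence above):
//
//	func inRaceRange(addr uintptr) bool {
//		return (addr >= racearenastart && addr < racearenaend) ||
//			(addr >= racedatastart && addr < racedataend)
//	}
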
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
TEXT runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ fp+0(FP), R11
	MOVQ -8(R11), R11 // the caller's return address sits one word below fp
	JMP racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ callpc+0(FP), R11
	JMP racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address
TEXT racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ $__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL racecall<>(SB)
	MOVQ R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ $__tsan_func_exit(SB), AX
	JMP racecall<>(SB)

// Atomic operations for the sync/atomic package.

// Load
TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt32(SB)

TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

// Store
TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt32(SB)

TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt64(SB)

TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt64(SB)

// Swap
TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt32(SB)

TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt64(SB)

TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt64(SB)

// Add
TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVL add+8(FP), AX // convert fetch_add to add_fetch
	ADDL AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVQ add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt32(SB)

TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt64(SB)

TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt64(SB)
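
// Worked example of the fetch_add -> add_fetch fix-up in the Add thunks above
// (plain Go, for illustration): tsan's fetch-add writes the old value into the
// result slot, while Go's Add functions must return the new value, so the
// thunk adds the delta to the result afterwards.
//
//	var x int32 = 2
//	n := atomic.AddInt32(&x, 5)
//	// under -race: __tsan_go_atomic32_fetch_add stores 2 in n's slot,
//	// then the thunk's ADDL adds 5, so n == 7, the new value.
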
// CompareAndSwap
TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt32(SB)

TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt64(SB)

TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ 16(SP), R12 // addr (first argument of the sync∕atomic function)
	MOVL (R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ R12, runtime·racearenastart(SB)
	JB racecallatomic_data
	CMPQ R12, runtime·racearenaend(SB)
	JB racecallatomic_ok
racecallatomic_data:
	CMPQ R12, runtime·racedatastart(SB)
	JB racecallatomic_ignore
	CMPQ R12, runtime·racedataend(SB)
	JAE racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	JMP racecall<>(SB) // does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ AX, R15 // remember the original function
	MOVQ $__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	CALL racecall<>(SB)
	MOVQ R15, AX // restore the original function
	// Call the atomic function.
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	CALL racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ $__tsan_go_ignore_sync_end(SB), AX
	MOVQ g_racectx(R14), RARG0 // goroutine context
	JMP racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ fn+0(FP), AX
	MOVQ arg0+8(FP), RARG0
	MOVQ arg1+16(FP), RARG1
	MOVQ arg2+24(FP), RARG2
	MOVQ arg3+32(FP), RARG3
	JMP racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_m(R14), R13
	// Switch to g0 stack.
	MOVQ SP, R12 // callee-saved in the C ABI, preserved across the CALL
	MOVQ m_g0(R13), R10
	CMPQ R10, R14
	JE call // already on g0
	MOVQ (g_sched+gobuf_sp)(R10), SP
call:
	ANDQ $~15, SP // alignment for gcc ABI
	CALL AX
	MOVQ R12, SP
	RET
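
// For reference, the Go-side declaration of racecall (in runtime/race.go of
// this tree; reproduced here approximately, from memory) is:
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
//
// e.g. racefini invokes it as racecall(&__tsan_fini, 0, 0, 0, 0).
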
// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for the command codes.
TEXT runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ RARG0, $0
	JNE rest
	get_tls(RARG0)
	MOVQ g(RARG0), RARG0
	MOVQ g_m(RARG0), RARG0
	MOVQ m_p(RARG0), RARG0
	MOVQ p_raceprocctx(RARG0), RARG0
	MOVQ RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code will clobber them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ BX
	PUSHQ BP
	PUSHQ DI
	PUSHQ SI
	PUSHQ R12
	PUSHQ R13
	PUSHQ R14
	PUSHQ R15
	// Set g = g0.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R14
	MOVQ m_g0(R14), R15
	CMPQ R13, R15
	JEQ noswitch // branch if already on g0
	MOVQ R15, g(R12) // g = m->g0
	PUSHQ RARG1 // func arg
	PUSHQ RARG0 // func arg
	CALL runtime·racecallback(SB)
	POPQ R12 // discard the two pushed arguments
	POPQ R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R13
	MOVQ m_curg(R13), R14
	MOVQ R14, g(R12) // g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ R15
	POPQ R14
	POPQ R13
	POPQ R12
	POPQ SI
	POPQ DI
	POPQ BP
	POPQ BX
	RET

noswitch:
	// already on g0
	PUSHQ RARG1 // func arg
	PUSHQ RARG0 // func arg
	CALL runtime·racecallback(SB)
	POPQ R12 // discard the two pushed arguments
	POPQ R12
	JMP ret
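
// A minimal program (illustrative, not part of the runtime) that exercises
// these thunks when built with the race detector (go run -race main.go): the
// unsynchronized increments reach raceread/racewrite and get reported, while
// the atomic operations are routed through the sync∕atomic thunks above and
// are race-free.
//
//	package main
//
//	import (
//		"fmt"
//		"sync"
//		"sync/atomic"
//	)
//
//	func main() {
//		var racy int // unsynchronized: flagged via raceread/racewrite
//		var n int64  // atomic: routed through the thunks above
//		var wg sync.WaitGroup
//		for i := 0; i < 2; i++ {
//			wg.Add(1)
//			go func() {
//				defer wg.Done()
//				racy++                 // data race under -race
//				atomic.AddInt64(&n, 1) // race-free
//			}()
//		}
//		wg.Wait()
//		fmt.Println(racy, atomic.LoadInt64(&n))
//	}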