github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the
// 4 register arguments: http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped
// functions. The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
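
// In Go terms, the raceread/racewrite thunks above are what the compiler
// inserts before every instrumented memory access; a rough sketch:
//
//	v := *p // compiler emits: raceread(uintptr(unsafe.Pointer(p)))
//	*p = v  // compiler emits: racewrite(uintptr(unsafe.Pointer(p)))
//
// Because the thunks are NOSPLIT with zero-size frames, (SP) holds the return
// address into the instrumented function, which is what gets passed to the
// race runtime as the access pc.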

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall.
// The other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// without this, 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	DX, R15	// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)
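
// In Go terms, the compiler brackets every instrumented function with the two
// thunks above; a rough sketch (the real calls are emitted by the compiler,
// and callerpc stands for f's return address):
//
//	func f() {
//		racefuncenter(callerpc)
//		// ... function body, with raceread/racewrite calls ...
//		racefuncexit()
//	}
//
// racefuncenter saves DX in R15 because DX carries the closure context at
// function entry and must survive the call into the race runtime.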

// Atomic operations for the sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
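
// Note the fetch_add -> add_fetch conversion in the Add thunks above:
// __tsan_go_atomic32/64_fetch_add writes the old value into the result slot,
// while sync/atomic.Add* must return the new value. In Go terms, roughly
// (fetchAdd is a hypothetical helper, for illustration only):
//
//	old := fetchAdd(addr, delta) // what the tsan runtime puts in ret+16(FP)
//	return old + delta           // what AddInt32/AddInt64 must return
//
// hence the ADDL/ADDQ of the addend into ret+16(FP) after the call.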

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range; call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for the gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by
// setting the correct g. The overall effect of the Go->C->Go call chain is
// similar to that of mcall.
TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code won't preserve them).
	// This is a superset of the darwin/linux/windows callee-saved sets.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	MOVQ	RARG0, 0(SP)	// func arg
	CALL	runtime·racesymbolize(SB)
	// All registers are smashed after Go code; reload them.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET
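
// In Go terms, the g juggling in racesymbolizethunk above is roughly
// (a sketch, not the actual implementation):
//
//	g = m.g0           // match the g0 stack the C side is already running on
//	racesymbolize(ctx) // Go code; may smash the C callee-saved registers
//	g = m.curg         // switch back to the goroutine that entered C
//
// The PUSHQ/POPQ pairs save and restore the C callee-saved registers around
// the Go call, since Go code does not preserve them.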