// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race
// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest are on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)
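// For reference, under -race the compiler conceptually rewrites each memory
// access into a call to one of the thunks above followed by the access
// itself. A hedged Go sketch (illustrative only; readX is a made-up name,
// and the real instrumentation is emitted directly by the compiler):
//
//	func readX(p *int32) int32 {
//		raceread(uintptr(unsafe.Pointer(p))) // inserted instrumentation
//		return *p
//	}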
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is the function start; tsan wants a return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is the function start; tsan wants a return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// the data range is half-open, so addr == racedataend is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)
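// For reference, racefuncenter/racefuncexit bracket every function body in a
// -race build so tsan can reconstruct call stacks. A hedged Go sketch of
// what the compiler conceptually emits (f is a made-up name; the calls are
// inserted directly at entry and before each return, not via defer):
//
//	func f() {
//		racefuncenter(getcallerpc())
//		// ... function body ...
//		racefuncexit()
//	}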
// Atomic operations for sync/atomic package.
// R3 = addr of the arguments passed to this function; after the two BLs it
// takes to reach racecallatomic, it can be fetched at 40(RSP).
// R0, R1, R2 are set in racecallatomic.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)
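// The Add wrappers above fix up an ABI mismatch: tsan's fetch_add returns
// the old value, while sync/atomic's Add must return the new one. A hedged
// Go sketch of the fixup applied to the return slot (tsanFetchAdd is a
// made-up name standing in for __tsan_go_atomicNN_fetch_add):
//
//	old := tsanFetchAdd(addr, delta) // fetch_add: returns the old value
//	return old + delta               // add_fetch: what AddIntNN returns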
// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs to get here, it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code, which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET
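// For reference, the stack switch in racecall corresponds roughly to this
// hedged Go-style pseudocode (g, m, and g0 follow runtime naming; f stands
// for the C function in R9):
//
//	sp := RSP
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // run the C race runtime on the g0 system stack
//	}
//	f(a0, a1, a2, a3)
//	RSP = sp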
// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27, g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0; save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27, g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
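// For reference, commands other than raceGetProcCmd are dispatched to the Go
// handler after the g0 switch above. A hedged Go sketch of the shape of that
// handler (the real dispatch lives in runtime/race.go; the body here is
// illustrative only):
//
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			// handled on the assembly fast path above
//		default:
//			// symbolization and other command-specific handling
//		}
//	}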