github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/race_arm64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow us to remove the cyclic
// runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7, the rest is on stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)
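// For illustration (a sketch, not emitted verbatim by the compiler):
// under the register-based ABIInternal, an instrumented load such as
// `v = *p` compiles to roughly
//
//	MOVD	p, R0
//	CALL	runtime·raceread(SB)
//	MOVD	(p), v	// the actual load
//
// which is why raceread can take addr directly in R0 and recover the
// caller's PC from LR.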
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// the interval is half-open, so racedataend itself is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)
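// Note: __tsan_func_enter/__tsan_func_exit maintain TSan's per-goroutine
// shadow call stack; it is that stack which supplies the Go-level stack
// traces printed in race reports.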
// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after two times BL.
// R0, R1, R2 are set in racecallatomic.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
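// A note on the calling convention of the __tsan_go_atomic* entry points,
// inferred from racecallatomic below and stated here as a sketch: they
// take roughly (ThreadState *thr, void *callpc, void *pc, void *args),
// where args points at the Go argument block of the sync∕atomic wrapper.
// The race runtime thus both performs the atomic operation and writes
// its result back into the wrapper's frame, which is why the Add
// wrappers above can read ret+16(FP) after the call.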
// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr. after two times BL, get it at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)
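// The race runtime is C code with a conventional C stack discipline: it
// knows nothing about Go's small, growable goroutine stacks. racecall
// below therefore runs it on g0's large system stack.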
// Switches SP to g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)

// C->Go callback thunk that allows C code to call runtime·racesymbolize.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-save in C). load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save R0.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
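// For reference, a sketch of the Go-side declarations these thunks pair
// with (the authoritative versions live in runtime/race.go, not here):
//
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
//
//	//go:linkname __tsan_read __tsan_read
//	var __tsan_read byte
//
// so Go code invokes the race runtime as, e.g., racecall(&__tsan_fini, 0, 0, 0, 0).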