// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap entry point. It aligns the OS
// stack, carves g0's stack bounds out of it, probes CPUID, sets up
// TLS, wires m0<->g0, runs runtime initialization (check, args,
// osinit, schedinit), queues runtime.main as the first goroutine,
// and finally starts scheduling on this M via mstart (which does
// not return).
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, CX		// 16-byte align
	MOVL	CX, SP

	MOVL	AX, 16(SP)
	MOVL	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX	// 64KB stack; 104 bytes of guard slack
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort: TLS store did not round-trip
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1	// crash: mstart should never return
	RET

// mainPC holds the PC of runtime.main for newproc above.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVQ	$0, gobuf_ret(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX	// fn's argument: the old g
	MOVL	DI, DX
	MOVL	0(DI), DI	// code pointer out of the funcval
	CALL	DI
	POPQ	AX
	// fn must not return; reaching here is a fatal error.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Run fn on the system (g0) stack, then switch back to the
// current goroutine's stack. If already on g0 or gsignal,
// fn is called directly with no switch.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI	// code pointer out of the funcval
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear saved sp to help GC
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX	// abort

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX	// abort

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX	// no closure context
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	ADDL	DX, BX
	MOVL	stkbar_savedLRVal(BX), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP	NAME(SB)" - bad inlining results.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall dispatches to the fixed-frame call<N> routine whose
// frame size is the smallest power of two >= argsize.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine: copy the arguments
// into the local frame, call the function, copy results back to the
// caller's argument area, then notify the write barrier about the
// stores into that area.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
	JMP	runtime·cas(SB)	// uintptr is 32 bits on amd64p32

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicstore(SB)

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVL	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX	// XADD leaves old value in AX; produce new value
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xadduintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xadd(SB)

TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)	// XCHG with memory is implicitly locked
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVL	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET
// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	LOCK
	ORB	AX, 0(BX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	LOCK
	ANDB	AX, 0(BX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again (CALL is 5 bytes)
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX	// crash: cgo is unsupported on amd64p32
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
	MOVL	0, AX	// crash: cgo is unsupported on amd64p32
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX	// crash: only needed by cgo, which is unsupported
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	MOVL	0, AX	// crash: SP above stack.hi
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	MOVL	0, AX	// crash: SP below stack.lo
	RET

// void memclr(void *ptr, uintptr n)
// Zero n bytes at ptr: 8 bytes at a time, then the 0-7 byte tail.
TEXT runtime·memclr(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$7, BX		// BX = byte remainder
	SHRQ	$3, CX		// CX = quadword count
	MOVQ	$0, AX
	CLD
	REP
	STOSQ
	MOVQ	BX, CX
	REP
	STOSB
	RET

TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVL	-8(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC			// result in DX:AX
	SHLQ	$32, DX
	ADDQ	DX, AX		// combine into 64-bit AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, stored at offset 4 in the closure (DX)
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.
// NOTE: these stubs return whatever happens to be in AX.

TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

// bool memeq(void *a, void *b, uintptr size)
TEXT runtime·memeq(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)	// equal pointers compare equal
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
same:
	MOVB	$1, v+16(FP)	// same backing pointer => equal
	RET

// a in SI
// b in DI
// count in BX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set <=> all bytes equal
	JEQ	hugeloop
	RET			// AX = 0 (not equal)

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET			// AX = 0 (not equal)

	// remaining 0-8 bytes
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX	// overlapping final 8-byte load
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX	// bytes left -> bits left
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI		// zero iff the loaded bytes match...
	SHLQ	CX, DI		// ...after discarding bytes past the length
equal:
	SETEQ	AX
	RET

TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8	// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX	// overlapping final 8-byte load
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX	// - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of AX
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b in to high bytes of BX
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX	// 1/0 => +1/-1
	RET

allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+12(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+8(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of byte, or -1 if not found
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL	SI, DI

	CMPL	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTL	$15, SI
	JZ	aligned
	MOVL	SI, CX
	ANDL	$~15, CX
	ADDL	$16, CX

	// search the beginning
	SUBL	SI, CX
	REPN; SCASB
	JZ	success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL	BX, R11
	ADDL	SI, R11
	ANDL	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDL	$16, DI

condition:
	CMPL	DI, R11
	JLT	sse

	// search the end
	MOVL	SI, CX
	ADDL	BX, CX
	SUBL	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVL	$-1, AX
	RET

// handle for lengths < 16
small:
	MOVL	BX, CX
	REPN; SCASB
	JZ	success
	MOVL	$-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBL	SI, DI
	ADDL	DI, DX
	MOVL	DX, AX
	RET

success:
	SUBL	SI, DI
	SUBL	$1, DI	// SCASB advanced DI one past the match
	MOVL	DI, AX
	RET

TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret		// different lengths => not equal
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// fastrand1 advances this M's per-thread PRNG state (m.fastrand)
// by a shift-and-conditional-xor step and returns the new value.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX		// keep un-xored value when result is negative
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET