// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap routine. In order, it:
//   - moves argc/argv onto a freshly 16-byte-aligned stack,
//   - records stack bounds and stack guards in runtime·g0,
//   - probes CPUID and records the runtime·support_* feature flags,
//   - installs TLS for m0 and verifies that a store through it is visible,
//   - links g0 and m0 together,
//   - calls check, args, osinit, schedinit, newproc(mainPC) and mstart.
// The final store to 0xf1 is only reached if mstart returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)
	MOVL	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	// Remember the highest supported basic CPUID leaf (leaf 0 returns it
	// in EAX). The eax7 check below compares SI against 7; without this
	// save SI would hold whatever value it had on entry. CPUID only
	// writes AX, BX, CX and DX, so SI survives the EAX=1 query.
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

	TESTL	$(1<<26), DX // SSE2
	SETNE	runtime·support_sse2(SB)

	TESTL	$(1<<9), CX // SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), CX // SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), CX // SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), CX // POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), CX // AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), CX // OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), CX // AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7			// SI = max basic leaf saved above
	JLT	osavx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX // BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX // BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX // ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
	JE	nocpuinfo
#endif
noavx:
	// Reached when OSXSAVE is clear, when XGETBV does not report both
	// XMM and YMM state, and unconditionally when GOOS_nacl is defined.
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:

needtls:
	LEAL	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC is a 4-byte read-only word holding the address of runtime·main;
// rt0_go passes its address to newproc as the entry for the first goroutine.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP and PC and the current g in buf, zeroes buf.ret,
// and calls badctxt if buf.ctxt is not already zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Loads g, SP, ctxt (into DX) and ret (into AX) from buf, clears the
// sp/ret/ctxt fields of buf, and jumps to buf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload: BX is not preserved across the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// argument for fn: the g that called mcall
	MOVL	DI, DX		// DX = fn (closure pointer)
	MOVL	0(DI), DI	// DI = code pointer stored in the closure's first word
	CALL	DI
	// Only reached if fn returns, which the contract forbids.
	POPQ	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the m's g0 stack.
//   - If the current g is m->gsignal or m->g0, fn is called in place.
//   - If the current g is m->curg, its state is saved in g->sched, the
//     stack is switched to g0's, fn is called, and the stack is switched back.
//   - Otherwise badsystemstack is called.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX		// DX = fn (closure pointer)
	MOVL	0(DI), DI	// DI = code pointer stored in the closure's first word
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// On entry DX carries the ctxt value that is pushed as newstack's argument
// (morestack_noctxt zeroes it first).
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	MOVL	0, AX		// load from address 0; presumably a deliberate fault if the call returns

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	MOVL	0, AX		// load from address 0; presumably a deliberate fault if the call returns

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX // f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX // f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	PUSHQ	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPQ	DX	// keep balance check happy
	RET

// morestack trampolines
// morestack_noctxt zeroes DX (the ctxt argument) and jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (argsize) is <= MAXSIZE (unsigned);
// otherwise control falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that jumps straight to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall dispatches on argsize to the smallest call<N> routine whose
// frame is large enough. If argsize exceeds the largest size listed
// (1073741824), it jumps to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call<N> routine. It copies argsize bytes
// from argptr to the frame at SP, calls the function whose code pointer is
// the first word of f (with DX = f), then hands the result region (starting
// at retoffset in both buffers) to callRet. callRet receives
// DX = argtype, DI = destination, SI = source, CX = byte count.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
// Register inputs: DX, DI, SI, CX. They are stored, in that order, as the
// four 4-byte stack arguments of reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// Instantiate one call<N> routine per frame size used by reflectcall's
// dispatch table.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE once per iteration, counting cycles down to zero.
// NOTE(review): the count is decremented before it is tested, so cycles == 0
// wraps around and spins 2^32 times; callers presumably never pass 0.
// NOTE(review): the frame is declared $0-0 although a 4-byte argument is read
// at cycles+0(FP); confirm whether the declared argument size is intentional.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// DX = fn (closure pointer), left in DX for the callee
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again (5 is presumably the CALL's encoded length; confirm)
	MOVL	0(DX), BX	// BX = code pointer stored in the closure's first word
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX		// load from address 0; presumably a deliberate fault
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
	MOVL	0, AX		// load from address 0; presumably a deliberate fault
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
	MOVL	0, AX		// load from address 0; presumably a deliberate fault
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX		// load from address 0; presumably a deliberate fault
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Each failed check executes a load from address 0 (presumably a deliberate
// fault).
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)		// ok when stack.hi > SP (unsigned)
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)		// ok when SP > stack.lo (unsigned)
	MOVL	0, AX
	RET

// memclrNoHeapPointers zeroes n bytes starting at ptr:
// n/4 dwords with REP STOSL, then the remaining n%4 bytes with REP STOSB.
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$3, BX		// BX = n % 4 (tail bytes)
	SHRQ	$2, CX		// CX = n / 4 (dword count)
	MOVQ	$0, AX
	CLD
	REP
	STOSL
	MOVQ	BX, CX
	REP
	STOSB
	// Note: we zero only 4 bytes at a time so that the tail is at most
	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
	// See issue 13160.
	RET

// getcallerpc returns the word stored 8 bytes below argp.
TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
// Returns the RDTSC counter as a single 64-bit value (DX:AX combined).
// NOTE(review): the frame is declared $0-0 although an 8-byte result is
// written at ret+0(FP); confirm whether the declared size is intentional.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX	// memhash's result slot
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations.  Can copy and adjust the ones
// in asm_amd64.s when the time comes.

// The four aeshash* routines below are placeholders: each stores the current
// contents of AX into its result slot without computing anything.
TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Returns true immediately when both pointers are identical; otherwise
// defers to memeqbody (SI = a, DI = b, BX = size; result in AX).
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, but the size comes from the closure in DX.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX    // compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same		// identical base pointers: equal without comparing bytes
	MOVL	s1_len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
same:
	MOVB	$1, ret+16(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result in AX: 1 if the two regions are equal, 0 otherwise.
// Does not follow the Go ABI; arguments and result are in registers.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX		// default result: not equal

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set => all 64 bytes matched
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
leftover:
	// Compare the 8 bytes that end exactly at the end of each region.
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal		// flags from the CMPQ above make SETEQ produce 1

	LEAQ	0(BX*8), CX
	NEGQ	CX		// CX = -(8*count); used as the shift count below

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI		// discard the bytes beyond count; ZF set iff the rest is equal
equal:
	SETEQ	AX
	RET

// cmpstring loads both strings' base and length into the registers cmpbody
// expects and stores cmpbody's AX result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare is the same wrapper for two byte slices (base and length
// at offsets 0/4 and 12/16).
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// Compare the 8 bytes that end exactly at the end of the common prefix.
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

allsame:
	// The compared bytes are identical; order by length.
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte loads the slice base and length and the byte c into the
// registers indexbytebody expects and stores indexbytebody's AX result.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+12(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// strings·IndexByte is the same wrapper for a string; c is at offset 8.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+8(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX
// AX is the offset from SI of the first matching byte, or -1 if none.
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL	SI, DI		// DI = scan cursor; SI keeps the start for the index math

	CMPL	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTL	$15, SI
	JZ	aligned
	MOVL	SI, CX
	ANDL	$~15, CX
	ADDL	$16, CX

	// search the beginning
	SUBL	SI, CX
	REPN; SCASB
	JZ	success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL	BX, R11
	ADDL	SI, R11
	ANDL	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDL	$16, DI

condition:
	CMPL	DI, R11
	JLT	sse

	// search the end
	MOVL	SI, CX
	ADDL	BX, CX
	SUBL	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVL	$-1, AX
	RET

// handle for lengths < 16
small:
	MOVL	BX, CX
	REPN; SCASB
	JZ	success
	MOVL	$-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBL	SI, DI
	ADDL	DI, DX
	MOVL	DX, AX
	RET

success:
	// DI is one past the matching byte here, hence the extra -1.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, AX
	RET

// bytes·Equal returns false when the lengths differ; otherwise it returns
// memeqbody's result for the two slices.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX		// result if the lengths differ
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP byte places the CALL one byte past the symbol start
// (presumably so that goexit+PCQuantum lands on it; confirm PCQuantum == 1).
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues PREFETCHT0 for the address passed in addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues PREFETCHT1 for the address passed in addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


// prefetcht2 issues PREFETCHT2 for the address passed in addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues PREFETCHNTA for the address passed in addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// checkASM always returns true.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVB	$1, ret+0(FP)
	RET