// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/stingnevermore/go@v0.0.0-20180120041312-3810f5bfed72/src/runtime/asm_amd64p32.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap entry of the runtime.
// It re-aligns the stack, initializes g0's stack bounds, probes CPU
// features via CPUID, sets up TLS, links g0 and m0 together, runs the
// runtime initialization calls, queues runtime·main as a new goroutine
// and starts this M. It is not expected to return.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)
	MOVL	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	// Keep the maximum supported basic CPUID leaf in SI: the eax7
	// check below compares SI against 7, and the CPUID with EAX=1
	// in between overwrites AX. Nothing else in this function writes SI.
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

	TESTL	$(1<<26), DX // SSE2
	SETNE	runtime·support_sse2(SB)

	TESTL	$(1<<9), CX // SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), CX // SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), CX // SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), CX // POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), CX // AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), CX // OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), CX // AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7		// SI = maximum basic leaf saved above
	JLT	osavx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX // BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX // BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX // ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
	JE	nocpuinfo
#endif
noavx:
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:

needtls:
	LEAL	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX	// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// argument to fn: the g we switched away from
	MOVL	DI, DX		// DX = closure pointer
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI
	POPQ	AX
	// fn returned, which it must never do.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the g0 stack. If the current g is already gsignal or g0,
// fn is tail-called directly; if it is m->curg, the stack is switched
// to g0 for the duration of the call and switched back afterwards.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// Trap rather than fall through into the switch path below
	// should the call above ever return.
	INT	$3

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	MOVL	0, AX		// load from address 0: crash

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	MOVL	0, AX		// load from address 0: crash

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX // f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX // f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
// morestack_noctxt clears DX (stored as g->sched.ctxt by morestack)
// and jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is at most MAXSIZE
// (unsigned comparison); otherwise execution continues after the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call forwards to the runtime's reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the smallest
// fixed-frame call* helper whose frame can hold that many bytes.
// If no helper is large enough it jumps to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame helper NAME with a MAXSIZE-byte frame.
// The helper copies argsize bytes from argptr to the bottom of its frame,
// calls through the closure f, then hands the result region
// (everything from retoffset onward) to callRet to be copied back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
// In: DX = argtype, DI = destination, SI = source, CX = byte count.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE in a loop, decrementing the cycles
// argument once per iteration until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX		// load from address 0: crash
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
	MOVL	0, AX		// load from address 0: crash
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
	MOVL	0, AX		// load from address 0: crash
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX		// load from address 0: crash
	RET

// check that SP is in range (g->stack.lo, g->stack.hi):
// both comparisons are strict and unsigned, and a failed
// check faults by loading from address 0.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)		// ok when stack.hi > SP
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)		// ok when SP > stack.lo
	MOVL	0, AX
	RET

// int64 runtime·cputicks(void)
// Combines the two halves returned by RDTSC (DX = high, AX = low)
// into a single 64-bit result.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.
TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Identical pointers compare equal without touching memory;
// otherwise the work is delegated to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// a in SI
// b in DI
// count in BX
// result (1 = equal, 0 = different) in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET

// cmpstring loads both string headers and delegates to cmpbody.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare loads pointer and length of both slices and delegates to cmpbody.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte loads the slice pointer, its length and the byte
// sought, then delegates to indexbytebody.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+12(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// strings·IndexByte loads the string pointer, its length and the byte
// sought, then delegates to indexbytebody.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+8(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first match, or -1 if there is none
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL	SI, DI

	CMPL	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTL	$15, SI
	JZ	aligned
	MOVL	SI, CX
	ANDL	$~15, CX
	ADDL	$16, CX

	// search the beginning
	SUBL	SI, CX
	REPN; SCASB
	JZ	success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL	BX, R11
	ADDL	SI, R11
	ANDL	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDL	$16, DI

condition:
	// DI and R11 are addresses, so the comparison must be unsigned:
	// a signed branch would leave the loop early for a buffer that
	// straddles 0x80000000.
	CMPL	DI, R11
	JB	sse

	// search the end
	MOVL	SI, CX
	ADDL	BX, CX
	SUBL	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVL	$-1, AX
	RET

// handle for lengths < 16
small:
	MOVL	BX, CX
	REPN; SCASB
	JZ	success
	MOVL	$-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBL	SI, DI
	ADDL	DI, DX
	MOVL	DX, AX
	RET

success:
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, AX
	RET

// bytes·Equal returns false when the lengths differ and otherwise
// delegates the comparison to memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// checkASM stores true in its result.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVB	$1, ret+0(FP)
	RET