github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, SP		// plenty of scratch
    16  	ANDL	$~15, CX
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)
    20  	MOVL	BX, 24(SP)
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX
    32  	CPUID
    33  	CMPQ	AX, $0
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)
    38  	MOVL	DX, runtime·cpuid_edx(SB)
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·m0+m_tls(SB), DI
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)
    48  	MOVQ	runtime·m0+m_tls(SB), AX
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)
    51  	MOVL	AX, 0	// abort
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·mainPC(SB), AX	// entry
    77  	MOVL	$0, 0(SP)
    78  	MOVL	AX, 4(SP)
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
    84  	MOVL	$0xf1, 0xf1  // crash
    85  	RET
    86  
     87  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
     88  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only; rt0_go passes its address to newproc
    89  
     90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// Execute a software breakpoint instruction.
     91  	INT $3
     92  	RET
    93  
     94  TEXT runtime·asminit(SB),NOSPLIT,$0-0	// Per-thread assembly init hook; a no-op here.
     95  	// No per-thread init.
     96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
    102  // void gosave(Gobuf*)
    103  // Save the caller's SP, PC and the current g in Gobuf (like setjmp); ctxt and ret are zeroed.
    104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
    105  	MOVL	buf+0(FP), AX	// gobuf
    106  	LEAL	buf+0(FP), BX	// caller's SP
    107  	MOVL	BX, gobuf_sp(AX)
    108  	MOVL	0(SP), BX		// caller's PC
    109  	MOVL	BX, gobuf_pc(AX)
    110  	MOVL	$0, gobuf_ctxt(AX)
    111  	MOVQ	$0, gobuf_ret(AX)
    112  	get_tls(CX)
    113  	MOVL	g(CX), BX	// BX = current g
    114  	MOVL	BX, gobuf_g(AX)
    115  	RET
   116  
    117  // void gogo(Gobuf*)
    118  // Restore g, SP, ctxt (DX) and ret (AX) from Gobuf and jump to its saved PC (like longjmp).
    119  TEXT runtime·gogo(SB), NOSPLIT, $0-4
    120  	MOVL	buf+0(FP), BX		// gobuf
    121  	MOVL	gobuf_g(BX), DX
    122  	MOVL	0(DX), CX		// make sure g != nil (the loaded value is discarded)
    123  	get_tls(CX)
    124  	MOVL	DX, g(CX)		// make gobuf.g the current g
    125  	MOVL	gobuf_sp(BX), SP	// restore SP
    126  	MOVL	gobuf_ctxt(BX), DX
    127  	MOVQ	gobuf_ret(BX), AX
    128  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
    129  	MOVQ	$0, gobuf_ret(BX)
    130  	MOVL	$0, gobuf_ctxt(BX)
    131  	MOVL	gobuf_pc(BX), BX
    132  	JMP	BX			// resume at the saved PC; does not return here
   133  
    134  // func mcall(fn func(*g))
    135  // Switch to m->g0's stack, call fn(g).
    136  // Fn must never return. It should gogo(&g->sched)
    137  // to keep running g.
    138  TEXT runtime·mcall(SB), NOSPLIT, $0-4
    139  	MOVL	fn+0(FP), DI
    140  	
    141  	get_tls(CX)
    142  	MOVL	g(CX), AX	// save state in g->sched
    143  	MOVL	0(SP), BX	// caller's PC
    144  	MOVL	BX, (g_sched+gobuf_pc)(AX)
    145  	LEAL	fn+0(FP), BX	// caller's SP
    146  	MOVL	BX, (g_sched+gobuf_sp)(AX)
    147  	MOVL	AX, (g_sched+gobuf_g)(AX)
    148  
    149  	// switch to m->g0 & its stack, call fn
    150  	MOVL	g(CX), BX
    151  	MOVL	g_m(BX), BX
    152  	MOVL	m_g0(BX), SI
    153  	CMPL	SI, AX	// if g == m->g0 call badmcall
    154  	JNE	3(PC)	// not g0: skip the two-instruction jump to badmcall
    155  	MOVL	$runtime·badmcall(SB), AX
    156  	JMP	AX
    157  	MOVL	SI, g(CX)	// g = m->g0
    158  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    159  	PUSHQ	AX	// the old g, placed on g0's stack as fn's argument
    160  	MOVL	DI, DX	// DX = fn (closure pointer)
    161  	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
    162  	CALL	DI
    163  	POPQ	AX
    164  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
    165  	JMP	AX
    166  	RET
   167  
    168  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    169  // of the G stack. We need to distinguish the routine that
    170  // lives at the bottom of the G stack from the one that lives
    171  // at the top of the system stack because the one at the top of
    172  // the system stack terminates the stack walk (see topofstack()).
    173  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// only its address is used: systemstack stores it as g->sched.pc
    174  	RET
   175  
    176  // func systemstack(fn func())
    177  TEXT runtime·systemstack(SB), NOSPLIT, $0-4	// Run fn on m->g0's stack, switching to it if currently on m->curg.
    178  	MOVL	fn+0(FP), DI	// DI = fn
    179  	get_tls(CX)
    180  	MOVL	g(CX), AX	// AX = g
    181  	MOVL	g_m(AX), BX	// BX = m
    182  
    183  	MOVL	m_gsignal(BX), DX	// DX = gsignal
    184  	CMPL	AX, DX
    185  	JEQ	noswitch
    186  
    187  	MOVL	m_g0(BX), DX	// DX = g0
    188  	CMPL	AX, DX
    189  	JEQ	noswitch
    190  
    191  	MOVL	m_curg(BX), R8
    192  	CMPL	AX, R8
    193  	JEQ	switch
    194  	
    195  	// Not g0, not curg. Must be gsignal, but that's not allowed.
    196  	// Hide call from linker nosplit analysis.
    197  	MOVL	$runtime·badsystemstack(SB), AX
    198  	CALL	AX
    199  
    200  switch:
    201  	// save our state in g->sched. Pretend to
    202  	// be systemstack_switch if the G stack is scanned.
    203  	MOVL	$runtime·systemstack_switch(SB), SI
    204  	MOVL	SI, (g_sched+gobuf_pc)(AX)
    205  	MOVL	SP, (g_sched+gobuf_sp)(AX)
    206  	MOVL	AX, (g_sched+gobuf_g)(AX)
    207  
    208  	// switch to g0
    209  	MOVL	DX, g(CX)
    210  	MOVL	(g_sched+gobuf_sp)(DX), SP
    211  
    212  	// call target function
    213  	MOVL	DI, DX	// DX = fn (closure pointer)
    214  	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
    215  	CALL	DI
    216  
    217  	// switch back to g
    218  	get_tls(CX)
    219  	MOVL	g(CX), AX
    220  	MOVL	g_m(AX), BX
    221  	MOVL	m_curg(BX), AX
    222  	MOVL	AX, g(CX)
    223  	MOVL	(g_sched+gobuf_sp)(AX), SP	// back on curg's stack at the SP saved above
    224  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
    225  	RET
    226  
    227  noswitch:
    228  	// already on m stack, just call directly
    229  	MOVL	DI, DX
    230  	MOVL	0(DI), DI
    231  	CALL	DI
    232  	RET
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
    238  // Called during function prolog when more stack is needed.
    239  //
    240  // The traceback routines see morestack on a g0 as being
    241  // the top of a stack (for example, morestack calling newstack
    242  // calling the scheduler calling newm calling gc), so we must
    243  // record an argument size. For that purpose, it has no arguments.
    244  TEXT runtime·morestack(SB),NOSPLIT,$0-0
    245  	get_tls(CX)
    246  	MOVL	g(CX), BX
    247  	MOVL	g_m(BX), BX	// BX = m
    248  
    249  	// Cannot grow scheduler stack (m->g0).
    250  	MOVL	m_g0(BX), SI
    251  	CMPL	g(CX), SI
    252  	JNE	2(PC)
    253  	MOVL	0, AX	// g == m->g0: load from address 0 to crash
    254  
    255  	// Cannot grow signal stack (m->gsignal).
    256  	MOVL	m_gsignal(BX), SI
    257  	CMPL	g(CX), SI
    258  	JNE	2(PC)
    259  	MOVL	0, AX	// g == m->gsignal: load from address 0 to crash
    260  
    261  	// Called from f.
    262  	// Set m->morebuf to f's caller.
    263  	MOVL	8(SP), AX	// f's caller's PC
    264  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
    265  	LEAL	16(SP), AX	// f's caller's SP
    266  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
    267  	get_tls(CX)
    268  	MOVL	g(CX), SI
    269  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
    270  
    271  	// Set g->sched to context in f.
    272  	MOVL	0(SP), AX // f's PC
    273  	MOVL	AX, (g_sched+gobuf_pc)(SI)
    274  	MOVL	SI, (g_sched+gobuf_g)(SI)
    275  	LEAL	8(SP), AX // f's SP
    276  	MOVL	AX, (g_sched+gobuf_sp)(SI)
    277  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as received on entry is saved as the context
    278  
    279  	// Call newstack on m->g0's stack.
    280  	MOVL	m_g0(BX), BX
    281  	MOVL	BX, g(CX)
    282  	MOVL	(g_sched+gobuf_sp)(BX), SP
    283  	CALL	runtime·newstack(SB)
    284  	MOVL	$0, 0x1003	// crash if newstack returns
    285  	RET
   286  
    287  // morestack trampoline: clears DX (the value morestack saves as g->sched.ctxt), then tail-jumps.
    288  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
    289  	MOVL	$0, DX
    290  	JMP	runtime·morestack(SB)
   291  
    292  TEXT runtime·stackBarrier(SB),NOSPLIT,$0	// Advance g.stkbarPos and continue at the return PC saved in the current barrier.
    293  	// We came here via a RET to an overwritten return PC.
    294  	// AX may be live. Other registers are available.
    295  
    296  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    297  	get_tls(CX)
    298  	MOVL	g(CX), CX
    299  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of g.stkbar's backing array
    300  	MOVL	g_stkbarPos(CX), BX
    301  	IMULL	$stkbar__size, BX	// Too big for SIB.
    302  	ADDL	DX, BX			// BX = &g.stkbar[g.stkbarPos]
    303  	MOVL	stkbar_savedLRVal(BX), BX
    304  	// Record that this stack barrier was hit.
    305  	ADDL	$1, g_stkbarPos(CX)
    306  	// Jump to the original return PC.
    307  	JMP	BX
   308  
    309  // reflectcall: call a function with the given argument list
    310  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    311  // We don't have variable-sized frames, so we use a small number
    312  // of constant-sized-frame functions to encode a few bits of size in the pc.
    313  // Caution: ugly multiline assembly macros in your future!
    314  // DISPATCH(NAME, MAXSIZE): if CX <= MAXSIZE (unsigned), jump to NAME; otherwise fall through.
    315  #define DISPATCH(NAME,MAXSIZE)		\
    316  	CMPL	CX, $MAXSIZE;		\
    317  	JA	3(PC);			\
    318  	MOVL	$NAME(SB), AX;		\
    319  	JMP	AX
    320  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   321  
    322  TEXT reflect·call(SB), NOSPLIT, $0-0	// reflect-package entry point; tail-jumps to ·reflectcall.
    323  	JMP	·reflectcall(SB)
   324  
    325  TEXT ·reflectcall(SB), NOSPLIT, $0-20	// Jump to the smallest runtime·callN whose N >= argsize; badreflectcall if none fits.
    326  	MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended
    327  	DISPATCH(runtime·call16, 16)
    328  	DISPATCH(runtime·call32, 32)
    329  	DISPATCH(runtime·call64, 64)
    330  	DISPATCH(runtime·call128, 128)
    331  	DISPATCH(runtime·call256, 256)
    332  	DISPATCH(runtime·call512, 512)
    333  	DISPATCH(runtime·call1024, 1024)
    334  	DISPATCH(runtime·call2048, 2048)
    335  	DISPATCH(runtime·call4096, 4096)
    336  	DISPATCH(runtime·call8192, 8192)
    337  	DISPATCH(runtime·call16384, 16384)
    338  	DISPATCH(runtime·call32768, 32768)
    339  	DISPATCH(runtime·call65536, 65536)
    340  	DISPATCH(runtime·call131072, 131072)
    341  	DISPATCH(runtime·call262144, 262144)
    342  	DISPATCH(runtime·call524288, 524288)
    343  	DISPATCH(runtime·call1048576, 1048576)
    344  	DISPATCH(runtime·call2097152, 2097152)
    345  	DISPATCH(runtime·call4194304, 4194304)
    346  	DISPATCH(runtime·call8388608, 8388608)
    347  	DISPATCH(runtime·call16777216, 16777216)
    348  	DISPATCH(runtime·call33554432, 33554432)
    349  	DISPATCH(runtime·call67108864, 67108864)
    350  	DISPATCH(runtime·call134217728, 134217728)
    351  	DISPATCH(runtime·call268435456, 268435456)
    352  	DISPATCH(runtime·call536870912, 536870912)
    353  	DISPATCH(runtime·call1073741824, 1073741824)
    354  	MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds the largest frame
    355  	JMP	AX
   356  
    357  #define CALLFN(NAME,MAXSIZE)			\
    358  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
    359  	NO_LOCAL_POINTERS;			\
    360  	/* copy argsize bytes from argptr to the bottom of this frame */	\
    361  	MOVL	argptr+8(FP), SI;		\
    362  	MOVL	argsize+12(FP), CX;		\
    363  	MOVL	SP, DI;				\
    364  	REP;MOVSB;				\
    365  	/* call f through the code pointer in its first word; DX = f */	\
    366  	MOVL	f+4(FP), DX;			\
    367  	MOVL	(DX), AX;			\
    368  	CALL	AX;				\
    369  	/* copy bytes [retoffset, argsize) of the frame back to argptr */	\
    370  	MOVL	argptr+8(FP), DI;		\
    371  	MOVL	argsize+12(FP), CX;		\
    372  	MOVL	retoffset+16(FP), BX;		\
    373  	MOVL	SP, SI;				\
    374  	ADDL	BX, DI;				\
    375  	ADDL	BX, SI;				\
    376  	SUBL	BX, CX;				\
    377  	REP;MOVSB;				\
    378  	/* execute write barrier updates: callwritebarrier(argtype, argptr, argsize, retoffset) */	\
    379  	MOVL	argtype+0(FP), DX;		\
    380  	MOVL	argptr+8(FP), DI;		\
    381  	MOVL	argsize+12(FP), CX;		\
    382  	MOVL	retoffset+16(FP), BX;		\
    383  	MOVL	DX, 0(SP);			\
    384  	MOVL	DI, 4(SP);			\
    385  	MOVL	CX, 8(SP);			\
    386  	MOVL	BX, 12(SP);			\
    387  	CALL	runtime·callwritebarrier(SB);	\
    388  	RET
    389  
    390  CALLFN(·call16, 16)
    391  CALLFN(·call32, 32)
    392  CALLFN(·call64, 64)
    393  CALLFN(·call128, 128)
    394  CALLFN(·call256, 256)
    395  CALLFN(·call512, 512)
    396  CALLFN(·call1024, 1024)
    397  CALLFN(·call2048, 2048)
    398  CALLFN(·call4096, 4096)
    399  CALLFN(·call8192, 8192)
    400  CALLFN(·call16384, 16384)
    401  CALLFN(·call32768, 32768)
    402  CALLFN(·call65536, 65536)
    403  CALLFN(·call131072, 131072)
    404  CALLFN(·call262144, 262144)
    405  CALLFN(·call524288, 524288)
    406  CALLFN(·call1048576, 1048576)
    407  CALLFN(·call2097152, 2097152)
    408  CALLFN(·call4194304, 4194304)
    409  CALLFN(·call8388608, 8388608)
    410  CALLFN(·call16777216, 16777216)
    411  CALLFN(·call33554432, 33554432)
    412  CALLFN(·call67108864, 67108864)
    413  CALLFN(·call134217728, 134217728)
    414  CALLFN(·call268435456, 268435456)
    415  CALLFN(·call536870912, 536870912)
    416  CALLFN(·call1073741824, 1073741824)
   417  
    418  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// Execute PAUSE `cycles` times.
    419  	MOVL	cycles+0(FP), AX
    420  again:
    421  	PAUSE
    422  	SUBL	$1, AX
    423  	JNZ	again	// loop until the counter reaches zero
    424  	RET
   425  
    426  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// Emits no instructions besides RET.
    427  	// Stores are already ordered on x86, so this is just a
    428  	// compile barrier.
    429  	RET
   430  
    431  // void jmpdefer(fn, sp);
    432  // called from deferreturn.
    433  // 1. pop the caller
    434  // 2. sub 5 bytes from the callers return
    435  // 3. jmp to the argument
    436  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
    437  	MOVL	fv+0(FP), DX	// DX = fn (closure pointer)
    438  	MOVL	argp+4(FP), BX
    439  	LEAL	-8(BX), SP	// caller sp after CALL
    440  	SUBL	$5, (SP)	// return to CALL again
    441  	MOVL	0(DX), BX	// BX = code address stored in the first word of fn
    442  	JMP	BX	// but first run the deferred function
   443  
    444  // func asmcgocall(fn, arg unsafe.Pointer) int32
    445  // Not implemented.
    446  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
    447  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
    448  	RET
   449  
    450  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
    451  // Not implemented.
    452  TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
    453  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
    454  	RET
   455  
    456  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
    457  // Not implemented.
    458  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
    459  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
    460  	RET
   461  
    462  // void setg(G*); set g. for use by needm.
    463  // Not implemented.
    464  TEXT runtime·setg(SB), NOSPLIT, $0-4
    465  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
    466  	RET
   467  
    468  // check that SP is in range [g->stack.lo, g->stack.hi)
    469  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
    470  	get_tls(CX)
    471  	MOVL	g(CX), AX
    472  	CMPL	(g_stack+stack_hi)(AX), SP
    473  	JHI	2(PC)	// ok when stack.hi > SP (unsigned)
    474  	MOVL	0, AX	// otherwise load from address 0 to crash
    475  	CMPL	SP, (g_stack+stack_lo)(AX)
    476  	JHI	2(PC)	// ok when SP > stack.lo (unsigned)
    477  	MOVL	0, AX	// otherwise load from address 0 to crash
    478  	RET
   479  
    480  TEXT runtime·memclr(SB),NOSPLIT,$0-8	// Zero n bytes starting at ptr.
    481  	MOVL	ptr+0(FP), DI
    482  	MOVL	n+4(FP), CX
    483  	MOVQ	CX, BX
    484  	ANDQ	$3, BX	// BX = n % 4, the tail byte count
    485  	SHRQ	$2, CX	// CX = n / 4, the number of 4-byte stores
    486  	MOVQ	$0, AX
    487  	CLD
    488  	REP
    489  	STOSL
    490  	MOVQ	BX, CX
    491  	REP
    492  	STOSB
    493  	// Note: we zero only 4 bytes at a time so that the tail is at most
    494  	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
    495  	// See issue 13160.
    496  	RET
   497  
    498  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12	// Return the PC stored 8 bytes below argp, looking through a stack barrier.
    499  	MOVL	argp+0(FP),AX		// addr of first arg
    500  	MOVL	-8(AX),AX		// get calling pc
    501  	CMPL	AX, runtime·stackBarrierPC(SB)
    502  	JNE	nobar
    503  	// Get original return PC.
    504  	CALL	runtime·nextBarrierPC(SB)
    505  	MOVL	0(SP), AX		// nextBarrierPC's result is read from 0(SP)
    506  nobar:
    507  	MOVL	AX, ret+8(FP)
    508  	RET
   509  
    510  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8	// Overwrite the PC stored 8 bytes below argp, or the pending barrier's PC.
    511  	MOVL	argp+0(FP),AX		// addr of first arg
    512  	MOVL	pc+4(FP), BX		// pc to set
    513  	MOVL	-8(AX), CX		// CX = PC currently stored there
    514  	CMPL	CX, runtime·stackBarrierPC(SB)
    515  	JEQ	setbar
    516  	MOVQ	BX, -8(AX)		// set calling pc
    517  	RET
    518  setbar:
    519  	// Set the stack barrier return PC.
    520  	MOVL	BX, 0(SP)
    521  	CALL	runtime·setNextBarrierPC(SB)
    522  	RET
   523  
    524  TEXT runtime·getcallersp(SB),NOSPLIT,$0-12	// Return argp unchanged as the result.
    525  	MOVL	argp+0(FP), AX
    526  	MOVL	AX, ret+8(FP)
    527  	RET
   528  
    529  // int64 runtime·cputicks(void)
    530  TEXT runtime·cputicks(SB),NOSPLIT,$0-0	// Return the 64-bit time-stamp counter.
    531  	RDTSC
    532  	SHLQ	$32, DX		// RDTSC returns the high half in DX, the low half in AX
    533  	ADDQ	DX, AX		// AX = DX<<32 + AX
    534  	MOVQ	AX, ret+0(FP)
    535  	RET
   536  
    537  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    538  // redirects to memhash(p, h, size) using the size
    539  // stored in the closure.
    540  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
    541  	GO_ARGS
    542  	NO_LOCAL_POINTERS
    543  	MOVL	p+0(FP), AX
    544  	MOVL	h+4(FP), BX
    545  	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
    546  	MOVL	AX, 0(SP)
    547  	MOVL	BX, 4(SP)
    548  	MOVL	CX, 8(SP)
    549  	CALL	runtime·memhash(SB)
    550  	MOVL	16(SP), AX	// memhash's result is read from 16(SP)
    551  	MOVL	AX, ret+8(FP)
    552  	RET
   553  
    554  // Hash functions using AES hardware instructions.
    555  // For now, our one amd64p32 system (NaCl) does not
    556  // support using AES instructions, so we have not bothered to
    557  // write the implementations. We can copy and adjust the ones
    558  // in asm_amd64.s when the time comes.
   559  
    560  TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// Unimplemented stub: no hash is computed.
    561  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
    562  	RET
   563  
    564  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20	// Unimplemented stub: no hash is computed.
    565  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
    566  	RET
   567  
    568  TEXT runtime·aeshash32(SB),NOSPLIT,$0-20	// Unimplemented stub: no hash is computed.
    569  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
    570  	RET
   571  
    572  TEXT runtime·aeshash64(SB),NOSPLIT,$0-20	// Unimplemented stub: no hash is computed.
    573  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
    574  	RET
   575  
   576  // memequal(p, q unsafe.Pointer, size uintptr) bool
   577  TEXT runtime·memequal(SB),NOSPLIT,$0-13
   578  	MOVL	a+0(FP), SI
   579  	MOVL	b+4(FP), DI
   580  	CMPL	SI, DI
   581  	JEQ	eq
   582  	MOVL	size+8(FP), BX
   583  	CALL	runtime·memeqbody(SB)
   584  	MOVB	AX, ret+16(FP)
   585  	RET
   586  eq:
   587  	MOVB    $1, ret+16(FP)
   588  	RET
   589  
    590  // memequal_varlen(a, b unsafe.Pointer) bool
    591  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9	// Like memequal, with the size taken from the closure in DX.
    592  	MOVL    a+0(FP), SI
    593  	MOVL    b+4(FP), DI
    594  	CMPL    SI, DI
    595  	JEQ     eq	// identical pointers: equal without reading memory
    596  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
    597  	CALL    runtime·memeqbody(SB)
    598  	MOVB    AX, ret+8(FP)
    599  	RET
    600  eq:
    601  	MOVB    $1, ret+8(FP)
    602  	RET
   603  
    604  // eqstring tests whether two strings are equal.
    605  // The compiler guarantees that strings passed
    606  // to eqstring have equal length.
    607  // See runtime_test.go:eqstring_generic for
    608  // equivalent Go code.
    609  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
    610  	MOVL	s1str+0(FP), SI
    611  	MOVL	s2str+8(FP), DI
    612  	CMPL	SI, DI
    613  	JEQ	same	// identical data pointers: equal without reading memory
    614  	MOVL	s1len+4(FP), BX	// only s1's length is read
    615  	CALL	runtime·memeqbody(SB)
    616  	MOVB	AX, v+16(FP)
    617  	RET
    618  same:
    619  	MOVB	$1, v+16(FP)
    620  	RET
   621  
    622  // a in SI
    623  // b in DI
    624  // count in BX
    625  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0	// Result in AX: 1 if the count bytes at a and b are equal, else 0.
    626  	XORQ	AX, AX	// AX = 0, so the early RETs below report "not equal"
    627  
    628  	CMPQ	BX, $8
    629  	JB	small
    630  	
    631  	// 64 bytes at a time using xmm registers
    632  hugeloop:
    633  	CMPQ	BX, $64
    634  	JB	bigloop
    635  	MOVOU	(SI), X0
    636  	MOVOU	(DI), X1
    637  	MOVOU	16(SI), X2
    638  	MOVOU	16(DI), X3
    639  	MOVOU	32(SI), X4
    640  	MOVOU	32(DI), X5
    641  	MOVOU	48(SI), X6
    642  	MOVOU	48(DI), X7
    643  	PCMPEQB	X1, X0
    644  	PCMPEQB	X3, X2
    645  	PCMPEQB	X5, X4
    646  	PCMPEQB	X7, X6
    647  	PAND	X2, X0
    648  	PAND	X6, X4
    649  	PAND	X4, X0	// X0 = AND of the four per-byte equality masks
    650  	PMOVMSKB X0, DX
    651  	ADDQ	$64, SI
    652  	ADDQ	$64, DI
    653  	SUBQ	$64, BX
    654  	CMPL	DX, $0xffff	// all 16 mask bits set means every byte matched
    655  	JEQ	hugeloop
    656  	RET
    657  
    658  	// 8 bytes at a time using 64-bit register
    659  bigloop:
    660  	CMPQ	BX, $8
    661  	JBE	leftover
    662  	MOVQ	(SI), CX
    663  	MOVQ	(DI), DX
    664  	ADDQ	$8, SI
    665  	ADDQ	$8, DI
    666  	SUBQ	$8, BX
    667  	CMPQ	CX, DX
    668  	JEQ	bigloop
    669  	RET
    670  
    671  	// remaining 0-8 bytes
    672  leftover:
    673  	ADDQ	BX, SI
    674  	ADDQ	BX, DI
    675  	MOVQ	-8(SI), CX	// compare the 8 bytes that end at the end of each buffer
    676  	MOVQ	-8(DI), DX
    677  	CMPQ	CX, DX
    678  	SETEQ	AX
    679  	RET
    680  
    681  small:
    682  	CMPQ	BX, $0
    683  	JEQ	equal	// count == 0: ZF is set, so SETEQ at equal yields 1
    684  
    685  	LEAQ	0(BX*8), CX
    686  	NEGQ	CX	// CX = -(8*count), used as the shift amount below
    687  
    688  	CMPB	SI, $0xf8
    689  	JA	si_high
    690  
    691  	// load at SI won't cross a page boundary.
    692  	MOVQ	(SI), SI
    693  	JMP	si_finish
    694  si_high:
    695  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
    696  	MOVQ	BX, DX
    697  	ADDQ	SI, DX
    698  	MOVQ	-8(DX), SI
    699  	SHRQ	CX, SI
    700  si_finish:
    701  
    702  	// same for DI.
    703  	CMPB	DI, $0xf8
    704  	JA	di_high
    705  	MOVQ	(DI), DI
    706  	JMP	di_finish
    707  di_high:
    708  	MOVQ	BX, DX
    709  	ADDQ	DI, DX
    710  	MOVQ	-8(DX), DI
    711  	SHRQ	CX, DI
    712  di_finish:
    713  
    714  	SUBQ	SI, DI
    715  	SHLQ	CX, DI	// shift out the bytes beyond count; ZF is set if the rest match
    716  equal:
    717  	SETEQ	AX
    718  	RET
   719  
    720  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// Load both strings' pointers and lengths, then return cmpbody's result.
    721  	MOVL	s1_base+0(FP), SI
    722  	MOVL	s1_len+4(FP), BX
    723  	MOVL	s2_base+8(FP), DI
    724  	MOVL	s2_len+12(FP), DX
    725  	CALL	runtime·cmpbody(SB)
    726  	MOVL	AX, ret+16(FP)
    727  	RET
   728  
    729  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// Load both slices' pointers and lengths, then return cmpbody's result.
    730  	MOVL	s1+0(FP), SI
    731  	MOVL	s1+4(FP), BX
    732  	MOVL	s2+12(FP), DI
    733  	MOVL	s2+16(FP), DX
    734  	CALL	runtime·cmpbody(SB)
    735  	MOVL	AX, res+24(FP)
    736  	RET
   737  
    738  // input:
    739  //   SI = a
    740  //   DI = b
    741  //   BX = alen
    742  //   DX = blen
    743  // output:
    744  //   AX = 1/0/-1
    745  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
    746  	CMPQ	SI, DI
    747  	JEQ	allsame	// same start address: only the lengths can differ
    748  	CMPQ	BX, DX
    749  	MOVQ	DX, R8
    750  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
    751  	CMPQ	R8, $8
    752  	JB	small
    753  
    754  loop:
    755  	CMPQ	R8, $16
    756  	JBE	_0through16
    757  	MOVOU	(SI), X0
    758  	MOVOU	(DI), X1
    759  	PCMPEQB X0, X1
    760  	PMOVMSKB X1, AX
    761  	XORQ	$0xffff, AX	// convert EQ to NE
    762  	JNE	diff16	// branch if at least one byte is not equal
    763  	ADDQ	$16, SI
    764  	ADDQ	$16, DI
    765  	SUBQ	$16, R8
    766  	JMP	loop
    767  	
    768  	// AX = bit mask of differences
    769  diff16:
    770  	BSFQ	AX, BX	// index of first byte that differs
    771  	XORQ	AX, AX
    772  	ADDQ	BX, SI
    773  	MOVB	(SI), CX
    774  	ADDQ	BX, DI
    775  	CMPB	CX, (DI)
    776  	SETHI	AX	// AX = 1 if a's byte is above b's (unsigned), else 0
    777  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
    778  	RET
    779  
    780  	// 0 through 16 bytes left, alen>=8, blen>=8
    781  _0through16:
    782  	CMPQ	R8, $8
    783  	JBE	_0through8
    784  	MOVQ	(SI), AX
    785  	MOVQ	(DI), CX
    786  	CMPQ	AX, CX
    787  	JNE	diff8
    788  _0through8:
    789  	ADDQ	R8, SI
    790  	ADDQ	R8, DI
    791  	MOVQ	-8(SI), AX	// compare the last 8 of the bytes left to compare
    792  	MOVQ	-8(DI), CX
    793  	CMPQ	AX, CX
    794  	JEQ	allsame
    795  
    796  	// AX and CX contain parts of a and b that differ.
    797  diff8:
    798  	BSWAPQ	AX	// reverse order of bytes
    799  	BSWAPQ	CX
    800  	XORQ	AX, CX
    801  	BSRQ	CX, CX	// index of highest bit difference
    802  	SHRQ	CX, AX	// move a's bit to bottom
    803  	ANDQ	$1, AX	// mask bit
    804  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
    805  	RET
    806  
    807  	// 0-7 bytes in common
    808  small:
    809  	LEAQ	(R8*8), CX	// bytes left -> bits left
    810  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
    811  	JEQ	allsame
    812  
    813  	// load bytes of a into high bytes of SI
    814  	CMPB	SI, $0xf8
    815  	JA	si_high
    816  	MOVQ	(SI), SI
    817  	JMP	si_finish
    818  si_high:
    819  	ADDQ	R8, SI
    820  	MOVQ	-8(SI), SI
    821  	SHRQ	CX, SI
    822  si_finish:
    823  	SHLQ	CX, SI
    824  
    825  	// load bytes of b into high bytes of DI
    826  	CMPB	DI, $0xf8
    827  	JA	di_high
    828  	MOVQ	(DI), DI
    829  	JMP	di_finish
    830  di_high:
    831  	ADDQ	R8, DI
    832  	MOVQ	-8(DI), DI
    833  	SHRQ	CX, DI
    834  di_finish:
    835  	SHLQ	CX, DI
    836  
    837  	BSWAPQ	SI	// reverse order of bytes
    838  	BSWAPQ	DI
    839  	XORQ	SI, DI	// find bit differences
    840  	JEQ	allsame
    841  	BSRQ	DI, CX	// index of highest bit difference
    842  	SHRQ	CX, SI	// move a's bit to bottom
    843  	ANDQ	$1, SI	// mask bit
    844  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
    845  	RET
    846  
    847  allsame:
    848  	XORQ	AX, AX
    849  	XORQ	CX, CX
    850  	CMPQ	BX, DX
    851  	SETGT	AX	// 1 if alen > blen
    852  	SETEQ	CX	// 1 if alen == blen
    853  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
    854  	RET
   855  
    856  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20	// Load the slice pointer, length and byte c, then return indexbytebody's result.
    857  	MOVL s+0(FP), SI
    858  	MOVL s_len+4(FP), BX
    859  	MOVB c+12(FP), AL
    860  	CALL runtime·indexbytebody(SB)
    861  	MOVL AX, ret+16(FP)
    862  	RET
   863  
    864  TEXT strings·IndexByte(SB),NOSPLIT,$0-20	// Load the string pointer, length and byte c, then return indexbytebody's result.
    865  	MOVL s+0(FP), SI
    866  	MOVL s_len+4(FP), BX
    867  	MOVB c+8(FP), AL
    868  	CALL runtime·indexbytebody(SB)
    869  	MOVL AX, ret+16(FP)
    870  	RET
   871  
    872  // input:
    873  //   SI: data
    874  //   BX: data len
    875  //   AL: byte sought
    876  // output:
    877  //   AX: index of the first matching byte, or -1 if there is none
    878  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
    879  	MOVL SI, DI	// DI = scan cursor; SI keeps the start for the index computation
    880  
    881  	CMPL BX, $16
    882  	JLT small
    883  
    884  	// round up to first 16-byte boundary
    885  	TESTL $15, SI
    886  	JZ aligned
    887  	MOVL SI, CX
    888  	ANDL $~15, CX
    889  	ADDL $16, CX
    890  
    891  	// search the beginning
    892  	SUBL SI, CX	// CX = number of bytes before the boundary
    893  	REPN; SCASB
    894  	JZ success
    895  
    896  // DI is 16-byte aligned; get ready to search using SSE instructions
    897  aligned:
    898  	// round down to last 16-byte boundary
    899  	MOVL BX, R11
    900  	ADDL SI, R11
    901  	ANDL $~15, R11
    902  
    903  	// shuffle X0 around so that each byte contains c
    904  	MOVD AX, X0
    905  	PUNPCKLBW X0, X0
    906  	PUNPCKLBW X0, X0
    907  	PSHUFL $0, X0, X0
    908  	JMP condition
    909  
    910  sse:
    911  	// move the next 16-byte chunk of the buffer into X1
    912  	MOVO (DI), X1
    913  	// compare bytes in X0 to X1
    914  	PCMPEQB X0, X1
    915  	// take the top bit of each byte in X1 and put the result in DX
    916  	PMOVMSKB X1, DX
    917  	TESTL DX, DX
    918  	JNZ ssesuccess
    919  	ADDL $16, DI
    920  
    921  condition:
    922  	CMPL DI, R11
    923  	JLT sse
    924  
    925  	// search the end
    926  	MOVL SI, CX
    927  	ADDL BX, CX
    928  	SUBL R11, CX	// CX = bytes remaining past the last 16-byte boundary
    929  	// if CX == 0, the zero flag will be set and we'll end up
    930  	// returning a false success
    931  	JZ failure
    932  	REPN; SCASB
    933  	JZ success
    934  
    935  failure:
    936  	MOVL $-1, AX
    937  	RET
    938  
    939  // handle for lengths < 16
    940  small:
    941  	MOVL BX, CX
    942  	REPN; SCASB
    943  	JZ success
    944  	MOVL $-1, AX
    945  	RET
    946  
    947  // we've found the chunk containing the byte
    948  // now just figure out which specific byte it is
    949  ssesuccess:
    950  	// get the index of the least significant set bit
    951  	BSFW DX, DX
    952  	SUBL SI, DI	// DI = offset of the matching chunk from the start
    953  	ADDL DI, DX
    954  	MOVL DX, AX
    955  	RET
    956  
    957  success:
    958  	SUBL SI, DI
    959  	SUBL $1, DI	// SCASB leaves DI one past the matching byte
    960  	MOVL DI, AX
    961  	RET
   962  
    963  TEXT bytes·Equal(SB),NOSPLIT,$0-25	// Return false when the lengths differ, otherwise memeqbody's result.
    964  	MOVL	a_len+4(FP), BX
    965  	MOVL	b_len+16(FP), CX
    966  	XORL	AX, AX	// AX = 0: the result if the lengths differ
    967  	CMPL	BX, CX
    968  	JNE	eqret
    969  	MOVL	a+0(FP), SI
    970  	MOVL	b+12(FP), DI
    971  	CALL	runtime·memeqbody(SB)
    972  eqret:
    973  	MOVB	AX, ret+24(FP)
    974  	RET
   975  
    976  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4	// Update m.fastrand and return the new value.
    977  	get_tls(CX)
    978  	MOVL	g(CX), AX
    979  	MOVL	g_m(AX), AX	// AX = m
    980  	MOVL	m_fastrand(AX), DX
    981  	ADDL	DX, DX	// DX = 2 * m.fastrand
    982  	MOVL	DX, BX	// BX keeps the doubled value
    983  	XORL	$0x88888eef, DX
    984  	CMOVLMI	BX, DX	// if the XOR result is negative, use the plain doubled value instead
    985  	MOVL	DX, m_fastrand(AX)
    986  	MOVL	DX, ret+0(FP)
    987  	RET
   988  
    989  TEXT runtime·return0(SB), NOSPLIT, $0	// Set AX to 0 and return.
    990  	MOVL	$0, AX
    991  	RET
   992  
    993  // The top-most function running on a goroutine
    994  // returns to goexit+PCQuantum.
    995  TEXT runtime·goexit(SB),NOSPLIT,$0-0
    996  	BYTE	$0x90	// NOP, so the CALL below is not the first instruction of goexit
    997  	CALL	runtime·goexit1(SB)	// does not return
    998  	// traceback from goexit1 must hit code range of goexit
    999  	BYTE	$0x90	// NOP
  1000  
   1001  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4	// Issue PREFETCHT0 for addr.
   1002  	MOVL	addr+0(FP), AX
   1003  	PREFETCHT0	(AX)
   1004  	RET
  1005  
   1006  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4	// Issue PREFETCHT1 for addr.
   1007  	MOVL	addr+0(FP), AX
   1008  	PREFETCHT1	(AX)
   1009  	RET
  1010  
  1011  
   1012  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4	// Issue PREFETCHT2 for addr.
   1013  	MOVL	addr+0(FP), AX
   1014  	PREFETCHT2	(AX)
   1015  	RET
  1016  
   1017  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4	// Issue PREFETCHNTA for addr.
   1018  	MOVL	addr+0(FP), AX
   1019  	PREFETCHNTA	(AX)
   1020  	RET
  1021  
   1022  TEXT ·checkASM(SB),NOSPLIT,$0-1	// Always returns true.
   1023  	MOVB	$1, ret+0(FP)
   1024  	RET