github.com/FenixAra/go@v0.0.0-20170127160404-96ea0918e670/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry point. Visible steps: realign the incoming stack,
        	// record it as g0's stack, query CPUID, install and verify TLS,
        	// link g0 and m0 together, run check/args/osinit/schedinit, queue
        	// the main goroutine with newproc, then call mstart.
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, CX		// plenty of scratch
    16  	ANDL	$~15, CX		// round down to a 16-byte boundary
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)		// stash argc; reloaded before runtime·args
    20  	MOVL	BX, 24(SP)		// stash argv; reloaded before runtime·args
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104: guard and low bound
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX			// CPUID leaf 0
    32  	CPUID
    33  	CMPQ	AX, $0			// leaf 0 returned AX == 0: skip leaf 1
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX			// CPUID leaf 1
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)
    38  	MOVL	DX, runtime·cpuid_edx(SB)
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·m0+m_tls(SB), DI	// DI = &m0.tls (presumably settls's register argument; settls is defined elsewhere)
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)		// write a marker through the TLS slot...
    48  	MOVQ	runtime·m0+m_tls(SB), AX	// ...and read it back directly from m0.tls
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)			// marker matched: skip the abort
    51  	MOVL	AX, 0	// abort
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·mainPC(SB), AX	// entry
    77  	MOVL	$0, 0(SP)		// first argument word for newproc: 0
    78  	MOVL	AX, 4(SP)		// second argument word: address of runtime·mainPC
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
    84  	MOVL	$0xf1, 0xf1  // crash
    85  	RET
    86  
    87  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
    88  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only; rt0_go passes its address to newproc
    89  
    90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes the x86 INT $3 breakpoint instruction, then returns.
    91  	INT $3
    92  	RET
    93  
    94  TEXT runtime·asminit(SB),NOSPLIT,$0-0
    95  	// No per-thread init is performed here; the function returns immediately.
    96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
   102  // void gosave(Gobuf*)
   103  // Save the caller's SP, the caller's PC and the current g into the Gobuf
        // and zero its ret field (setjmp-like). Calls badctxt if gobuf.ctxt is
        // not already zero.
   104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   105  	MOVL	buf+0(FP), AX	// gobuf
   106  	LEAL	buf+0(FP), BX	// caller's SP
   107  	MOVL	BX, gobuf_sp(AX)
   108  	MOVL	0(SP), BX		// caller's PC
   109  	MOVL	BX, gobuf_pc(AX)
   110  	MOVQ	$0, gobuf_ret(AX)
   111  	// Assert ctxt is zero. See func save.
   112  	MOVL	gobuf_ctxt(AX), BX
   113  	TESTL	BX, BX
   114  	JZ	2(PC)		// ctxt == 0: skip the badctxt call
   115  	CALL	runtime·badctxt(SB)
   116  	get_tls(CX)
   117  	MOVL	g(CX), BX
   118  	MOVL	BX, gobuf_g(AX)	// record the current g
   119  	RET
   120  
   121  // void gogo(Gobuf*)
   122  // Restore state from Gobuf (longjmp-like): make gobuf.g the current g,
        // load SP, ctxt (into DX) and ret (into AX) from the Gobuf, clear those
        // fields, and jump to gobuf.pc. The 8-byte frame holds the two argument
        // words passed to writebarrierptr_prewrite.
   123  TEXT runtime·gogo(SB), NOSPLIT, $8-4
   124  	MOVL	buf+0(FP), BX		// gobuf
   125  
   126  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   127  	MOVL	gobuf_ctxt(BX), DX
   128  	TESTL	DX, DX
   129  	JZ	nilctxt
   130  	LEAL	gobuf_ctxt(BX), AX
   131  	MOVL	AX, 0(SP)		// arg 0: address of gobuf.ctxt
   132  	MOVL	$0, 4(SP)		// arg 1: 0, the value ctxt is cleared to below
   133  	CALL	runtime·writebarrierptr_prewrite(SB)
   134  	MOVL	buf+0(FP), BX		// reload gobuf after the call
   135  
   136  nilctxt:
   137  	MOVL	gobuf_g(BX), DX
   138  	MOVL	0(DX), CX		// make sure g != nil
   139  	get_tls(CX)
   140  	MOVL	DX, g(CX)
   141  	MOVL	gobuf_sp(BX), SP	// restore SP
   142  	MOVL	gobuf_ctxt(BX), DX
   143  	MOVQ	gobuf_ret(BX), AX
   144  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   145  	MOVQ	$0, gobuf_ret(BX)
   146  	MOVL	$0, gobuf_ctxt(BX)
   147  	MOVL	gobuf_pc(BX), BX	// BX is overwritten last: it held the gobuf pointer
   148  	JMP	BX
   149  
   150  // func mcall(fn func(*g))
   151  // Switch to m->g0's stack, call fn(g).
   152  // Fn must never return. It should gogo(&g->sched)
   153  // to keep running g.
        // If the current g is already m->g0, jumps to badmcall instead.
        // If fn does return, jumps to badmcall2.
   154  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   155  	MOVL	fn+0(FP), DI
   156  	
   157  	get_tls(CX)
   158  	MOVL	g(CX), AX	// save state in g->sched
   159  	MOVL	0(SP), BX	// caller's PC
   160  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   161  	LEAL	fn+0(FP), BX	// caller's SP
   162  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   163  	MOVL	AX, (g_sched+gobuf_g)(AX)
   164  
   165  	// switch to m->g0 & its stack, call fn
   166  	MOVL	g(CX), BX
   167  	MOVL	g_m(BX), BX
   168  	MOVL	m_g0(BX), SI
   169  	CMPL	SI, AX	// if g == m->g0 call badmcall
   170  	JNE	3(PC)		// g != m->g0: skip the two-instruction badmcall jump
   171  	MOVL	$runtime·badmcall(SB), AX
   172  	JMP	AX
   173  	MOVL	SI, g(CX)	// g = m->g0
   174  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   175  	PUSHQ	AX		// the old g, pushed as fn's argument
   176  	MOVL	DI, DX		// DX = fn value
   177  	MOVL	0(DI), DI	// DI = code pointer stored at 0(fn)
   178  	CALL	DI
   179  	POPQ	AX
   180  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   181  	JMP	AX
   182  	RET
   183  
   184  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   185  // of the G stack. We need to distinguish the routine that
   186  // lives at the bottom of the G stack from the one that lives
   187  // at the top of the system stack because the one at the top of
   188  // the system stack terminates the stack walk (see topofstack()).
        // Its body is a lone RET; systemstack stores this function's address
        // as the saved PC in g->sched.
   189  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   190  	RET
   191  
   192  // func systemstack(fn func())
        // Calls fn on the m's g0 stack. If the current g is already gsignal or
        // g0, fn is called directly on the current stack. If the current g is
        // m->curg, its state is saved in g->sched, the stack is switched to g0,
        // fn is called, and the stack is switched back. Any other g is reported
        // through badsystemstack.
   193  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   194  	MOVL	fn+0(FP), DI	// DI = fn
   195  	get_tls(CX)
   196  	MOVL	g(CX), AX	// AX = g
   197  	MOVL	g_m(AX), BX	// BX = m
   198  
   199  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   200  	CMPL	AX, DX
   201  	JEQ	noswitch
   202  
   203  	MOVL	m_g0(BX), DX	// DX = g0
   204  	CMPL	AX, DX
   205  	JEQ	noswitch
   206  
   207  	MOVL	m_curg(BX), R8
   208  	CMPL	AX, R8
   209  	JEQ	switch
   210  	
   211  	// Not gsignal, not g0, not curg: an unexpected g, which is not allowed.
   212  	// Hide call from linker nosplit analysis.
   213  	MOVL	$runtime·badsystemstack(SB), AX
   214  	CALL	AX
   215  
   216  switch:
   217  	// save our state in g->sched. Pretend to
   218  	// be systemstack_switch if the G stack is scanned.
   219  	MOVL	$runtime·systemstack_switch(SB), SI
   220  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   221  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   222  	MOVL	AX, (g_sched+gobuf_g)(AX)
   223  
   224  	// switch to g0
   225  	MOVL	DX, g(CX)
   226  	MOVL	(g_sched+gobuf_sp)(DX), SP
   227  
   228  	// call target function
   229  	MOVL	DI, DX		// DX = fn value
   230  	MOVL	0(DI), DI	// DI = code pointer stored at 0(fn)
   231  	CALL	DI
   232  
   233  	// switch back to g
   234  	get_tls(CX)
   235  	MOVL	g(CX), AX
   236  	MOVL	g_m(AX), BX
   237  	MOVL	m_curg(BX), AX
   238  	MOVL	AX, g(CX)
   239  	MOVL	(g_sched+gobuf_sp)(AX), SP	// restore the SP saved at "switch"
   240  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once restored
   241  	RET
   242  
   243  noswitch:
   244  	// already on m stack, just call directly
   245  	MOVL	DI, DX		// DX = fn value
   246  	MOVL	0(DI), DI	// DI = code pointer stored at 0(fn)
   247  	CALL	DI
   248  	RET
   249  
   250  /*
   251   * support for morestack
   252   */
   253  
   254  // Called during function prolog when more stack is needed.
   255  //
   256  // The traceback routines see morestack on a g0 as being
   257  // the top of a stack (for example, morestack calling newstack
   258  // calling the scheduler calling newm calling gc), so we must
   259  // record an argument size. For that purpose, it has no arguments.
        //
        // Records f's caller in m->morebuf and f's own context in g->sched,
        // then switches to m->g0's stack and calls newstack with DX pushed as
        // its ctxt argument. Refuses to run on g0 or gsignal.
   260  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   261  	get_tls(CX)
   262  	MOVL	g(CX), BX
   263  	MOVL	g_m(BX), BX
   264  
   265  	// Cannot grow scheduler stack (m->g0).
   266  	MOVL	m_g0(BX), SI
   267  	CMPL	g(CX), SI
   268  	JNE	3(PC)		// not g0: skip the report and the faulting load
   269  	CALL	runtime·badmorestackg0(SB)
   270  	MOVL	0, AX		// load from address 0 if the report returns
   271  
   272  	// Cannot grow signal stack (m->gsignal).
   273  	MOVL	m_gsignal(BX), SI
   274  	CMPL	g(CX), SI
   275  	JNE	3(PC)		// not gsignal: skip the report and the faulting load
   276  	CALL	runtime·badmorestackgsignal(SB)
   277  	MOVL	0, AX		// load from address 0 if the report returns
   278  
   279  	// Called from f.
   280  	// Set m->morebuf to f's caller.
   281  	MOVL	8(SP), AX	// f's caller's PC
   282  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   283  	LEAL	16(SP), AX	// f's caller's SP
   284  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   285  	get_tls(CX)
   286  	MOVL	g(CX), SI
   287  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   288  
   289  	// Set g->sched to context in f.
   290  	MOVL	0(SP), AX // f's PC
   291  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   292  	MOVL	SI, (g_sched+gobuf_g)(SI)
   293  	LEAL	8(SP), AX // f's SP
   294  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   295  	// newstack will fill gobuf.ctxt.
   296  
   297  	// Call newstack on m->g0's stack.
   298  	MOVL	m_g0(BX), BX
   299  	MOVL	BX, g(CX)
   300  	MOVL	(g_sched+gobuf_sp)(BX), SP
   301  	PUSHQ	DX	// ctxt argument
   302  	CALL	runtime·newstack(SB)
   303  	MOVL	$0, 0x1003	// crash if newstack returns
   304  	POPQ	DX	// keep balance check happy
   305  	RET
   306  
   307  // morestack trampoline: zero DX (the value morestack pushes as newstack's
        // ctxt argument) and tail-jump to morestack.
   308  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   309  	MOVL	$0, DX
   310  	JMP	runtime·morestack(SB)
   311  
   312  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   313  	// We came here via a RET to an overwritten return PC.
   314  	// AX may be live. Other registers are available.
        	// This code uses only CX, DX and BX and leaves AX untouched.
   315  
   316  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   317  	get_tls(CX)
   318  	MOVL	g(CX), CX
   319  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the g.stkbar array
   320  	MOVL	g_stkbarPos(CX), BX
   321  	IMULL	$stkbar__size, BX	// Too big for SIB.
   322  	ADDL	DX, BX			// BX = &g.stkbar[g.stkbarPos]
   323  	MOVL	stkbar_savedLRVal(BX), BX
   324  	// Record that this stack barrier was hit.
   325  	ADDL	$1, g_stkbarPos(CX)
   326  	// Jump to the original return PC.
   327  	JMP	BX
   328  
   329  // reflectcall: call a function with the given argument list
   330  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   331  // we don't have variable-sized frames, so we use a small number
   332  // of constant-sized-frame functions to encode a few bits of size in the pc.
   333  // Caution: ugly multiline assembly macros in your future!
   334  
   335  #define DISPATCH(NAME,MAXSIZE)		\
   336  	CMPL	CX, $MAXSIZE;		\
   337  	JA	3(PC);			\
   338  	MOVL	$NAME(SB), AX;		\
   339  	JMP	AX
   340  // Note: can't just "JMP NAME(SB)" - bad inlining results.
        // DISPATCH expects the size in CX. When CX <= MAXSIZE (unsigned) it jumps
        // to NAME through AX; otherwise JA 3(PC) skips the MOVL/JMP pair and
        // execution falls through to whatever follows the macro.
   341  
   342  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: tail-jumps to ·reflectcall without
        	// touching the stack, so the callee sees the same arguments.
   343  	JMP	·reflectcall(SB)
   344  
   345  TEXT ·reflectcall(SB), NOSPLIT, $0-20
        	// Loads argsize into CX and jumps to the first call<N> routine whose
        	// N is >= argsize (N runs from 16 up to 1073741824 in powers of two).
        	// If argsize exceeds every N, jumps to badreflectcall.
   346  	MOVLQZX argsize+12(FP), CX	// zero-extend the 32-bit argsize
   347  	DISPATCH(runtime·call16, 16)
   348  	DISPATCH(runtime·call32, 32)
   349  	DISPATCH(runtime·call64, 64)
   350  	DISPATCH(runtime·call128, 128)
   351  	DISPATCH(runtime·call256, 256)
   352  	DISPATCH(runtime·call512, 512)
   353  	DISPATCH(runtime·call1024, 1024)
   354  	DISPATCH(runtime·call2048, 2048)
   355  	DISPATCH(runtime·call4096, 4096)
   356  	DISPATCH(runtime·call8192, 8192)
   357  	DISPATCH(runtime·call16384, 16384)
   358  	DISPATCH(runtime·call32768, 32768)
   359  	DISPATCH(runtime·call65536, 65536)
   360  	DISPATCH(runtime·call131072, 131072)
   361  	DISPATCH(runtime·call262144, 262144)
   362  	DISPATCH(runtime·call524288, 524288)
   363  	DISPATCH(runtime·call1048576, 1048576)
   364  	DISPATCH(runtime·call2097152, 2097152)
   365  	DISPATCH(runtime·call4194304, 4194304)
   366  	DISPATCH(runtime·call8388608, 8388608)
   367  	DISPATCH(runtime·call16777216, 16777216)
   368  	DISPATCH(runtime·call33554432, 33554432)
   369  	DISPATCH(runtime·call67108864, 67108864)
   370  	DISPATCH(runtime·call134217728, 134217728)
   371  	DISPATCH(runtime·call268435456, 268435456)
   372  	DISPATCH(runtime·call536870912, 536870912)
   373  	DISPATCH(runtime·call1073741824, 1073741824)
   374  	MOVL	$runtime·badreflectcall(SB), AX	// no size class was large enough
   375  	JMP	AX
   376  
   377  #define CALLFN(NAME,MAXSIZE)			\
   378  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   379  	NO_LOCAL_POINTERS;			\
   380  	/* copy argsize bytes from argptr to the bottom of this MAXSIZE-byte frame */		\
   381  	MOVL	argptr+8(FP), SI;		\
   382  	MOVL	argsize+12(FP), CX;		\
   383  	MOVL	SP, DI;				\
   384  	REP;MOVSB;				\
   385  	/* call through f: DX = f, AX = code pointer loaded from 0(f) */			\
   386  	MOVL	f+4(FP), DX;			\
   387  	MOVL	(DX), AX;			\
   388  	CALL	AX;				\
   389  	/* copy results back: skip retoffset bytes on both sides and hand DX=argtype, DI=dst, SI=src, CX=size to callRet */		\
   390  	MOVL	argtype+0(FP), DX;		\
   391  	MOVL	argptr+8(FP), DI;		\
   392  	MOVL	argsize+12(FP), CX;		\
   393  	MOVL	retoffset+16(FP), BX;		\
   394  	MOVL	SP, SI;				\
   395  	ADDL	BX, DI;				\
   396  	ADDL	BX, SI;				\
   397  	SUBL	BX, CX;				\
   398  	CALL	callRet<>(SB);			\
   399  	RET
   400  
   401  // callRet copies return values back at the end of call*. This is a
   402  // separate function so it can allocate stack space for the arguments
   403  // to reflectcallmove. It does not follow the Go ABI; it expects its
   404  // arguments in registers.
        // Inputs as set up by CALLFN: DX = argtype, DI = destination
        // (argptr+retoffset), SI = source (frame+retoffset), CX = byte count
        // (argsize-retoffset). They are stored as four consecutive 4-byte
        // argument words for reflectcallmove.
   405  TEXT callRet<>(SB), NOSPLIT, $16-0
   406  	MOVL	DX, 0(SP)
   407  	MOVL	DI, 4(SP)
   408  	MOVL	SI, 8(SP)
   409  	MOVL	CX, 12(SP)
   410  	CALL	runtime·reflectcallmove(SB)
   411  	RET
   412  
   413  CALLFN(·call16, 16)
        // One CALLFN instance per frame size, 16 bytes through 1073741824 bytes
        // in powers of two; these are the call<N> targets that reflectcall's
        // DISPATCH chain jumps to.
   414  CALLFN(·call32, 32)
   415  CALLFN(·call64, 64)
   416  CALLFN(·call128, 128)
   417  CALLFN(·call256, 256)
   418  CALLFN(·call512, 512)
   419  CALLFN(·call1024, 1024)
   420  CALLFN(·call2048, 2048)
   421  CALLFN(·call4096, 4096)
   422  CALLFN(·call8192, 8192)
   423  CALLFN(·call16384, 16384)
   424  CALLFN(·call32768, 32768)
   425  CALLFN(·call65536, 65536)
   426  CALLFN(·call131072, 131072)
   427  CALLFN(·call262144, 262144)
   428  CALLFN(·call524288, 524288)
   429  CALLFN(·call1048576, 1048576)
   430  CALLFN(·call2097152, 2097152)
   431  CALLFN(·call4194304, 4194304)
   432  CALLFN(·call8388608, 8388608)
   433  CALLFN(·call16777216, 16777216)
   434  CALLFN(·call33554432, 33554432)
   435  CALLFN(·call67108864, 67108864)
   436  CALLFN(·call134217728, 134217728)
   437  CALLFN(·call268435456, 268435456)
   438  CALLFN(·call536870912, 536870912)
   439  CALLFN(·call1073741824, 1073741824)
   440  
   441  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Executes PAUSE once per loop iteration, counting `cycles` down to
        	// zero. The decrement happens after the first PAUSE, so cycles == 0
        	// wraps around instead of returning immediately.
        	// NOTE(review): the TEXT line declares 0 argument bytes although
        	// cycles is read from 0(FP) — confirm this is intentional.
   442  	MOVL	cycles+0(FP), AX
   443  again:
   444  	PAUSE
   445  	SUBL	$1, AX
   446  	JNZ	again
   447  	RET
   448  
   449  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   450  	// Stores are already ordered on x86, so this is just a
   451  	// compile barrier: the body emits no instruction other than RET.
   452  	RET
   453  
   454  // void jmpdefer(fn, sp);
   455  // called from deferreturn.
   456  // 1. pop the caller
   457  // 2. sub 5 bytes from the caller's return address
   458  // 3. jmp to the argument
   459  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   460  	MOVL	fv+0(FP), DX	// DX = fv, left in DX for the deferred function
   461  	MOVL	argp+4(FP), BX
   462  	LEAL	-8(BX), SP	// caller sp after CALL
   463  	SUBL	$5, (SP)	// return to CALL again (5 is presumably the CALL instruction's length — confirm)
   464  	MOVL	0(DX), BX	// BX = code pointer stored at 0(fv)
   465  	JMP	BX	// but first run the deferred function
   466  
   467  // func asmcgocall(fn, arg unsafe.Pointer) int32
   468  // Not implemented. The body only loads from absolute address 0
        // (presumably a deliberate fault — confirm) and never writes a result.
   469  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   470  	MOVL	0, AX
   471  	RET
   472  
   473  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   474  // Not implemented. The body only loads from absolute address 0
        // (presumably a deliberate fault — confirm).
   475  TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
   476  	MOVL	0, AX
   477  	RET
   478  
   479  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   480  // Not implemented. The body only loads from absolute address 0
        // (presumably a deliberate fault — confirm).
   481  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
   482  	MOVL	0, AX
   483  	RET
   484  
   485  // void setg(G*); set g. for use by needm.
   486  // Not implemented: the argument is never read and g is not changed. The
        // body only loads from absolute address 0 (presumably a deliberate
        // fault — confirm).
   487  TEXT runtime·setg(SB), NOSPLIT, $0-4
   488  	MOVL	0, AX
   489  	RET
   490  
   491  // Check that SP lies within the current g's stack. As coded, both
        // comparisons are strict and unsigned: g->stack.lo < SP < g->stack.hi.
        // On failure the code loads from absolute address 0.
   492  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   493  	get_tls(CX)
   494  	MOVL	g(CX), AX
   495  	CMPL	(g_stack+stack_hi)(AX), SP
   496  	JHI	2(PC)		// stack.hi > SP: skip the faulting load
   497  	MOVL	0, AX
   498  	CMPL	SP, (g_stack+stack_lo)(AX)
   499  	JHI	2(PC)		// SP > stack.lo: skip the faulting load
   500  	MOVL	0, AX
   501  	RET
   502  
   503  TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
        	// Zeroes n bytes starting at ptr: n/4 four-byte stores with
        	// REP STOSL, then the remaining n%4 bytes with REP STOSB.
   504  	MOVL	ptr+0(FP), DI
   505  	MOVL	n+4(FP), CX
   506  	MOVQ	CX, BX
   507  	ANDQ	$3, BX		// BX = n % 4, the byte tail
   508  	SHRQ	$2, CX		// CX = n / 4, the 4-byte store count
   509  	MOVQ	$0, AX		// value stored by STOSL/STOSB
   510  	CLD			// string stores move upward
   511  	REP
   512  	STOSL
   513  	MOVQ	BX, CX
   514  	REP
   515  	STOSB
   516  	// Note: we zero only 4 bytes at a time so that the tail is at most
   517  	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
   518  	// See issue 13160.
   519  	RET
   520  
   521  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
        	// Returns the return PC stored in the 8 bytes below argp. If that
        	// slot holds the value of runtime·stackBarrierPC, the result is
        	// whatever nextBarrierPC returns instead.
   522  	MOVL	argp+0(FP),AX		// addr of first arg
   523  	MOVL	-8(AX),AX		// get calling pc
   524  	CMPL	AX, runtime·stackBarrierPC(SB)
   525  	JNE	nobar
   526  	// Get original return PC.
   527  	CALL	runtime·nextBarrierPC(SB)
   528  	MOVL	0(SP), AX		// nextBarrierPC's result, read from 0(SP)
   529  nobar:
   530  	MOVL	AX, ret+8(FP)
   531  	RET
   532  
   533  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
        	// Overwrites the return PC stored in the 8 bytes below argp with pc.
        	// If that slot currently holds the value of runtime·stackBarrierPC,
        	// the slot is left alone and pc is passed to setNextBarrierPC.
   534  	MOVL	argp+0(FP),AX		// addr of first arg
   535  	MOVL	pc+4(FP), BX		// pc to set
   536  	MOVL	-8(AX), CX		// current return PC
   537  	CMPL	CX, runtime·stackBarrierPC(SB)
   538  	JEQ	setbar
   539  	MOVQ	BX, -8(AX)		// set calling pc
   540  	RET
   541  setbar:
   542  	// Set the stack barrier return PC.
   543  	MOVL	BX, 0(SP)		// argument for setNextBarrierPC
   544  	CALL	runtime·setNextBarrierPC(SB)
   545  	RET
   546  
   547  // int64 runtime·cputicks(void)
        // Returns the 64-bit time-stamp counter read with RDTSC.
        // NOTE(review): the TEXT line declares 0 argument bytes although an
        // 8-byte result is stored at ret+0(FP) — confirm this is intentional.
   548  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   549  	RDTSC			// counter: high half in DX, low half in AX
   550  	SHLQ	$32, DX
   551  	ADDQ	DX, AX		// AX = high<<32 + low
   552  	MOVQ	AX, ret+0(FP)
   553  	RET
   554  
   555  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   556  // redirects to memhash(p, h, size) using the size
   557  // stored in the closure.
        // The closure pointer is taken from DX; the size is read at offset 4.
   558  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   559  	GO_ARGS
   560  	NO_LOCAL_POINTERS
   561  	MOVL	p+0(FP), AX
   562  	MOVL	h+4(FP), BX
   563  	MOVL	4(DX), CX	// size stored at offset 4 in the closure
   564  	MOVL	AX, 0(SP)	// memhash arg: p
   565  	MOVL	BX, 4(SP)	// memhash arg: h
   566  	MOVL	CX, 8(SP)	// memhash arg: size
   567  	CALL	runtime·memhash(SB)
   568  	MOVL	16(SP), AX	// memhash result, read from 16(SP)
   569  	MOVL	AX, ret+8(FP)
   570  	RET
   571  
   572  // Hash functions using AES hardware instructions.
   573  // For now, our one amd64p32 system (NaCl) does not
   574  // support AES instructions, so we have not bothered to
   575  // write the implementations. We can copy and adjust the ones
   576  // in asm_amd64.s when the time comes.
   577  
   578  TEXT runtime·aeshash(SB),NOSPLIT,$0-20
        	// Placeholder: no hash is computed. Whatever AX holds on entry is
        	// stored as the result.
   579  	MOVL	AX, ret+16(FP)
   580  	RET
   581  
   582  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
        	// Placeholder: no hash is computed. Whatever AX holds on entry is
        	// stored as the result.
   583  	MOVL	AX, ret+8(FP)
   584  	RET
   585  
   586  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
        	// Placeholder: no hash is computed. Whatever AX holds on entry is
        	// stored as the result.
   587  	MOVL	AX, ret+8(FP)
   588  	RET
   589  
   590  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
        	// Placeholder: no hash is computed. Whatever AX holds on entry is
        	// stored as the result.
   591  	MOVL	AX, ret+8(FP)
   592  	RET
   593  
   594  // memequal(p, q unsafe.Pointer, size uintptr) bool
        // Reports whether the size bytes at p and q are equal. Identical
        // pointers return true without reading memory; otherwise the work is
        // done by memeqbody (SI = a, DI = b, BX = size, result in AX).
   595  TEXT runtime·memequal(SB),NOSPLIT,$0-17
   596  	MOVL	a+0(FP), SI
   597  	MOVL	b+4(FP), DI
   598  	CMPL	SI, DI
   599  	JEQ	eq
   600  	MOVL	size+8(FP), BX
   601  	CALL	runtime·memeqbody(SB)
   602  	MOVB	AX, ret+16(FP)
   603  	RET
   604  eq:
   605  	MOVB    $1, ret+16(FP)
   606  	RET
   607  
   608  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, but the byte count comes from the closure in DX
        // rather than from an argument. Identical pointers return true
        // without reading memory.
   609  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   610  	MOVL    a+0(FP), SI
   611  	MOVL    b+4(FP), DI
   612  	CMPL    SI, DI
   613  	JEQ     eq
   614  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   615  	CALL    runtime·memeqbody(SB)
   616  	MOVB    AX, ret+8(FP)
   617  	RET
   618  eq:
   619  	MOVB    $1, ret+8(FP)
   620  	RET
   621  
   622  // eqstring tests whether two strings are equal.
   623  // The compiler guarantees that strings passed
   624  // to eqstring have equal length.
   625  // See runtime_test.go:eqstring_generic for
   626  // equivalent Go code.
        // Only s1's length is read. Identical base pointers return true
        // without reading memory.
   627  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   628  	MOVL	s1_base+0(FP), SI
   629  	MOVL	s2_base+8(FP), DI
   630  	CMPL	SI, DI
   631  	JEQ	same
   632  	MOVL	s1_len+4(FP), BX
   633  	CALL	runtime·memeqbody(SB)
   634  	MOVB	AX, ret+16(FP)
   635  	RET
   636  same:
   637  	MOVB	$1, ret+16(FP)
   638  	RET
   639  
   640  // a in SI
   641  // b in DI
   642  // count in BX
        // result in AX: 1 if the count bytes at a and b are equal, 0 otherwise.
        // Register-based helper, not a Go-ABI function. Overwrites BX, CX, DX,
        // SI, DI and X0-X7.
   643  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   644  	XORQ	AX, AX		// default result 0; the early-mismatch RETs rely on it
   645  
   646  	CMPQ	BX, $8
   647  	JB	small
   648  	
   649  	// 64 bytes at a time using xmm registers
   650  hugeloop:
   651  	CMPQ	BX, $64
   652  	JB	bigloop
   653  	MOVOU	(SI), X0
   654  	MOVOU	(DI), X1
   655  	MOVOU	16(SI), X2
   656  	MOVOU	16(DI), X3
   657  	MOVOU	32(SI), X4
   658  	MOVOU	32(DI), X5
   659  	MOVOU	48(SI), X6
   660  	MOVOU	48(DI), X7
   661  	PCMPEQB	X1, X0
   662  	PCMPEQB	X3, X2
   663  	PCMPEQB	X5, X4
   664  	PCMPEQB	X7, X6
   665  	PAND	X2, X0
   666  	PAND	X6, X4
   667  	PAND	X4, X0		// X0 = AND of the four per-byte equality masks
   668  	PMOVMSKB X0, DX
   669  	ADDQ	$64, SI
   670  	ADDQ	$64, DI
   671  	SUBQ	$64, BX
   672  	CMPL	DX, $0xffff	// 0xffff: all 16 mask bits set, every byte matched
   673  	JEQ	hugeloop
   674  	RET			// mismatch: AX is still 0
   675  
   676  	// 8 bytes at a time using 64-bit register
   677  bigloop:
   678  	CMPQ	BX, $8
   679  	JBE	leftover
   680  	MOVQ	(SI), CX
   681  	MOVQ	(DI), DX
   682  	ADDQ	$8, SI
   683  	ADDQ	$8, DI
   684  	SUBQ	$8, BX
   685  	CMPQ	CX, DX
   686  	JEQ	bigloop
   687  	RET			// mismatch: AX is still 0
   688  
   689  	// remaining 0-8 bytes
   690  leftover:
   691  	ADDQ	BX, SI
   692  	ADDQ	BX, DI
   693  	MOVQ	-8(SI), CX	// the final 8 bytes of a (may overlap bytes already compared)
   694  	MOVQ	-8(DI), DX	// the final 8 bytes of b
   695  	CMPQ	CX, DX
   696  	SETEQ	AX
   697  	RET
   698  
   699  small:
   700  	CMPQ	BX, $0
   701  	JEQ	equal		// zero length: ZF is set, so SETEQ below yields 1
   702  
   703  	LEAQ	0(BX*8), CX
   704  	NEGQ	CX		// CX = -8*count, used as a shift count below
   705  
   706  	CMPB	SI, $0xf8
   707  	JA	si_high
   708  
   709  	// load at SI won't cross a page boundary.
   710  	MOVQ	(SI), SI
   711  	JMP	si_finish
   712  si_high:
   713  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   714  	MOVQ	BX, DX
   715  	ADDQ	SI, DX
   716  	MOVQ	-8(DX), SI
   717  	SHRQ	CX, SI
   718  si_finish:
   719  
   720  	// same for DI.
   721  	CMPB	DI, $0xf8
   722  	JA	di_high
   723  	MOVQ	(DI), DI
   724  	JMP	di_finish
   725  di_high:
   726  	MOVQ	BX, DX
   727  	ADDQ	DI, DX
   728  	MOVQ	-8(DX), DI
   729  	SHRQ	CX, DI
   730  di_finish:
   731  
   732  	SUBQ	SI, DI
   733  	SHLQ	CX, DI		// shift out the bytes beyond count; ZF set iff the rest matched
   734  equal:
   735  	SETEQ	AX
   736  	RET
   737  
   738  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Three-way comparison of two strings. Loads both base pointers and
        	// lengths into cmpbody's input registers (SI/BX for s1, DI/DX for
        	// s2) and stores the AX result.
   739  	MOVL	s1_base+0(FP), SI
   740  	MOVL	s1_len+4(FP), BX
   741  	MOVL	s2_base+8(FP), DI
   742  	MOVL	s2_len+12(FP), DX
   743  	CALL	runtime·cmpbody(SB)
   744  	MOVL	AX, ret+16(FP)
   745  	RET
   746  
   747  TEXT bytes·Compare(SB),NOSPLIT,$0-28
        	// Three-way comparison of two byte slices. Reads each slice's first
        	// two words (at +0/+4 and +12/+16) into cmpbody's input registers
        	// and stores the AX result.
   748  	MOVL	s1+0(FP), SI
   749  	MOVL	s1+4(FP), BX
   750  	MOVL	s2+12(FP), DI
   751  	MOVL	s2+16(FP), DX
   752  	CALL	runtime·cmpbody(SB)
   753  	MOVL	AX, res+24(FP)
   754  	RET
   755  
   756  // input:
   757  //   SI = a
   758  //   DI = b
   759  //   BX = alen
   760  //   DX = blen
   761  // output:
   762  //   AX = 1/0/-1
        // Register-based helper, not a Go-ABI function. Compares the first
        // min(alen, blen) bytes; if they all match (or a == b), the result is
        // decided by comparing the lengths.
   763  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   764  	CMPQ	SI, DI
   765  	JEQ	allsame
   766  	CMPQ	BX, DX
   767  	MOVQ	DX, R8
   768  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   769  	CMPQ	R8, $8
   770  	JB	small
   771  
   772  loop:
   773  	CMPQ	R8, $16
   774  	JBE	_0through16
   775  	MOVOU	(SI), X0
   776  	MOVOU	(DI), X1
   777  	PCMPEQB X0, X1
   778  	PMOVMSKB X1, AX
   779  	XORQ	$0xffff, AX	// convert EQ to NE
   780  	JNE	diff16	// branch if at least one byte is not equal
   781  	ADDQ	$16, SI
   782  	ADDQ	$16, DI
   783  	SUBQ	$16, R8
   784  	JMP	loop
   785  	
   786  	// AX = bit mask of differences
   787  diff16:
   788  	BSFQ	AX, BX	// index of first byte that differs
   789  	XORQ	AX, AX
   790  	ADDQ	BX, SI
   791  	MOVB	(SI), CX
   792  	ADDQ	BX, DI
   793  	CMPB	CX, (DI)
   794  	SETHI	AX	// AX = 1 if a's byte is above b's (unsigned), else 0
   795  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   796  	RET
   797  
   798  	// 0 through 16 bytes left, alen>=8, blen>=8
   799  _0through16:
   800  	CMPQ	R8, $8
   801  	JBE	_0through8
   802  	MOVQ	(SI), AX
   803  	MOVQ	(DI), CX
   804  	CMPQ	AX, CX
   805  	JNE	diff8
   806  _0through8:
   807  	ADDQ	R8, SI
   808  	ADDQ	R8, DI
   809  	MOVQ	-8(SI), AX	// the final 8 bytes of the compared range of a
   810  	MOVQ	-8(DI), CX	// the final 8 bytes of the compared range of b
   811  	CMPQ	AX, CX
   812  	JEQ	allsame
   813  
   814  	// AX and CX contain parts of a and b that differ.
   815  diff8:
   816  	BSWAPQ	AX	// reverse order of bytes
   817  	BSWAPQ	CX
   818  	XORQ	AX, CX
   819  	BSRQ	CX, CX	// index of highest bit difference
   820  	SHRQ	CX, AX	// move a's bit to bottom
   821  	ANDQ	$1, AX	// mask bit
   822  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   823  	RET
   824  
   825  	// 0-7 bytes in common
   826  small:
   827  	LEAQ	(R8*8), CX	// bytes left -> bits left
   828  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   829  	JEQ	allsame		// zero bytes to compare: decide on lengths
   830  
   831  	// load bytes of a into high bytes of SI
   832  	CMPB	SI, $0xf8
   833  	JA	si_high
   834  	MOVQ	(SI), SI
   835  	JMP	si_finish
   836  si_high:
   837  	ADDQ	R8, SI
   838  	MOVQ	-8(SI), SI
   839  	SHRQ	CX, SI
   840  si_finish:
   841  	SHLQ	CX, SI
   842  
   843  	// load bytes of b into high bytes of DI
   844  	CMPB	DI, $0xf8
   845  	JA	di_high
   846  	MOVQ	(DI), DI
   847  	JMP	di_finish
   848  di_high:
   849  	ADDQ	R8, DI
   850  	MOVQ	-8(DI), DI
   851  	SHRQ	CX, DI
   852  di_finish:
   853  	SHLQ	CX, DI
   854  
   855  	BSWAPQ	SI	// reverse order of bytes
   856  	BSWAPQ	DI
   857  	XORQ	SI, DI	// find bit differences
   858  	JEQ	allsame
   859  	BSRQ	DI, CX	// index of highest bit difference
   860  	SHRQ	CX, SI	// move a's bit to bottom
   861  	ANDQ	$1, SI	// mask bit
   862  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   863  	RET
   864  
   865  allsame:
   866  	XORQ	AX, AX
   867  	XORQ	CX, CX
   868  	CMPQ	BX, DX
   869  	SETGT	AX	// 1 if alen > blen
   870  	SETEQ	CX	// 1 if alen == blen
   871  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   872  	RET
   873  
   874  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
        	// Slice variant: loads the data pointer, length and byte c (at
        	// offset 12) into indexbytebody's input registers and stores the
        	// AX result.
   875  	MOVL s+0(FP), SI
   876  	MOVL s_len+4(FP), BX
   877  	MOVB c+12(FP), AL
   878  	CALL runtime·indexbytebody(SB)
   879  	MOVL AX, ret+16(FP)
   880  	RET
   881  
   882  TEXT strings·IndexByte(SB),NOSPLIT,$0-20
        	// String variant: loads the data pointer, length and byte c (at
        	// offset 8) into indexbytebody's input registers and stores the
        	// AX result.
   883  	MOVL s+0(FP), SI
   884  	MOVL s_len+4(FP), BX
   885  	MOVB c+8(FP), AL
   886  	CALL runtime·indexbytebody(SB)
   887  	MOVL AX, ret+16(FP)
   888  	RET
   889  
   890  // input:
   891  //   SI: data
   892  //   BX: data len
   893  //   AL: byte sought
   894  // output:
   895  //   AX: index of the first matching byte, or -1 if there is none
        // Register-based helper, not a Go-ABI function. Short inputs (len < 16)
        // are scanned with REPN SCASB. Longer inputs scan up to the first
        // 16-byte boundary with SCASB, then 16 bytes at a time with SSE, then
        // scan the tail with SCASB.
   896  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
   897  	MOVL SI, DI
   898  
   899  	CMPL BX, $16
   900  	JLT small
   901  
   902  	// round up to first 16-byte boundary
   903  	TESTL $15, SI
   904  	JZ aligned
   905  	MOVL SI, CX
   906  	ANDL $~15, CX
   907  	ADDL $16, CX
   908  
   909  	// search the beginning
   910  	SUBL SI, CX
   911  	REPN; SCASB
   912  	JZ success
   913  
   914  // DI is 16-byte aligned; get ready to search using SSE instructions
   915  aligned:
   916  	// round down to last 16-byte boundary
   917  	MOVL BX, R11
   918  	ADDL SI, R11
   919  	ANDL $~15, R11
   920  
   921  	// shuffle X0 around so that each byte contains c
   922  	MOVD AX, X0
   923  	PUNPCKLBW X0, X0
   924  	PUNPCKLBW X0, X0
   925  	PSHUFL $0, X0, X0
   926  	JMP condition
   927  
   928  sse:
   929  	// move the next 16-byte chunk of the buffer into X1
   930  	MOVO (DI), X1
   931  	// compare bytes in X0 to X1
   932  	PCMPEQB X0, X1
   933  	// take the top bit of each byte in X1 and put the result in DX
   934  	PMOVMSKB X1, DX
   935  	TESTL DX, DX
   936  	JNZ ssesuccess
   937  	ADDL $16, DI
   938  
   939  condition:
   940  	CMPL DI, R11
   941  	JLT sse
   942  
   943  	// search the end
   944  	MOVL SI, CX
   945  	ADDL BX, CX
   946  	SUBL R11, CX
   947  	// if CX == 0, the zero flag will be set and we'll end up
   948  	// returning a false success
   949  	JZ failure
   950  	REPN; SCASB
   951  	JZ success
   952  
   953  failure:
   954  	MOVL $-1, AX
   955  	RET
   956  
   957  // handle for lengths < 16
   958  small:
   959  	MOVL BX, CX
   960  	REPN; SCASB
   961  	JZ success
   962  	MOVL $-1, AX
   963  	RET
   964  
   965  // we've found the chunk containing the byte
   966  // now just figure out which specific byte it is
   967  ssesuccess:
   968  	// get the index of the least significant set bit
   969  	BSFW DX, DX
   970  	SUBL SI, DI
   971  	ADDL DI, DX
   972  	MOVL DX, AX
   973  	RET
   974  
        // reached after a SCASB match: DI is one past the matching byte,
        // so the index is DI - SI - 1
   975  success:
   976  	SUBL SI, DI
   977  	SUBL $1, DI
   978  	MOVL DI, AX
   979  	RET
   980  
   980  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Reports whether two byte slices are equal. Differing lengths give
        	// false without reading the data; equal lengths are compared by
        	// memeqbody.
   981  	MOVL	a_len+4(FP), BX
   982  	MOVL	b_len+16(FP), CX
   983  	XORL	AX, AX		// result 0, used if the lengths differ
   984  	CMPL	BX, CX
   985  	JNE	eqret
   986  	MOVL	a+0(FP), SI
   987  	MOVL	b+12(FP), DI
   988  	CALL	runtime·memeqbody(SB)
   989  eqret:
   990  	MOVB	AX, ret+24(FP)
   991  	RET
   993  
   994  TEXT runtime·fastrand(SB), NOSPLIT, $0-4
        	// Advances the per-m state m.fastrand and returns the new value.
        	// Step: x = 2*x; y = x ^ 0x88888eef; the new state is x when y is
        	// negative (sign bit set), otherwise y.
   995  	get_tls(CX)
   996  	MOVL	g(CX), AX
   997  	MOVL	g_m(AX), AX
   998  	MOVL	m_fastrand(AX), DX
   999  	ADDL	DX, DX		// x = 2*x
  1000  	MOVL	DX, BX		// BX keeps the doubled, un-XORed value
  1001  	XORL	$0x88888eef, DX
  1002  	CMOVLMI	BX, DX		// sign flag comes from the XORL: take BX if negative
  1003  	MOVL	DX, m_fastrand(AX)
  1004  	MOVL	DX, ret+0(FP)
  1005  	RET
  1006  
  1007  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns; nothing is written to the stack.
  1008  	MOVL	$0, AX
  1009  	RET
  1010  
  1011  // The top-most function running on a goroutine
  1012  // returns to goexit+PCQuantum.
        // The leading NOP byte means such a return lands on the CALL below
        // (assuming PCQuantum is 1 here — confirm). There is no RET because
        // goexit1 does not return.
  1013  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1014  	BYTE	$0x90	// NOP
  1015  	CALL	runtime·goexit1(SB)	// does not return
  1016  	// traceback from goexit1 must hit code range of goexit
  1017  	BYTE	$0x90	// NOP
  1018  
  1019  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT0 hint for the address passed in addr.
  1020  	MOVL	addr+0(FP), AX
  1021  	PREFETCHT0	(AX)
  1022  	RET
  1023  
  1024  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT1 hint for the address passed in addr.
  1025  	MOVL	addr+0(FP), AX
  1026  	PREFETCHT1	(AX)
  1027  	RET
  1028  
  1029  
  1030  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT2 hint for the address passed in addr.
  1031  	MOVL	addr+0(FP), AX
  1032  	PREFETCHT2	(AX)
  1033  	RET
  1034  
  1035  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHNTA hint for the address passed in addr.
  1036  	MOVL	addr+0(FP), AX
  1037  	PREFETCHNTA	(AX)
  1038  	RET
  1039  
  1040  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 into the one-byte result.
  1041  	MOVB	$1, ret+0(FP)
  1042  	RET