github.com/aloncn/graphics-go@v0.0.1/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// runtime bootstrap: build g0/m0 on the incoming stack, run init, start runtime·main
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, CX		// plenty of scratch; applied to CX because SP is reloaded from CX below
    16  	ANDL	$~15, CX		// round down to a 16-byte boundary
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)		// stash argc in the scratch area
    20  	MOVL	BX, 24(SP)		// stash argv in the scratch area
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104: stack guard and lower bound for g0
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX
    32  	CPUID
    33  	CMPQ	AX, $0		// AX still 0 after leaf 0: skip the leaf 1 query
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)
    38  	MOVL	DX, runtime·cpuid_edx(SB)
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·m0+m_tls(SB), DI	// DI = &m0.tls, handed to settls
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)
    48  	MOVQ	runtime·m0+m_tls(SB), AX
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)	// value written through TLS is visible in m0.tls: skip the abort
    51  	MOVL	AX, 0	// abort
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·mainPC(SB), AX	// entry
    77  	MOVL	$0, 0(SP)		// first newproc argument: 0
    78  	MOVL	AX, 4(SP)		// second newproc argument: address of the mainPC cell
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
    84  	MOVL	$0xf1, 0xf1  // crash
    85  	RET
    86  
    87  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte cell holding the address of runtime·main
    88  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only; rt0_go passes this cell's address to newproc
    89  
    90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// raise a breakpoint trap, then return
    91  	INT $3	// x86 software breakpoint interrupt
    92  	RET
    93  
    94  TEXT runtime·asminit(SB),NOSPLIT,$0-0	// intentionally empty on this architecture
    95  	// No per-thread init.
    96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
   102  // void gosave(Gobuf*)
   103  // save the caller's SP and PC plus the current g in Gobuf, clearing ctxt and ret; setjmp
   104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   105  	MOVL	buf+0(FP), AX	// gobuf
   106  	LEAL	buf+0(FP), BX	// caller's SP
   107  	MOVL	BX, gobuf_sp(AX)
   108  	MOVL	0(SP), BX		// caller's PC
   109  	MOVL	BX, gobuf_pc(AX)
   110  	MOVL	$0, gobuf_ctxt(AX)	// no closure context is recorded
   111  	MOVQ	$0, gobuf_ret(AX)	// ret is an 8-byte field: cleared with a 64-bit store
   112  	get_tls(CX)
   113  	MOVL	g(CX), BX		// BX = current g
   114  	MOVL	BX, gobuf_g(AX)
   115  	RET
   116  
   117  // void gogo(Gobuf*)
   118  // restore g, SP, ctxt (DX) and ret (AX) from Gobuf, then jump to its pc; longjmp
   119  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   120  	MOVL	buf+0(FP), BX		// gobuf
   121  	MOVL	gobuf_g(BX), DX
   122  	MOVL	0(DX), CX		// make sure g != nil
   123  	get_tls(CX)
   124  	MOVL	DX, g(CX)		// install the saved g as the current g
   125  	MOVL	gobuf_sp(BX), SP	// restore SP
   126  	MOVL	gobuf_ctxt(BX), DX	// DX = saved closure context
   127  	MOVQ	gobuf_ret(BX), AX	// AX = saved return value
   128  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   129  	MOVQ	$0, gobuf_ret(BX)
   130  	MOVL	$0, gobuf_ctxt(BX)
   131  	MOVL	gobuf_pc(BX), BX	// BX = resume address (the gobuf pointer is no longer needed)
   132  	JMP	BX
   133  
   134  // func mcall(fn func(*g))
   135  // Switch to m->g0's stack, call fn(g).
   136  // Fn must never return.  It should gogo(&g->sched)
   137  // to keep running g.
   138  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   139  	MOVL	fn+0(FP), DI
   140  	
   141  	get_tls(CX)
   142  	MOVL	g(CX), AX	// save state in g->sched
   143  	MOVL	0(SP), BX	// caller's PC
   144  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   145  	LEAL	fn+0(FP), BX	// caller's SP
   146  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   147  	MOVL	AX, (g_sched+gobuf_g)(AX)
   148  
   149  	// switch to m->g0 & its stack, call fn
   150  	MOVL	g(CX), BX
   151  	MOVL	g_m(BX), BX
   152  	MOVL	m_g0(BX), SI
   153  	CMPL	SI, AX	// if g == m->g0 call badmcall
   154  	JNE	3(PC)
   155  	MOVL	$runtime·badmcall(SB), AX
   156  	JMP	AX
   157  	MOVL	SI, g(CX)	// g = m->g0
   158  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   159  	PUSHQ	AX		// the old g becomes fn's argument
   160  	MOVL	DI, DX		// DX = fn (closure pointer)
   161  	MOVL	0(DI), DI	// DI = code pointer stored at the start of the closure
   162  	CALL	DI
   163  	POPQ	AX		// only reached if fn returned, which is an error
   164  	MOVL	$runtime·badmcall2(SB), AX
   165  	JMP	AX
   166  	RET
   167  
   168  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   169  // of the G stack.  We need to distinguish the routine that
   170  // lives at the bottom of the G stack from the one that lives
   171  // at the top of the system stack because the one at the top of
   172  // the system stack terminates the stack walk (see topofstack()).
   173  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// only its address is used: systemstack stores it as g->sched.pc
   174  	RET
   175  
   176  // func systemstack(fn func()): run fn on m->g0's stack, switching from curg and back if needed
   177  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   178  	MOVL	fn+0(FP), DI	// DI = fn
   179  	get_tls(CX)
   180  	MOVL	g(CX), AX	// AX = g
   181  	MOVL	g_m(AX), BX	// BX = m
   182  
   183  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   184  	CMPL	AX, DX
   185  	JEQ	noswitch	// running on gsignal: call fn in place
   186  
   187  	MOVL	m_g0(BX), DX	// DX = g0
   188  	CMPL	AX, DX
   189  	JEQ	noswitch	// already on g0: call fn in place
   190  
   191  	MOVL	m_curg(BX), R8
   192  	CMPL	AX, R8
   193  	JEQ	switch
   194  	
   195  	// Not g0, not curg. Must be gsignal, but that's not allowed.
   196  	// Hide call from linker nosplit analysis.
   197  	MOVL	$runtime·badsystemstack(SB), AX
   198  	CALL	AX
   199  
   200  switch:
   201  	// save our state in g->sched.  Pretend to
   202  	// be systemstack_switch if the G stack is scanned.
   203  	MOVL	$runtime·systemstack_switch(SB), SI
   204  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   205  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   206  	MOVL	AX, (g_sched+gobuf_g)(AX)
   207  
   208  	// switch to g0
   209  	MOVL	DX, g(CX)	// DX still holds g0 from the check above
   210  	MOVL	(g_sched+gobuf_sp)(DX), SP
   211  
   212  	// call target function
   213  	MOVL	DI, DX		// DX = fn (closure pointer)
   214  	MOVL	0(DI), DI	// DI = code pointer stored at the start of the closure
   215  	CALL	DI
   216  
   217  	// switch back to g
   218  	get_tls(CX)
   219  	MOVL	g(CX), AX
   220  	MOVL	g_m(AX), BX
   221  	MOVL	m_curg(BX), AX
   222  	MOVL	AX, g(CX)
   223  	MOVL	(g_sched+gobuf_sp)(AX), SP	// resume on the SP saved at switch
   224  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   225  	RET
   226  
   227  noswitch:
   228  	// already on m stack, just call directly
   229  	MOVL	DI, DX		// DX = fn (closure pointer)
   230  	MOVL	0(DI), DI	// DI = code pointer stored at the start of the closure
   231  	CALL	DI
   232  	RET
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
   238  // Called during function prolog when more stack is needed.
   239  //
   240  // The traceback routines see morestack on a g0 as being
   241  // the top of a stack (for example, morestack calling newstack
   242  // calling the scheduler calling newm calling gc), so we must
   243  // record an argument size. For that purpose, it has no arguments.
   244  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   245  	get_tls(CX)
   246  	MOVL	g(CX), BX
   247  	MOVL	g_m(BX), BX	// BX = m
   248  
   249  	// Cannot grow scheduler stack (m->g0).
   250  	MOVL	m_g0(BX), SI
   251  	CMPL	g(CX), SI
   252  	JNE	2(PC)
   253  	MOVL	0, AX		// load from address 0: deliberate crash
   254  
   255  	// Cannot grow signal stack (m->gsignal).
   256  	MOVL	m_gsignal(BX), SI
   257  	CMPL	g(CX), SI
   258  	JNE	2(PC)
   259  	MOVL	0, AX		// load from address 0: deliberate crash
   260  
   261  	// Called from f.
   262  	// Set m->morebuf to f's caller.
   263  	MOVL	8(SP), AX	// f's caller's PC
   264  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   265  	LEAL	16(SP), AX	// f's caller's SP
   266  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   267  	get_tls(CX)
   268  	MOVL	g(CX), SI
   269  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   270  
   271  	// Set g->sched to context in f.
   272  	MOVL	0(SP), AX // f's PC
   273  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   274  	MOVL	SI, (g_sched+gobuf_g)(SI)
   275  	LEAL	8(SP), AX // f's SP
   276  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   277  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX is saved as f's context (morestack_noctxt zeroes it first)
   278  
   279  	// Call newstack on m->g0's stack.
   280  	MOVL	m_g0(BX), BX
   281  	MOVL	BX, g(CX)
   282  	MOVL	(g_sched+gobuf_sp)(BX), SP
   283  	CALL	runtime·newstack(SB)
   284  	MOVL	$0, 0x1003	// crash if newstack returns
   285  	RET
   286  
   287  // morestack trampoline: clears DX (which morestack saves as the context) and tail-jumps to morestack
   288  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   289  	MOVL	$0, DX
   290  	JMP	runtime·morestack(SB)
   291  
   292  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   293  	// We came here via a RET to an overwritten return PC.
   294  	// AX may be live. Other registers are available.
   295  
   296  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   297  	get_tls(CX)
   298  	MOVL	g(CX), CX	// CX = g
   299  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the g.stkbar slice
   300  	MOVL	g_stkbarPos(CX), BX	// BX = index of the barrier being hit
   301  	IMULL	$stkbar__size, BX	// Too big for SIB.
   302  	ADDL	DX, BX		// BX = &g.stkbar[g.stkbarPos]
   303  	MOVL	stkbar_savedLRVal(BX), BX
   304  	// Record that this stack barrier was hit.
   305  	ADDL	$1, g_stkbarPos(CX)
   306  	// Jump to the original return PC.
   307  	JMP	BX
   308  
   309  // reflectcall: call a function with the given argument list
   310  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   311  // we don't have variable-sized frames, so we use a small number
   312  // of constant-sized-frame functions to encode a few bits of size in the pc.
   313  // Caution: ugly multiline assembly macros in your future!
   314  
   315  #define DISPATCH(NAME,MAXSIZE)		\
   316  	CMPL	CX, $MAXSIZE;		\
   317  	JA	3(PC);			\
   318  	MOVL	$NAME(SB), AX;		\
   319  	JMP	AX
   320  // DISPATCH jumps to NAME when CX <= MAXSIZE (unsigned), else falls through. Note: can't just "JMP NAME(SB)" - bad inlining results.
   321  
   322  TEXT reflect·call(SB), NOSPLIT, $0-0	// reflect-package entry point: tail-jumps to reflectcall
   323  	JMP	·reflectcall(SB)
   324  
   325  TEXT ·reflectcall(SB), NOSPLIT, $0-20	// jump to the smallest callN whose frame size N >= argsize
   326  	MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended; tested by each DISPATCH
   327  	DISPATCH(runtime·call16, 16)
   328  	DISPATCH(runtime·call32, 32)
   329  	DISPATCH(runtime·call64, 64)
   330  	DISPATCH(runtime·call128, 128)
   331  	DISPATCH(runtime·call256, 256)
   332  	DISPATCH(runtime·call512, 512)
   333  	DISPATCH(runtime·call1024, 1024)
   334  	DISPATCH(runtime·call2048, 2048)
   335  	DISPATCH(runtime·call4096, 4096)
   336  	DISPATCH(runtime·call8192, 8192)
   337  	DISPATCH(runtime·call16384, 16384)
   338  	DISPATCH(runtime·call32768, 32768)
   339  	DISPATCH(runtime·call65536, 65536)
   340  	DISPATCH(runtime·call131072, 131072)
   341  	DISPATCH(runtime·call262144, 262144)
   342  	DISPATCH(runtime·call524288, 524288)
   343  	DISPATCH(runtime·call1048576, 1048576)
   344  	DISPATCH(runtime·call2097152, 2097152)
   345  	DISPATCH(runtime·call4194304, 4194304)
   346  	DISPATCH(runtime·call8388608, 8388608)
   347  	DISPATCH(runtime·call16777216, 16777216)
   348  	DISPATCH(runtime·call33554432, 33554432)
   349  	DISPATCH(runtime·call67108864, 67108864)
   350  	DISPATCH(runtime·call134217728, 134217728)
   351  	DISPATCH(runtime·call268435456, 268435456)
   352  	DISPATCH(runtime·call536870912, 536870912)
   353  	DISPATCH(runtime·call1073741824, 1073741824)
   354  	MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds the largest frame: report the error
   355  	JMP	AX
   356  
   357  #define CALLFN(NAME,MAXSIZE)			\
   358  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   359  	NO_LOCAL_POINTERS;			\
   360  	/* copy argsize bytes from argptr to the frame at SP */	\
   361  	MOVL	argptr+8(FP), SI;		\
   362  	MOVL	argsize+12(FP), CX;		\
   363  	MOVL	SP, DI;				\
   364  	REP;MOVSB;				\
   365  	/* call the code pointer stored in closure f (DX = f) */	\
   366  	MOVL	f+4(FP), DX;			\
   367  	MOVL	(DX), AX;			\
   368  	CALL	AX;				\
   369  	/* copy bytes [retoffset, argsize) of the frame back to argptr */	\
   370  	MOVL	argptr+8(FP), DI;		\
   371  	MOVL	argsize+12(FP), CX;		\
   372  	MOVL	retoffset+16(FP), BX;		\
   373  	MOVL	SP, SI;				\
   374  	ADDL	BX, DI;				\
   375  	ADDL	BX, SI;				\
   376  	SUBL	BX, CX;				\
   377  	REP;MOVSB;				\
   378  	/* callwritebarrier(argtype, argptr, argsize, retoffset) */	\
   379  	MOVL	argtype+0(FP), DX;		\
   380  	MOVL	argptr+8(FP), DI;		\
   381  	MOVL	argsize+12(FP), CX;		\
   382  	MOVL	retoffset+16(FP), BX;		\
   383  	MOVL	DX, 0(SP);			\
   384  	MOVL	DI, 4(SP);			\
   385  	MOVL	CX, 8(SP);			\
   386  	MOVL	BX, 12(SP);			\
   387  	CALL	runtime·callwritebarrier(SB);	\
   388  	RET
   389  
   390  CALLFN(·call16, 16)
   391  CALLFN(·call32, 32)
   392  CALLFN(·call64, 64)
   393  CALLFN(·call128, 128)
   394  CALLFN(·call256, 256)
   395  CALLFN(·call512, 512)
   396  CALLFN(·call1024, 1024)
   397  CALLFN(·call2048, 2048)
   398  CALLFN(·call4096, 4096)
   399  CALLFN(·call8192, 8192)
   400  CALLFN(·call16384, 16384)
   401  CALLFN(·call32768, 32768)
   402  CALLFN(·call65536, 65536)
   403  CALLFN(·call131072, 131072)
   404  CALLFN(·call262144, 262144)
   405  CALLFN(·call524288, 524288)
   406  CALLFN(·call1048576, 1048576)
   407  CALLFN(·call2097152, 2097152)
   408  CALLFN(·call4194304, 4194304)
   409  CALLFN(·call8388608, 8388608)
   410  CALLFN(·call16777216, 16777216)
   411  CALLFN(·call33554432, 33554432)
   412  CALLFN(·call67108864, 67108864)
   413  CALLFN(·call134217728, 134217728)
   414  CALLFN(·call268435456, 268435456)
   415  CALLFN(·call536870912, 536870912)
   416  CALLFN(·call1073741824, 1073741824)
   417  
   418  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// execute PAUSE `cycles` times; NOTE(review): declared arg size is 0 although cycles+0(FP) is read
   419  	MOVL	cycles+0(FP), AX
   420  again:
   421  	PAUSE
   422  	SUBL	$1, AX		// count down; the test happens after the decrement
   423  	JNZ	again
   424  	RET
   425  
   426  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// emits no instructions besides RET
   427  	// Stores are already ordered on x86, so this is just a
   428  	// compile barrier.
   429  	RET
   430  
   431  // void jmpdefer(fn, sp);
   432  // called from deferreturn.
   433  // 1. pop the caller
   434  // 2. sub 5 bytes from the callers return
   435  // 3. jmp to the argument
   436  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   437  	MOVL	fv+0(FP), DX	// DX = deferred closure
   438  	MOVL	argp+4(FP), BX	// BX = argument pointer supplied by the caller
   439  	LEAL	-8(BX), SP	// caller sp after CALL
   440  	SUBL	$5, (SP)	// return to CALL again
   441  	MOVL	0(DX), BX	// BX = code pointer stored at the start of the closure
   442  	JMP	BX	// but first run the deferred function
   443  
   444  // func asmcgocall(fn, arg unsafe.Pointer) int32
   445  // Not implemented.
   446  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   447  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
   448  	RET
   449  
   450  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   451  // Not implemented.
   452  TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
   453  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
   454  	RET
   455  
   456  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   457  // Not implemented.
   458  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
   459  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
   460  	RET
   461  
   462  // void setg(G*); set g. for use by needm.
   463  // Not implemented.
   464  TEXT runtime·setg(SB), NOSPLIT, $0-4
   465  	MOVL	0, AX	// load from address 0: faults if this stub is ever called
   466  	RET
   467  
   468  // check that g->stack.lo < SP < g->stack.hi (both comparisons unsigned and strict); crash otherwise
   469  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   470  	get_tls(CX)
   471  	MOVL	g(CX), AX
   472  	CMPL	(g_stack+stack_hi)(AX), SP
   473  	JHI	2(PC)		// stack.hi > SP: upper bound holds
   474  	MOVL	0, AX		// load from address 0: deliberate crash
   475  	CMPL	SP, (g_stack+stack_lo)(AX)
   476  	JHI	2(PC)		// SP > stack.lo: lower bound holds
   477  	MOVL	0, AX		// load from address 0: deliberate crash
   478  	RET
   479  
   480  TEXT runtime·memclr(SB),NOSPLIT,$0-8	// zero n bytes starting at ptr
   481  	MOVL	ptr+0(FP), DI
   482  	MOVL	n+4(FP), CX
   483  	MOVQ	CX, BX
   484  	ANDQ	$3, BX		// BX = n % 4: tail bytes
   485  	SHRQ	$2, CX		// CX = n / 4: 4-byte words
   486  	MOVQ	$0, AX		// value stored by STOSL/STOSB
   487  	CLD
   488  	REP
   489  	STOSL			// clear CX 4-byte words
   490  	MOVQ	BX, CX
   491  	REP
   492  	STOSB			// clear the remaining 0-3 bytes
   493  	// Note: we zero only 4 bytes at a time so that the tail is at most
   494  	// 3 bytes.  That guarantees that we aren't zeroing pointers with STOSB.
   495  	// See issue 13160.
   496  	RET
   497  
   498  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12	// return the PC stored 8 bytes below argp, looking through a stack barrier
   499  	MOVL	argp+0(FP),AX		// addr of first arg
   500  	MOVL	-8(AX),AX		// get calling pc
   501  	CMPL	AX, runtime·stackBarrierPC(SB)
   502  	JNE	nobar
   503  	// Get original return PC.
   504  	CALL	runtime·nextBarrierPC(SB)
   505  	MOVL	0(SP), AX		// result of nextBarrierPC is read from 0(SP)
   506  nobar:
   507  	MOVL	AX, ret+8(FP)
   508  	RET
   509  
   510  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8	// overwrite the PC stored 8 bytes below argp, or the pending barrier's PC
   511  	MOVL	argp+0(FP),AX		// addr of first arg
   512  	MOVL	pc+4(FP), BX		// pc to set
   513  	MOVL	-8(AX), CX		// CX = return PC currently stored there
   514  	CMPL	CX, runtime·stackBarrierPC(SB)
   515  	JEQ	setbar
   516  	MOVQ	BX, -8(AX)		// set calling pc
   517  	RET
   518  setbar:
   519  	// Set the stack barrier return PC.
   520  	MOVL	BX, 0(SP)		// argument for setNextBarrierPC
   521  	CALL	runtime·setNextBarrierPC(SB)
   522  	RET
   523  
   524  TEXT runtime·getcallersp(SB),NOSPLIT,$0-12	// returns the argp value it was given, unchanged
   525  	MOVL	argp+0(FP), AX
   526  	MOVL	AX, ret+8(FP)
   527  	RET
   528  
   529  // int64 runtime·cputicks(void): returns the 64-bit counter read by RDTSC
   530  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   531  	RDTSC			// low 32 bits in AX, high 32 bits in DX
   532  	SHLQ	$32, DX
   533  	ADDQ	DX, AX		// AX = (DX << 32) + AX
   534  	MOVQ	AX, ret+0(FP)
   535  	RET
   536  
   537  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   538  // redirects to memhash(p, h, size) using the size
   539  // stored in the closure.
   540  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   541  	GO_ARGS
   542  	NO_LOCAL_POINTERS
   543  	MOVL	p+0(FP), AX
   544  	MOVL	h+4(FP), BX
   545  	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
   546  	MOVL	AX, 0(SP)	// memhash argument p
   547  	MOVL	BX, 4(SP)	// memhash argument h
   548  	MOVL	CX, 8(SP)	// memhash argument size
   549  	CALL	runtime·memhash(SB)
   550  	MOVL	16(SP), AX	// memhash result is read from 16(SP)
   551  	MOVL	AX, ret+8(FP)
   552  	RET
   553  
   554  // hash function using AES hardware instructions
   555  // For now, our one amd64p32 system (NaCl) does not
   556  // support using AES instructions, so have not bothered to
   557  // write the implementations. Can copy and adjust the ones
   558  // in asm_amd64.s when the time comes.
   559  
   560  TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// placeholder: no hashing is performed
   561  	MOVL	AX, ret+16(FP)	// AX is never set here, so the stored value is whatever AX held on entry
   562  	RET
   563  
   564  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20	// placeholder: no hashing is performed
   565  	MOVL	AX, ret+16(FP)	// AX is never set here, so the stored value is whatever AX held on entry
   566  	RET
   567  
   568  TEXT runtime·aeshash32(SB),NOSPLIT,$0-20	// placeholder: no hashing is performed
   569  	MOVL	AX, ret+16(FP)	// AX is never set here, so the stored value is whatever AX held on entry
   570  	RET
   571  
   572  TEXT runtime·aeshash64(SB),NOSPLIT,$0-20	// placeholder: no hashing is performed
   573  	MOVL	AX, ret+16(FP)	// AX is never set here, so the stored value is whatever AX held on entry
   574  	RET
   575  
   576  TEXT runtime·memeq(SB),NOSPLIT,$0-17	// ret = whether the size bytes at a and at b are equal
   577  	MOVL	a+0(FP), SI
   578  	MOVL	b+4(FP), DI
   579  	MOVL	size+8(FP), BX
   580  	CALL	runtime·memeqbody(SB)	// takes SI, DI, BX; result in AX
   581  	MOVB	AX, ret+16(FP)
   582  	RET
   583  
   584  // memequal_varlen(a, b unsafe.Pointer) bool: compares size bytes, size taken from the closure in DX
   585  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   586  	MOVL    a+0(FP), SI
   587  	MOVL    b+4(FP), DI
   588  	CMPL    SI, DI
   589  	JEQ     eq           // same address: equal without reading memory
   590  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   591  	CALL    runtime·memeqbody(SB)
   592  	MOVB    AX, ret+8(FP)
   593  	RET
   594  eq:
   595  	MOVB    $1, ret+8(FP)
   596  	RET
   597  
   598  // eqstring tests whether two strings are equal.
   599  // The compiler guarantees that strings passed
   600  // to eqstring have equal length.
   601  // See runtime_test.go:eqstring_generic for
   602  // equivalent Go code.
   603  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   604  	MOVL	s1str+0(FP), SI
   605  	MOVL	s2str+8(FP), DI
   606  	CMPL	SI, DI
   607  	JEQ	same		// identical data pointers: equal without reading memory
   608  	MOVL	s1len+4(FP), BX	// only s1's length is loaded; see the equal-length guarantee above
   609  	CALL	runtime·memeqbody(SB)
   610  	MOVB	AX, v+16(FP)
   611  	RET
   612  same:
   613  	MOVB	$1, v+16(FP)
   614  	RET
   615  
   616  // a in SI
   617  // b in DI
   618  // count in BX; result in AX (1 if the regions are equal, 0 otherwise)
   619  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   620  	XORQ	AX, AX		// default result: not equal
   621  
   622  	CMPQ	BX, $8
   623  	JB	small
   624  	
   625  	// 64 bytes at a time using xmm registers
   626  hugeloop:
   627  	CMPQ	BX, $64
   628  	JB	bigloop
   629  	MOVOU	(SI), X0
   630  	MOVOU	(DI), X1
   631  	MOVOU	16(SI), X2
   632  	MOVOU	16(DI), X3
   633  	MOVOU	32(SI), X4
   634  	MOVOU	32(DI), X5
   635  	MOVOU	48(SI), X6
   636  	MOVOU	48(DI), X7
   637  	PCMPEQB	X1, X0
   638  	PCMPEQB	X3, X2
   639  	PCMPEQB	X5, X4
   640  	PCMPEQB	X7, X6
   641  	PAND	X2, X0
   642  	PAND	X6, X4
   643  	PAND	X4, X0		// X0 = byte-wise AND of all four comparison masks
   644  	PMOVMSKB X0, DX
   645  	ADDQ	$64, SI
   646  	ADDQ	$64, DI
   647  	SUBQ	$64, BX
   648  	CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
   649  	JEQ	hugeloop
   650  	RET			// mismatch: AX is still 0
   651  
   652  	// 8 bytes at a time using 64-bit register
   653  bigloop:
   654  	CMPQ	BX, $8
   655  	JBE	leftover
   656  	MOVQ	(SI), CX
   657  	MOVQ	(DI), DX
   658  	ADDQ	$8, SI
   659  	ADDQ	$8, DI
   660  	SUBQ	$8, BX
   661  	CMPQ	CX, DX
   662  	JEQ	bigloop
   663  	RET			// mismatch: AX is still 0
   664  
   665  	// remaining 0-8 bytes
   666  leftover:
   667  	ADDQ	BX, SI
   668  	ADDQ	BX, DI
   669  	MOVQ	-8(SI), CX	// last 8 bytes of a (may overlap bytes already compared)
   670  	MOVQ	-8(DI), DX	// last 8 bytes of b
   671  	CMPQ	CX, DX
   672  	SETEQ	AX
   673  	RET
   674  
   675  small:
   676  	CMPQ	BX, $0
   677  	JEQ	equal		// zero length: ZF from this compare makes SETEQ yield 1
   678  
   679  	LEAQ	0(BX*8), CX
   680  	NEGQ	CX		// CX = -8*count; shifts use it modulo 64
   681  
   682  	CMPB	SI, $0xf8
   683  	JA	si_high
   684  
   685  	// load at SI won't cross a page boundary.
   686  	MOVQ	(SI), SI
   687  	JMP	si_finish
   688  si_high:
   689  	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
   690  	MOVQ	BX, DX
   691  	ADDQ	SI, DX
   692  	MOVQ	-8(DX), SI
   693  	SHRQ	CX, SI
   694  si_finish:
   695  
   696  	// same for DI.
   697  	CMPB	DI, $0xf8
   698  	JA	di_high
   699  	MOVQ	(DI), DI
   700  	JMP	di_finish
   701  di_high:
   702  	MOVQ	BX, DX
   703  	ADDQ	DI, DX
   704  	MOVQ	-8(DX), DI
   705  	SHRQ	CX, DI
   706  di_finish:
   707  
   708  	SUBQ	SI, DI
   709  	SHLQ	CX, DI		// discard bytes beyond count; the result is zero iff the wanted bytes match
   710  equal:
   711  	SETEQ	AX
   712  	RET
   713  
   714  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// three-way string compare; loads registers and delegates to cmpbody
   715  	MOVL	s1_base+0(FP), SI
   716  	MOVL	s1_len+4(FP), BX
   717  	MOVL	s2_base+8(FP), DI
   718  	MOVL	s2_len+12(FP), DX
   719  	CALL	runtime·cmpbody(SB)
   720  	MOVL	AX, ret+16(FP)
   721  	RET
   722  
   723  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// three-way compare of s1 and s2; loads registers and delegates to cmpbody
   724  	MOVL	s1+0(FP), SI	// cmpbody's a
   725  	MOVL	s1+4(FP), BX	// cmpbody's alen
   726  	MOVL	s2+12(FP), DI	// cmpbody's b
   727  	MOVL	s2+16(FP), DX	// cmpbody's blen
   728  	CALL	runtime·cmpbody(SB)
   729  	MOVL	AX, res+24(FP)
   730  	RET
   731  
   732  // input:
   733  //   SI = a
   734  //   DI = b
   735  //   BX = alen
   736  //   DX = blen
   737  // output:
   738  //   AX = 1/0/-1
   739  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   740  	CMPQ	SI, DI
   741  	JEQ	allsame		// same address: the result depends on the lengths only
   742  	CMPQ	BX, DX
   743  	MOVQ	DX, R8
   744  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   745  	CMPQ	R8, $8
   746  	JB	small
   747  
   748  loop:
   749  	CMPQ	R8, $16
   750  	JBE	_0through16
   751  	MOVOU	(SI), X0
   752  	MOVOU	(DI), X1
   753  	PCMPEQB X0, X1
   754  	PMOVMSKB X1, AX
   755  	XORQ	$0xffff, AX	// convert EQ to NE
   756  	JNE	diff16	// branch if at least one byte is not equal
   757  	ADDQ	$16, SI
   758  	ADDQ	$16, DI
   759  	SUBQ	$16, R8
   760  	JMP	loop
   761  	
   762  	// AX = bit mask of differences
   763  diff16:
   764  	BSFQ	AX, BX	// index of first byte that differs
   765  	XORQ	AX, AX
   766  	ADDQ	BX, SI
   767  	MOVB	(SI), CX
   768  	ADDQ	BX, DI
   769  	CMPB	CX, (DI)
   770  	SETHI	AX	// 1 if a's byte is above b's byte (unsigned)
   771  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   772  	RET
   773  
   774  	// 0 through 16 bytes left, alen>=8, blen>=8
   775  _0through16:
   776  	CMPQ	R8, $8
   777  	JBE	_0through8
   778  	MOVQ	(SI), AX
   779  	MOVQ	(DI), CX
   780  	CMPQ	AX, CX
   781  	JNE	diff8
   782  _0through8:
   783  	ADDQ	R8, SI
   784  	ADDQ	R8, DI
   785  	MOVQ	-8(SI), AX	// last 8 bytes of the common prefix of a
   786  	MOVQ	-8(DI), CX	// last 8 bytes of the common prefix of b
   787  	CMPQ	AX, CX
   788  	JEQ	allsame
   789  
   790  	// AX and CX contain parts of a and b that differ.
   791  diff8:
   792  	BSWAPQ	AX	// reverse order of bytes
   793  	BSWAPQ	CX
   794  	XORQ	AX, CX
   795  	BSRQ	CX, CX	// index of highest bit difference
   796  	SHRQ	CX, AX	// move a's bit to bottom
   797  	ANDQ	$1, AX	// mask bit
   798  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   799  	RET
   800  
   801  	// 0-7 bytes in common
   802  small:
   803  	LEAQ	(R8*8), CX	// bytes left -> bits left
   804  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   805  	JEQ	allsame		// zero bytes to compare
   806  
   807  	// load bytes of a into high bytes of SI
   808  	CMPB	SI, $0xf8
   809  	JA	si_high
   810  	MOVQ	(SI), SI
   811  	JMP	si_finish
   812  si_high:
   813  	ADDQ	R8, SI
   814  	MOVQ	-8(SI), SI
   815  	SHRQ	CX, SI
   816  si_finish:
   817  	SHLQ	CX, SI
   818  
   819  	// load bytes of b into high bytes of DI
   820  	CMPB	DI, $0xf8
   821  	JA	di_high
   822  	MOVQ	(DI), DI
   823  	JMP	di_finish
   824  di_high:
   825  	ADDQ	R8, DI
   826  	MOVQ	-8(DI), DI
   827  	SHRQ	CX, DI
   828  di_finish:
   829  	SHLQ	CX, DI
   830  
   831  	BSWAPQ	SI	// reverse order of bytes
   832  	BSWAPQ	DI
   833  	XORQ	SI, DI	// find bit differences
   834  	JEQ	allsame
   835  	BSRQ	DI, CX	// index of highest bit difference
   836  	SHRQ	CX, SI	// move a's bit to bottom
   837  	ANDQ	$1, SI	// mask bit
   838  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   839  	RET
   840  
   841  allsame:
   842  	XORQ	AX, AX
   843  	XORQ	CX, CX
   844  	CMPQ	BX, DX
   845  	SETGT	AX	// 1 if alen > blen
   846  	SETEQ	CX	// 1 if alen == blen
   847  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   848  	RET
   849  
   850  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20	// index of the first byte equal to c in s, or -1; delegates to indexbytebody
   851  	MOVL s+0(FP), SI
   852  	MOVL s_len+4(FP), BX
   853  	MOVB c+12(FP), AL
   854  	CALL runtime·indexbytebody(SB)
   855  	MOVL AX, ret+16(FP)
   856  	RET
   857  
   858  TEXT strings·IndexByte(SB),NOSPLIT,$0-20	// index of the first byte equal to c in s, or -1; delegates to indexbytebody
   859  	MOVL s+0(FP), SI
   860  	MOVL s_len+4(FP), BX
   861  	MOVB c+8(FP), AL
   862  	CALL runtime·indexbytebody(SB)
   863  	MOVL AX, ret+16(FP)
   864  	RET
   865  
   866  // input:
   867  //   SI: data
   868  //   BX: data len
   869  //   AL: byte sought
   870  // output:
   871  //   AX: index of the first matching byte, or -1 if there is none
   872  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
   873  	MOVL SI, DI	// DI = scan cursor; SI keeps the start for the index computation
   874  
   875  	CMPL BX, $16
   876  	JLT small
   877  
   878  	// round up to first 16-byte boundary
   879  	TESTL $15, SI
   880  	JZ aligned
   881  	MOVL SI, CX
   882  	ANDL $~15, CX
   883  	ADDL $16, CX
   884  
   885  	// search the beginning
   886  	SUBL SI, CX	// CX = number of bytes before the boundary
   887  	REPN; SCASB
   888  	JZ success
   889  
   890  // DI is 16-byte aligned; get ready to search using SSE instructions
   891  aligned:
   892  	// round down to last 16-byte boundary
   893  	MOVL BX, R11
   894  	ADDL SI, R11
   895  	ANDL $~15, R11	// R11 = end of data rounded down to 16 bytes
   896  
   897  	// shuffle X0 around so that each byte contains c
   898  	MOVD AX, X0
   899  	PUNPCKLBW X0, X0
   900  	PUNPCKLBW X0, X0
   901  	PSHUFL $0, X0, X0
   902  	JMP condition
   903  
   904  sse:
   905  	// move the next 16-byte chunk of the buffer into X1
   906  	MOVO (DI), X1
   907  	// compare bytes in X0 to X1
   908  	PCMPEQB X0, X1
   909  	// take the top bit of each byte in X1 and put the result in DX
   910  	PMOVMSKB X1, DX
   911  	TESTL DX, DX
   912  	JNZ ssesuccess
   913  	ADDL $16, DI
   914  
   915  condition:
   916  	CMPL DI, R11
   917  	JLT sse
   918  
   919  	// search the end
   920  	MOVL SI, CX
   921  	ADDL BX, CX
   922  	SUBL R11, CX	// CX = bytes remaining after the last aligned chunk
   923  	// if CX == 0, the zero flag will be set and we'll end up
   924  	// returning a false success
   925  	JZ failure
   926  	REPN; SCASB
   927  	JZ success
   928  
   929  failure:
   930  	MOVL $-1, AX
   931  	RET
   932  
   933  // handle for lengths < 16
   934  small:
   935  	MOVL BX, CX
   936  	REPN; SCASB
   937  	JZ success
   938  	MOVL $-1, AX
   939  	RET
   940  
   941  // we've found the chunk containing the byte
   942  // now just figure out which specific byte it is
   943  ssesuccess:
   944  	// get the index of the least significant set bit
   945  	BSFW DX, DX
   946  	SUBL SI, DI	// DI = offset of the chunk from the start of data
   947  	ADDL DI, DX
   948  	MOVL DX, AX
   949  	RET
   950  
   951  success:
   952  	SUBL SI, DI
   953  	SUBL $1, DI	// SCASB leaves DI one past the matching byte
   954  	MOVL DI, AX
   955  	RET
   956  
   957  TEXT bytes·Equal(SB),NOSPLIT,$0-25	// ret = whether a and b have the same length and contents
   958  	MOVL	a_len+4(FP), BX
   959  	MOVL	b_len+16(FP), CX
   960  	XORL	AX, AX		// default result: not equal
   961  	CMPL	BX, CX
   962  	JNE	eqret		// lengths differ: return 0 without comparing contents
   963  	MOVL	a+0(FP), SI
   964  	MOVL	b+12(FP), DI
   965  	CALL	runtime·memeqbody(SB)	// takes SI, DI, BX; result in AX
   966  eqret:
   967  	MOVB	AX, ret+24(FP)
   968  	RET
   969  
   970  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4	// advance m.fastrand one step and return the new value
   971  	get_tls(CX)
   972  	MOVL	g(CX), AX
   973  	MOVL	g_m(AX), AX	// AX = m
   974  	MOVL	m_fastrand(AX), DX
   975  	ADDL	DX, DX		// x += x
   976  	MOVL	DX, BX		// BX = doubled value without the XOR
   977  	XORL	$0x88888eef, DX	// the constant has bit 31 set, so this flips the sign bit
   978  	CMOVLMI	BX, DX		// negative after XOR means bit 31 was clear before it: undo the XOR
   979  	MOVL	DX, m_fastrand(AX)
   980  	MOVL	DX, ret+0(FP)
   981  	RET
   982  
   983  TEXT runtime·return0(SB), NOSPLIT, $0	// sets AX to 0 and returns
   984  	MOVL	$0, AX
   985  	RET
   986  
   987  // The top-most function running on a goroutine
   988  // returns to goexit+PCQuantum.
   989  TEXT runtime·goexit(SB),NOSPLIT,$0-0
   990  	BYTE	$0x90	// NOP
   991  	CALL	runtime·goexit1(SB)	// does not return
   992  	// traceback from goexit1 must hit code range of goexit
   993  	BYTE	$0x90	// NOP
   994  
   995  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4	// issue a PREFETCHT0 hint for addr
   996  	MOVL	addr+0(FP), AX
   997  	PREFETCHT0	(AX)
   998  	RET
   999  
  1000  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4	// issue a PREFETCHT1 hint for addr
  1001  	MOVL	addr+0(FP), AX
  1002  	PREFETCHT1	(AX)
  1003  	RET
  1004  
  1005  
  1006  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4	// issue a PREFETCHT2 hint for addr
  1007  	MOVL	addr+0(FP), AX
  1008  	PREFETCHT2	(AX)
  1009  	RET
  1010  
  1011  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4	// issue a PREFETCHNTA hint for addr
  1012  	MOVL	addr+0(FP), AX
  1013  	PREFETCHNTA	(AX)
  1014  	RET
  1015  
  1016  TEXT ·checkASM(SB),NOSPLIT,$0-1	// always returns true
  1017  	MOVB	$1, ret+0(FP)
  1018  	RET