github.com/c0deoo1/golang1.5@v0.0.0-20220525150107-c87c805d4593/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry point: builds g0/m0 on the incoming OS stack, sets up
        	// TLS, runs check/args/osinit/schedinit, queues runtime·main through
        	// newproc and enters the scheduler with mstart.
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
        	// Reserve the scratch area in CX, not SP: SP is overwritten from CX
        	// two instructions below, so a subtraction applied to SP itself would
        	// be discarded and nothing would actually be reserved.
    15  	SUBL	$128, CX		// plenty of scratch
    16  	ANDL	$~15, CX
    17  	MOVL	CX, SP
    18  
        	// park argc/argv in the scratch area until runtime·args is called
    19  	MOVL	AX, 16(SP)
    20  	MOVL	BX, 24(SP)
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX
    32  	CPUID
    33  	CMPQ	AX, $0		// AX == 0 after leaf 0: skip the feature query
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)
    38  	MOVL	DX, runtime·cpuid_edx(SB)
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·tls0(SB), DI
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)
    48  	MOVQ	runtime·tls0(SB), AX
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)
    51  	MOVL	AX, 0	// abort: the value did not read back, store to address 0
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·mainPC(SB), AX	// entry
    77  	MOVL	$0, 0(SP)		// first newproc argument is 0
    78  	MOVL	AX, 4(SP)		// second newproc argument is &mainPC
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
        	// reached only if mstart returns
    84  	MOVL	$0xf1, 0xf1  // crash
    85  	RET
    86  
    87  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
    88  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only; its address is handed to newproc by rt0_go
    89  
    90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes the x86 breakpoint instruction (INT 3), then returns.
    91  	INT $3
    92  	RET
    93  
    94  TEXT runtime·asminit(SB),NOSPLIT,$0-0
    95  	// No per-thread init is needed here, so this returns immediately.
    96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
   102  // void gosave(Gobuf*)
   103  // save state in Gobuf; setjmp
        // Records the caller's SP and PC and the current g in the Gobuf, and
        // zeroes its ctxt and ret fields.
   104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   105  	MOVL	buf+0(FP), AX	// gobuf
   106  	LEAL	buf+0(FP), BX	// caller's SP
   107  	MOVL	BX, gobuf_sp(AX)
   108  	MOVL	0(SP), BX		// caller's PC
   109  	MOVL	BX, gobuf_pc(AX)
   110  	MOVL	$0, gobuf_ctxt(AX)
   111  	MOVQ	$0, gobuf_ret(AX)	// ret is cleared with a 64-bit store
   112  	get_tls(CX)
   113  	MOVL	g(CX), BX		// current g
   114  	MOVL	BX, gobuf_g(AX)
   115  	RET
   116  
   117  // void gogo(Gobuf*)
   118  // restore state from Gobuf; longjmp
        // Installs gobuf.g as the current g, switches SP to gobuf.sp, loads
        // DX = gobuf.ctxt and AX = gobuf.ret, clears sp/ret/ctxt in the Gobuf
        // and jumps to gobuf.pc. Does not return to its caller.
   119  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   120  	MOVL	buf+0(FP), BX		// gobuf
   121  	MOVL	gobuf_g(BX), DX
   122  	MOVL	0(DX), CX		// make sure g != nil (loaded value is discarded)
   123  	get_tls(CX)
   124  	MOVL	DX, g(CX)
   125  	MOVL	gobuf_sp(BX), SP	// restore SP
   126  	MOVL	gobuf_ctxt(BX), DX
   127  	MOVQ	gobuf_ret(BX), AX
   128  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   129  	MOVQ	$0, gobuf_ret(BX)
   130  	MOVL	$0, gobuf_ctxt(BX)
   131  	MOVL	gobuf_pc(BX), BX
   132  	JMP	BX
   133  
   134  // func mcall(fn func(*g))
   135  // Switch to m->g0's stack, call fn(g).
   136  // Fn must never return.  It should gogo(&g->sched)
   137  // to keep running g.
        // If the caller is already running on m->g0, control goes to badmcall;
        // if fn does return, control goes to badmcall2.
   138  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   139  	MOVL	fn+0(FP), DI
   140  	
   141  	get_tls(CX)
   142  	MOVL	g(CX), AX	// save state in g->sched
   143  	MOVL	0(SP), BX	// caller's PC
   144  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   145  	LEAL	fn+0(FP), BX	// caller's SP
   146  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   147  	MOVL	AX, (g_sched+gobuf_g)(AX)
   148  
   149  	// switch to m->g0 & its stack, call fn
   150  	MOVL	g(CX), BX
   151  	MOVL	g_m(BX), BX
   152  	MOVL	m_g0(BX), SI
   153  	CMPL	SI, AX	// if g == m->g0 call badmcall
   154  	JNE	3(PC)
   155  	MOVL	$runtime·badmcall(SB), AX
   156  	JMP	AX
   157  	MOVL	SI, g(CX)	// g = m->g0
   158  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   159  	PUSHQ	AX		// argument for fn: the g we switched away from
   160  	MOVL	DI, DX		// DX = fn (closure pointer)
   161  	MOVL	0(DI), DI	// DI = code pointer stored in the closure
   162  	CALL	DI
   163  	POPQ	AX
   164  	MOVL	$runtime·badmcall2(SB), AX	// fn returned: not allowed
   165  	JMP	AX
   166  	RET
   167  
   168  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   169  // of the G stack.  We need to distinguish the routine that
   170  // lives at the bottom of the G stack from the one that lives
   171  // at the top of the system stack because the one at the top of
   172  // the system stack terminates the stack walk (see topofstack()).
        // Only its address is used: systemstack stores it as g->sched.pc.
   173  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   174  	RET
   175  
   176  // func systemstack(fn func())
        // Runs fn on the g0 stack. When the current g is already gsignal or g0,
        // fn is called directly on the current stack. When it is m->curg, the
        // state is saved in g->sched, execution switches to g0's stack for the
        // call, and then switches back to m->curg.
   177  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   178  	MOVL	fn+0(FP), DI	// DI = fn
   179  	get_tls(CX)
   180  	MOVL	g(CX), AX	// AX = g
   181  	MOVL	g_m(AX), BX	// BX = m
   182  
   183  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   184  	CMPL	AX, DX
   185  	JEQ	noswitch
   186  
   187  	MOVL	m_g0(BX), DX	// DX = g0
   188  	CMPL	AX, DX
   189  	JEQ	noswitch
   190  
   191  	MOVL	m_curg(BX), R8
   192  	CMPL	AX, R8
   193  	JEQ	switch
   194  	
   195  	// Not gsignal, not g0, not curg: unexpected g.
        	// NOTE(review): the earlier wording said "must be gsignal", but
        	// gsignal is already routed to noswitch above.
   196  	// Hide call from linker nosplit analysis.
   197  	MOVL	$runtime·badsystemstack(SB), AX
   198  	CALL	AX
        	// NOTE(review): execution falls through into switch if
        	// badsystemstack returns; presumably it does not - confirm.
   199  
   200  switch:
   201  	// save our state in g->sched.  Pretend to
   202  	// be systemstack_switch if the G stack is scanned.
   203  	MOVL	$runtime·systemstack_switch(SB), SI
   204  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   205  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   206  	MOVL	AX, (g_sched+gobuf_g)(AX)
   207  
   208  	// switch to g0
   209  	MOVL	DX, g(CX)
   210  	MOVL	(g_sched+gobuf_sp)(DX), SP
   211  
   212  	// call target function
   213  	MOVL	DI, DX		// DX = fn (closure pointer)
   214  	MOVL	0(DI), DI	// DI = code pointer stored in the closure
   215  	CALL	DI
   216  
   217  	// switch back to g
   218  	get_tls(CX)
   219  	MOVL	g(CX), AX
   220  	MOVL	g_m(AX), BX
   221  	MOVL	m_curg(BX), AX
   222  	MOVL	AX, g(CX)
   223  	MOVL	(g_sched+gobuf_sp)(AX), SP
   224  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved sp once restored
   225  	RET
   226  
   227  noswitch:
   228  	// already on m stack, just call directly
   229  	MOVL	DI, DX
   230  	MOVL	0(DI), DI
   231  	CALL	DI
   232  	RET
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
   238  // Called during function prolog when more stack is needed.
   239  //
   240  // The traceback routines see morestack on a g0 as being
   241  // the top of a stack (for example, morestack calling newstack
   242  // calling the scheduler calling newm calling gc), so we must
   243  // record an argument size. For that purpose, it has no arguments.
        //
        // On entry DX holds the context value that is saved in g->sched.ctxt.
        // The routine faults on purpose (load from address 0) when the current
        // g is m->g0 or m->gsignal.
   244  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   245  	get_tls(CX)
   246  	MOVL	g(CX), BX
   247  	MOVL	g_m(BX), BX
   248  
   249  	// Cannot grow scheduler stack (m->g0).
   250  	MOVL	m_g0(BX), SI
   251  	CMPL	g(CX), SI
   252  	JNE	2(PC)
   253  	MOVL	0, AX	// deliberate fault
   254  
   255  	// Cannot grow signal stack (m->gsignal).
   256  	MOVL	m_gsignal(BX), SI
   257  	CMPL	g(CX), SI
   258  	JNE	2(PC)
   259  	MOVL	0, AX	// deliberate fault
   260  
   261  	// Called from f.
   262  	// Set m->morebuf to f's caller.
   263  	MOVL	8(SP), AX	// f's caller's PC
   264  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   265  	LEAL	16(SP), AX	// f's caller's SP
   266  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   267  	get_tls(CX)
   268  	MOVL	g(CX), SI
   269  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   270  
   271  	// Set g->sched to context in f.
   272  	MOVL	0(SP), AX // f's PC
   273  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   274  	MOVL	SI, (g_sched+gobuf_g)(SI)
   275  	LEAL	8(SP), AX // f's SP
   276  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   277  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)
   278  
   279  	// Call newstack on m->g0's stack.
   280  	MOVL	m_g0(BX), BX
   281  	MOVL	BX, g(CX)
   282  	MOVL	(g_sched+gobuf_sp)(BX), SP
   283  	CALL	runtime·newstack(SB)
   284  	MOVL	$0, 0x1003	// crash if newstack returns
   285  	RET
   286  
   287  // morestack trampolines
        // morestack_noctxt zeroes DX (the context value morestack saves) and
        // then jumps to morestack.
   288  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   289  	MOVL	$0, DX
   290  	JMP	runtime·morestack(SB)
   291  
   292  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   293  	// We came here via a RET to an overwritten return PC.
   294  	// AX may be live. Other registers are available.
        	// This code uses only CX, DX and BX.
   295  
   296  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   297  	get_tls(CX)
   298  	MOVL	g(CX), CX
   299  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of g.stkbar
   300  	MOVL	g_stkbarPos(CX), BX
   301  	IMULL	$stkbar__size, BX	// Too big for SIB.
   302  	ADDL	DX, BX			// BX = &g.stkbar[g.stkbarPos]
   303  	MOVL	stkbar_savedLRVal(BX), BX
   304  	// Record that this stack barrier was hit.
   305  	ADDL	$1, g_stkbarPos(CX)
   306  	// Jump to the original return PC.
   307  	JMP	BX
   308  
   309  // reflectcall: call a function with the given argument list
   310  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   311  // we don't have variable-sized frames, so we use a small number
   312  // of constant-sized-frame functions to encode a few bits of size in the pc.
   313  // Caution: ugly multiline assembly macros in your future!
        //
        // DISPATCH(NAME, MAXSIZE) expects the argument size in CX. When
        // CX <= MAXSIZE (unsigned) it jumps to NAME through AX; otherwise
        // JA 3(PC) skips the MOVL/JMP pair and execution continues with
        // whatever follows the macro.
   314  
   315  #define DISPATCH(NAME,MAXSIZE)		\
   316  	CMPL	CX, $MAXSIZE;		\
   317  	JA	3(PC);			\
   318  	MOVL	$NAME(SB), AX;		\
   319  	JMP	AX
   320  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   321  
   322  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// reflect.call is an alias: tail jump to reflectcall in this package.
   323  	JMP	·reflectcall(SB)
   324  
   325  TEXT ·reflectcall(SB), NOSPLIT, $0-20
        	// Picks the callNNN routine with the smallest frame that holds
        	// argsize bytes and jumps to it; the arguments are left in place.
        	// Sizes above 1073741824 end up in badreflectcall.
   326  	MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended
   327  	DISPATCH(runtime·call16, 16)
   328  	DISPATCH(runtime·call32, 32)
   329  	DISPATCH(runtime·call64, 64)
   330  	DISPATCH(runtime·call128, 128)
   331  	DISPATCH(runtime·call256, 256)
   332  	DISPATCH(runtime·call512, 512)
   333  	DISPATCH(runtime·call1024, 1024)
   334  	DISPATCH(runtime·call2048, 2048)
   335  	DISPATCH(runtime·call4096, 4096)
   336  	DISPATCH(runtime·call8192, 8192)
   337  	DISPATCH(runtime·call16384, 16384)
   338  	DISPATCH(runtime·call32768, 32768)
   339  	DISPATCH(runtime·call65536, 65536)
   340  	DISPATCH(runtime·call131072, 131072)
   341  	DISPATCH(runtime·call262144, 262144)
   342  	DISPATCH(runtime·call524288, 524288)
   343  	DISPATCH(runtime·call1048576, 1048576)
   344  	DISPATCH(runtime·call2097152, 2097152)
   345  	DISPATCH(runtime·call4194304, 4194304)
   346  	DISPATCH(runtime·call8388608, 8388608)
   347  	DISPATCH(runtime·call16777216, 16777216)
   348  	DISPATCH(runtime·call33554432, 33554432)
   349  	DISPATCH(runtime·call67108864, 67108864)
   350  	DISPATCH(runtime·call134217728, 134217728)
   351  	DISPATCH(runtime·call268435456, 268435456)
   352  	DISPATCH(runtime·call536870912, 536870912)
   353  	DISPATCH(runtime·call1073741824, 1073741824)
   354  	MOVL	$runtime·badreflectcall(SB), AX
   355  	JMP	AX
   356  
   357  #define CALLFN(NAME,MAXSIZE)			\
   358  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   359  	NO_LOCAL_POINTERS;			\
   360  	/* copy argsize bytes from argptr to the bottom of this frame */		\
   361  	MOVL	argptr+8(FP), SI;		\
   362  	MOVL	argsize+12(FP), CX;		\
   363  	MOVL	SP, DI;				\
   364  	REP;MOVSB;				\
   365  	/* call f's code pointer with the closure pointer in DX */			\
   366  	MOVL	f+4(FP), DX;			\
   367  	MOVL	(DX), AX;			\
   368  	CALL	AX;				\
   369  	/* copy bytes [retoffset, argsize) of the frame back to argptr */		\
   370  	MOVL	argptr+8(FP), DI;		\
   371  	MOVL	argsize+12(FP), CX;		\
   372  	MOVL	retoffset+16(FP), BX;		\
   373  	MOVL	SP, SI;				\
   374  	ADDL	BX, DI;				\
   375  	ADDL	BX, SI;				\
   376  	SUBL	BX, CX;				\
   377  	REP;MOVSB;				\
   378  	/* callwritebarrier(argtype, argptr, argsize, retoffset) */	\
   379  	MOVL	argtype+0(FP), DX;		\
   380  	MOVL	argptr+8(FP), DI;		\
   381  	MOVL	argsize+12(FP), CX;		\
   382  	MOVL	retoffset+16(FP), BX;		\
   383  	MOVL	DX, 0(SP);			\
   384  	MOVL	DI, 4(SP);			\
   385  	MOVL	CX, 8(SP);			\
   386  	MOVL	BX, 12(SP);			\
   387  	CALL	runtime·callwritebarrier(SB);	\
   388  	RET
   389  
        // One CALLFN instance per frame size, for every power of two from 16 to
        // 1073741824. These are the jump targets of the DISPATCH chain in
        // reflectcall; the number in each name is the frame size in bytes.
   390  CALLFN(·call16, 16)
   391  CALLFN(·call32, 32)
   392  CALLFN(·call64, 64)
   393  CALLFN(·call128, 128)
   394  CALLFN(·call256, 256)
   395  CALLFN(·call512, 512)
   396  CALLFN(·call1024, 1024)
   397  CALLFN(·call2048, 2048)
   398  CALLFN(·call4096, 4096)
   399  CALLFN(·call8192, 8192)
   400  CALLFN(·call16384, 16384)
   401  CALLFN(·call32768, 32768)
   402  CALLFN(·call65536, 65536)
   403  CALLFN(·call131072, 131072)
   404  CALLFN(·call262144, 262144)
   405  CALLFN(·call524288, 524288)
   406  CALLFN(·call1048576, 1048576)
   407  CALLFN(·call2097152, 2097152)
   408  CALLFN(·call4194304, 4194304)
   409  CALLFN(·call8388608, 8388608)
   410  CALLFN(·call16777216, 16777216)
   411  CALLFN(·call33554432, 33554432)
   412  CALLFN(·call67108864, 67108864)
   413  CALLFN(·call134217728, 134217728)
   414  CALLFN(·call268435456, 268435456)
   415  CALLFN(·call536870912, 536870912)
   416  CALLFN(·call1073741824, 1073741824)
   417  
   418  // bool cas(int32 *val, int32 old, int32 new)
   419  // Atomically:
   420  //	if(*val == old){
   421  //		*val = new;
   422  //		return 1;
   423  //	} else
   424  //		return 0;
        // The one-byte result is written at ret+16(FP).
   425  TEXT runtime·cas(SB), NOSPLIT, $0-17
   426  	MOVL	ptr+0(FP), BX
   427  	MOVL	old+4(FP), AX	// CMPXCHGL compares memory against AX
   428  	MOVL	new+8(FP), CX
   429  	LOCK
   430  	CMPXCHGL	CX, 0(BX)
   431  	SETEQ	ret+16(FP)	// 1 when the exchange happened
   432  	RET
   433  
   434  TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
        	// Same frame layout as cas; reuses its 32-bit implementation.
   435  	JMP	runtime·cas(SB)
   436  
   437  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
        	// Tail jump to runtime·atomicload, which is not defined in this listing.
   438  	JMP	runtime·atomicload(SB)
   439  
   440  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
        	// Tail jump to runtime·atomicload, which is not defined in this listing.
   441  	JMP	runtime·atomicload(SB)
   442  
   443  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
        	// Tail jump to the 32-bit atomicstore.
        	// NOTE(review): atomicstore is declared $0-8, this wrapper $0-12 - confirm.
   444  	JMP	runtime·atomicstore(SB)
   445  
   446  // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
   447  // Atomically:
   448  //	if(*val == old){
   449  //		*val = new;
   450  //		return 1;
   451  //	} else {
   452  //		return 0;
   453  //	}
        // old and new are 8-byte values at offsets 8 and 16; the one-byte
        // result is written at ret+24(FP).
   454  TEXT runtime·cas64(SB), NOSPLIT, $0-25
   455  	MOVL	ptr+0(FP), BX
   456  	MOVQ	old+8(FP), AX	// CMPXCHGQ compares memory against AX
   457  	MOVQ	new+16(FP), CX
   458  	LOCK
   459  	CMPXCHGQ	CX, 0(BX)
   460  	SETEQ	ret+24(FP)	// 1 when the exchange happened
   461  	RET
   462  
   463  // bool casp1(void **val, void *old, void *new)
   464  // Atomically:
   465  //	if(*val == old){
   466  //		*val = new;
   467  //		return 1;
   468  //	} else
   469  //		return 0;
        // Pointers are 4 bytes here, so the body matches cas instruction for instruction.
   470  TEXT runtime·casp1(SB), NOSPLIT, $0-17
   471  	MOVL	ptr+0(FP), BX
   472  	MOVL	old+4(FP), AX
   473  	MOVL	new+8(FP), CX
   474  	LOCK
   475  	CMPXCHGL	CX, 0(BX)
   476  	SETEQ	ret+16(FP)
   477  	RET
   478  
   479  // uint32 xadd(uint32 volatile *val, int32 delta)
   480  // Atomically:
   481  //	*val += delta;
   482  //	return *val;
   483  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   484  	MOVL	ptr+0(FP), BX
   485  	MOVL	delta+4(FP), AX
   486  	MOVL	AX, CX		// keep delta; XADDL overwrites AX
   487  	LOCK
   488  	XADDL	AX, 0(BX)	// AX = old *val, *val = old + delta
   489  	ADDL	CX, AX		// AX = old + delta, the new value
   490  	MOVL	AX, ret+8(FP)
   491  	RET
   492  
   493  TEXT runtime·xadd64(SB), NOSPLIT, $0-24
        	// 64-bit variant of xadd: atomically adds delta to *ptr and returns
        	// the new value.
   494  	MOVL	ptr+0(FP), BX
   495  	MOVQ	delta+8(FP), AX
   496  	MOVQ	AX, CX		// keep delta; XADDQ overwrites AX
   497  	LOCK
   498  	XADDQ	AX, 0(BX)	// AX = old *ptr, *ptr = old + delta
   499  	ADDQ	CX, AX		// AX = new value
   500  	MOVQ	AX, ret+16(FP)
   501  	RET
   502  
   503  TEXT runtime·xadduintptr(SB), NOSPLIT, $0-12
        	// Same frame layout as xadd; reuses its 32-bit implementation.
   504  	JMP	runtime·xadd(SB)
   505  
   506  TEXT runtime·xchg(SB), NOSPLIT, $0-12
        	// Stores new into *ptr and returns the previous 32-bit value.
        	// XCHG with a memory operand is locked by the CPU without a LOCK prefix.
   507  	MOVL	ptr+0(FP), BX
   508  	MOVL	new+4(FP), AX
   509  	XCHGL	AX, 0(BX)
   510  	MOVL	AX, ret+8(FP)
   511  	RET
   512  
   513  TEXT runtime·xchg64(SB), NOSPLIT, $0-24
        	// Stores new into *ptr and returns the previous 64-bit value.
   514  	MOVL	ptr+0(FP), BX
   515  	MOVQ	new+8(FP), AX
   516  	XCHGQ	AX, 0(BX)
   517  	MOVQ	AX, ret+16(FP)
   518  	RET
   519  
   520  TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
        	// Pointer exchange; pointers are 4 bytes here, so the body matches xchg.
   521  	MOVL	ptr+0(FP), BX
   522  	MOVL	new+4(FP), AX
   523  	XCHGL	AX, 0(BX)
   524  	MOVL	AX, ret+8(FP)
   525  	RET
   526  
   527  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
        	// Same frame layout as xchg; reuses its 32-bit implementation.
   528  	JMP	runtime·xchg(SB)
   529  
   530  TEXT runtime·procyield(SB),NOSPLIT,$0-4
        	// Spins for `cycles` iterations, executing PAUSE once per iteration.
        	// The frame now declares the 4-byte cycles argument it reads (it
        	// was declared with an argument size of 0).
        	// NOTE(review): cycles == 0 wraps on the first SUBL and therefore
        	// spins 2^32 times; presumably callers never pass 0 - confirm.
   531  	MOVL	cycles+0(FP), AX
   532  again:
   533  	PAUSE
   534  	SUBL	$1, AX
   535  	JNZ	again
   536  	RET
   537  
   538  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
        	// Stores the 4-byte pointer val into *ptr with XCHGL; the old value is discarded.
   539  	MOVL	ptr+0(FP), BX
   540  	MOVL	val+4(FP), AX
   541  	XCHGL	AX, 0(BX)
   542  	RET
   543  
   544  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
        	// Stores the 32-bit val into *ptr with XCHGL; the old value is discarded.
   545  	MOVL	ptr+0(FP), BX
   546  	MOVL	val+4(FP), AX
   547  	XCHGL	AX, 0(BX)
   548  	RET
   549  
   550  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
        	// Stores the 64-bit val (at offset 8) into *ptr with XCHGQ; the old value is discarded.
   551  	MOVL	ptr+0(FP), BX
   552  	MOVQ	val+8(FP), AX
   553  	XCHGQ	AX, 0(BX)
   554  	RET
   555  
   556  // void	runtime·atomicor8(byte volatile*, byte);
        // Atomically: *ptr |= val.
   557  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
   558  	MOVL	ptr+0(FP), BX
   559  	MOVB	val+4(FP), AX
   560  	LOCK
   561  	ORB	AX, 0(BX)
   562  	RET
   563  
   564  // void	runtime·atomicand8(byte volatile*, byte);
        // Atomically: *ptr &= val.
   565  TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
   566  	MOVL	ptr+0(FP), BX
   567  	MOVB	val+4(FP), AX
   568  	LOCK
   569  	ANDB	AX, 0(BX)
   570  	RET
   571  
   572  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   573  	// Stores are already ordered on x86, so this is just a
   574  	// compile barrier. No instruction is emitted besides RET.
   575  	RET
   576  
   577  // void jmpdefer(fn, sp);
   578  // called from deferreturn.
   579  // 1. pop the caller
   580  // 2. sub 5 bytes from the callers return
   581  // 3. jmp to the argument
        // When the deferred function returns, it lands on the rewound return
        // address, so the CALL that reached deferreturn runs again.
   582  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   583  	MOVL	fv+0(FP), DX	// DX = deferred function value (closure pointer)
   584  	MOVL	argp+4(FP), BX
   585  	LEAL	-8(BX), SP	// caller sp after CALL
   586  	SUBL	$5, (SP)	// return to CALL again
   587  	MOVL	0(DX), BX	// BX = code pointer stored in the closure
   588  	JMP	BX	// but first run the deferred function
   589  
   590  // func asmcgocall(fn, arg unsafe.Pointer) int32
   591  // Not implemented.
   592  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   593  	MOVL	0, AX	// load from address 0: deliberate fault if ever called
   594  	RET
   595  
   596  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   597  // Not implemented.
   598  TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
   599  	MOVL	0, AX	// load from address 0: deliberate fault if ever called
   600  	RET
   601  
   602  // void setg(G*); set g. for use by needm.
   603  // Not implemented.
   604  TEXT runtime·setg(SB), NOSPLIT, $0-4
   605  	MOVL	0, AX	// load from address 0: deliberate fault if ever called
   606  	RET
   607  
   608  // check that SP is in range [g->stack.lo, g->stack.hi)
        // Faults (load from address 0) when a check fails. As written, the
        // lower-bound test is strict: it requires SP > g->stack.lo.
   609  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   610  	get_tls(CX)
   611  	MOVL	g(CX), AX
   612  	CMPL	(g_stack+stack_hi)(AX), SP
   613  	JHI	2(PC)		// ok when stack.hi > SP (unsigned)
   614  	MOVL	0, AX
   615  	CMPL	SP, (g_stack+stack_lo)(AX)
   616  	JHI	2(PC)		// ok when SP > stack.lo (unsigned)
   617  	MOVL	0, AX
   618  	RET
   619  
   620  TEXT runtime·memclr(SB),NOSPLIT,$0-8
        	// Zeroes n bytes starting at ptr: n/8 quadword stores followed by
        	// n%8 byte stores.
   621  	MOVL	ptr+0(FP), DI
   622  	MOVL	n+4(FP), CX
   623  	MOVQ	CX, BX
   624  	ANDQ	$7, BX		// BX = n % 8, the trailing bytes
   625  	SHRQ	$3, CX		// CX = n / 8, the quadword count
   626  	MOVQ	$0, AX
   627  	CLD
   628  	REP
   629  	STOSQ
   630  	MOVQ	BX, CX
   631  	REP
   632  	STOSB
   633  	RET
   634  
   635  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
        	// Returns the return PC stored 8 bytes below argp. When that slot
        	// holds stackBarrierPC, the value comes from nextBarrierPC instead.
   636  	MOVL	argp+0(FP),AX		// addr of first arg
   637  	MOVL	-8(AX),AX		// get calling pc
   638  	CMPL	AX, runtime·stackBarrierPC(SB)
   639  	JNE	nobar
   640  	// Get original return PC.
   641  	CALL	runtime·nextBarrierPC(SB)
   642  	MOVL	0(SP), AX		// nextBarrierPC's result
   643  nobar:
   644  	MOVL	AX, ret+8(FP)
   645  	RET
   646  
   647  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
        	// Overwrites the return PC stored 8 bytes below argp with pc. When
        	// that slot holds stackBarrierPC, pc is passed to setNextBarrierPC
        	// and the slot is left alone.
   648  	MOVL	argp+0(FP),AX		// addr of first arg
   649  	MOVL	pc+4(FP), BX		// pc to set
   650  	MOVL	-8(AX), CX
   651  	CMPL	CX, runtime·stackBarrierPC(SB)
   652  	JEQ	setbar
   653  	MOVQ	BX, -8(AX)		// set calling pc (8-byte store; upper half is 0 after MOVL)
   654  	RET
   655  setbar:
   656  	// Set the stack barrier return PC.
   657  	MOVL	BX, 0(SP)
   658  	CALL	runtime·setNextBarrierPC(SB)
   659  	RET
   660  
   661  TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
        	// Returns its argp argument unchanged.
   662  	MOVL	argp+0(FP), AX
   663  	MOVL	AX, ret+8(FP)
   664  	RET
   665  
   666  // int64 runtime·cputicks(void)
        // Returns the 64-bit timestamp counter read with RDTSC.
        // The frame now declares the 8-byte result it writes (it was declared
        // with an argument size of 0).
   667  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   668  	RDTSC			// DX:AX = timestamp counter, high:low 32 bits
   669  	SHLQ	$32, DX
   670  	ADDQ	DX, AX		// AX = (high << 32) + low
   671  	MOVQ	AX, ret+0(FP)
   672  	RET
   673  
   674  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   675  // redirects to memhash(p, h, size) using the size
   676  // stored in the closure.
        // The closure pointer is expected in DX on entry.
   677  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   678  	GO_ARGS
   679  	NO_LOCAL_POINTERS
   680  	MOVL	p+0(FP), AX
   681  	MOVL	h+4(FP), BX
   682  	MOVL	4(DX), CX	// size, read at offset 4 of the closure
   683  	MOVL	AX, 0(SP)
   684  	MOVL	BX, 4(SP)
   685  	MOVL	CX, 8(SP)
   686  	CALL	runtime·memhash(SB)
   687  	MOVL	16(SP), AX	// memhash's result
   688  	MOVL	AX, ret+8(FP)
   689  	RET
   690  
   691  // hash function using AES hardware instructions
   692  // For now, our one amd64p32 system (NaCl) does not
   693  // support using AES instructions, so have not bothered to
   694  // write the implementations. Can copy and adjust the ones
   695  // in asm_amd64.s when the time comes.
        // Until then the aeshash* routines are placeholders: each one stores
        // AX into its result slot without ever setting AX.
   696  
   697  TEXT runtime·aeshash(SB),NOSPLIT,$0-20
   698  	MOVL	AX, ret+16(FP)	// placeholder; AX is not set in this routine
   699  	RET
   700  
   701  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
   702  	MOVL	AX, ret+16(FP)	// placeholder; AX is not set in this routine
   703  	RET
   704  
   705  TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
   706  	MOVL	AX, ret+16(FP)	// placeholder; AX is not set in this routine
   707  	RET
   708  
   709  TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
   710  	MOVL	AX, ret+16(FP)	// placeholder; AX is not set in this routine
   711  	RET
   712  
   713  TEXT runtime·memeq(SB),NOSPLIT,$0-17
        	// Reports whether the size bytes at a and b are equal; memeqbody
        	// does the comparison and returns the answer in AX.
   714  	MOVL	a+0(FP), SI
   715  	MOVL	b+4(FP), DI
   716  	MOVL	size+8(FP), BX
   717  	CALL	runtime·memeqbody(SB)
   718  	MOVB	AX, ret+16(FP)
   719  	RET
   720  
   721  // memequal_varlen(a, b unsafe.Pointer) bool
        // Compares the two regions; the size is read from the closure in DX.
        // Identical pointers are reported equal without reading memory.
   722  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   723  	MOVL    a+0(FP), SI
   724  	MOVL    b+4(FP), DI
   725  	CMPL    SI, DI
   726  	JEQ     eq
   727  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   728  	CALL    runtime·memeqbody(SB)
   729  	MOVB    AX, ret+8(FP)
   730  	RET
   731  eq:
   732  	MOVB    $1, ret+8(FP)
   733  	RET
   734  
   735  // eqstring tests whether two strings are equal.
   736  // The compiler guarantees that strings passed
   737  // to eqstring have equal length.
   738  // See runtime_test.go:eqstring_generic for
   739  // equivalent Go code.
        // Only s1's length is read; identical data pointers short-circuit to true.
   740  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   741  	MOVL	s1str+0(FP), SI
   742  	MOVL	s2str+8(FP), DI
   743  	CMPL	SI, DI
   744  	JEQ	same
   745  	MOVL	s1len+4(FP), BX
   746  	CALL	runtime·memeqbody(SB)
   747  	MOVB	AX, v+16(FP)
   748  	RET
   749  same:
   750  	MOVB	$1, v+16(FP)
   751  	RET
   752  
   753  // a in SI
   754  // b in DI
   755  // count in BX
        // Returns AX = 1 when the count bytes at a and b are equal, 0 otherwise.
        // Overwrites BX, CX, DX, SI, DI and, for counts of 64 or more, X0-X7.
   756  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   757  	XORQ	AX, AX		// default result: not equal
   758  
   759  	CMPQ	BX, $8
   760  	JB	small
   761  	
   762  	// 64 bytes at a time using xmm registers
   763  hugeloop:
   764  	CMPQ	BX, $64
   765  	JB	bigloop
   766  	MOVOU	(SI), X0
   767  	MOVOU	(DI), X1
   768  	MOVOU	16(SI), X2
   769  	MOVOU	16(DI), X3
   770  	MOVOU	32(SI), X4
   771  	MOVOU	32(DI), X5
   772  	MOVOU	48(SI), X6
   773  	MOVOU	48(DI), X7
   774  	PCMPEQB	X1, X0
   775  	PCMPEQB	X3, X2
   776  	PCMPEQB	X5, X4
   777  	PCMPEQB	X7, X6
   778  	PAND	X2, X0
   779  	PAND	X6, X4
   780  	PAND	X4, X0
   781  	PMOVMSKB X0, DX		// one mask bit per byte lane, AND-ed over all four chunks
   782  	ADDQ	$64, SI
   783  	ADDQ	$64, DI
   784  	SUBQ	$64, BX
   785  	CMPL	DX, $0xffff	// all 16 bits set: all 64 bytes matched
   786  	JEQ	hugeloop
   787  	RET			// mismatch: AX is still 0
   788  
   789  	// 8 bytes at a time using 64-bit register
   790  bigloop:
   791  	CMPQ	BX, $8
   792  	JBE	leftover
   793  	MOVQ	(SI), CX
   794  	MOVQ	(DI), DX
   795  	ADDQ	$8, SI
   796  	ADDQ	$8, DI
   797  	SUBQ	$8, BX
   798  	CMPQ	CX, DX
   799  	JEQ	bigloop
   800  	RET			// mismatch: AX is still 0
   801  
   802  	// remaining 0-8 bytes
        	// Compares the last 8 bytes of each buffer; they may overlap bytes
        	// that were already compared.
   803  leftover:
   804  	ADDQ	BX, SI
   805  	ADDQ	BX, DI
   806  	MOVQ	-8(SI), CX
   807  	MOVQ	-8(DI), DX
   808  	CMPQ	CX, DX
   809  	SETEQ	AX
   810  	RET
   811  
        	// fewer than 8 bytes in total
   812  small:
   813  	CMPQ	BX, $0
   814  	JEQ	equal		// count 0: ZF is set, so SETEQ at equal yields 1
   815  
   816  	LEAQ	0(BX*8), CX
   817  	NEGQ	CX		// shift count = 64 - 8*count (mod 64)
   818  
   819  	CMPB	SI, $0xf8	// tests only the low byte of the address
   820  	JA	si_high
   821  
   822  	// load at SI won't cross a page boundary.
   823  	MOVQ	(SI), SI
   824  	JMP	si_finish
   825  si_high:
   826  	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
   827  	MOVQ	BX, DX
   828  	ADDQ	SI, DX
   829  	MOVQ	-8(DX), SI
   830  	SHRQ	CX, SI
   831  si_finish:
   832  
   833  	// same for DI.
   834  	CMPB	DI, $0xf8
   835  	JA	di_high
   836  	MOVQ	(DI), DI
   837  	JMP	di_finish
   838  di_high:
   839  	MOVQ	BX, DX
   840  	ADDQ	DI, DX
   841  	MOVQ	-8(DX), DI
   842  	SHRQ	CX, DI
   843  di_finish:
   844  
   845  	SUBQ	SI, DI		// bytes that differ leave nonzero bits
   846  	SHLQ	CX, DI		// shift out the bytes beyond count; ZF set when the rest match
   847  equal:
   848  	SETEQ	AX
   849  	RET
   850  
   851  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Three-way comparison of two strings; loads cmpbody's input
        	// registers and stores its 1/0/-1 result.
   852  	MOVL	s1_base+0(FP), SI
   853  	MOVL	s1_len+4(FP), BX
   854  	MOVL	s2_base+8(FP), DI
   855  	MOVL	s2_len+12(FP), DX
   856  	CALL	runtime·cmpbody(SB)
   857  	MOVL	AX, ret+16(FP)
   858  	RET
   859  
   860  TEXT bytes·Compare(SB),NOSPLIT,$0-28
        	// Three-way comparison of two byte slices; each slice header takes
        	// 12 bytes here, so s2 starts at offset 12.
   861  	MOVL	s1+0(FP), SI	// s1 data pointer
   862  	MOVL	s1+4(FP), BX	// s1 length
   863  	MOVL	s2+12(FP), DI	// s2 data pointer
   864  	MOVL	s2+16(FP), DX	// s2 length
   865  	CALL	runtime·cmpbody(SB)
   866  	MOVL	AX, res+24(FP)
   867  	RET
   868  
   869  // input:
   870  //   SI = a
   871  //   DI = b
   872  //   BX = alen
   873  //   DX = blen
   874  // output:
   875  //   AX = 1/0/-1
        // The first min(alen, blen) bytes are compared as unsigned bytes; when
        // they are all equal the result is decided by the lengths.
        // Also overwrites CX, R8, SI, DI, X0 and X1.
   876  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   877  	CMPQ	SI, DI
   878  	JEQ	allsame		// same pointer: only the lengths can differ
   879  	CMPQ	BX, DX
   880  	MOVQ	DX, R8
   881  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   882  	CMPQ	R8, $8
   883  	JB	small
   884  
   885  loop:
   886  	CMPQ	R8, $16
   887  	JBE	_0through16
   888  	MOVOU	(SI), X0
   889  	MOVOU	(DI), X1
   890  	PCMPEQB X0, X1
   891  	PMOVMSKB X1, AX
   892  	XORQ	$0xffff, AX	// convert EQ to NE
   893  	JNE	diff16	// branch if at least one byte is not equal
   894  	ADDQ	$16, SI
   895  	ADDQ	$16, DI
   896  	SUBQ	$16, R8
   897  	JMP	loop
   898  	
   899  	// AX = bit mask of differences
   900  diff16:
   901  	BSFQ	AX, BX	// index of first byte that differs
   902  	XORQ	AX, AX
   903  	ADDQ	BX, SI
   904  	MOVB	(SI), CX
   905  	ADDQ	BX, DI
   906  	CMPB	CX, (DI)
   907  	SETHI	AX	// 1 when a's byte is larger (unsigned)
   908  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   909  	RET
   910  
   911  	// 0 through 16 bytes left, alen>=8, blen>=8
   912  _0through16:
   913  	CMPQ	R8, $8
   914  	JBE	_0through8
   915  	MOVQ	(SI), AX
   916  	MOVQ	(DI), CX
   917  	CMPQ	AX, CX
   918  	JNE	diff8
   919  _0through8:
        	// compare the last 8 bytes of the common prefix
   920  	ADDQ	R8, SI
   921  	ADDQ	R8, DI
   922  	MOVQ	-8(SI), AX
   923  	MOVQ	-8(DI), CX
   924  	CMPQ	AX, CX
   925  	JEQ	allsame
   926  
   927  	// AX and CX contain parts of a and b that differ.
   928  diff8:
   929  	BSWAPQ	AX	// reverse order of bytes
   930  	BSWAPQ	CX
   931  	XORQ	AX, CX
   932  	BSRQ	CX, CX	// index of highest bit difference
   933  	SHRQ	CX, AX	// move a's bit to bottom
   934  	ANDQ	$1, AX	// mask bit
   935  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   936  	RET
   937  
   938  	// 0-7 bytes in common
   939  small:
   940  	LEAQ	(R8*8), CX	// bytes left -> bits left
   941  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   942  	JEQ	allsame		// nothing to compare: decide by length
   943  
   944  	// load bytes of a into high bytes of SI
   945  	CMPB	SI, $0xf8	// tests only the low byte of the address
   946  	JA	si_high
   947  	MOVQ	(SI), SI
   948  	JMP	si_finish
   949  si_high:
   950  	ADDQ	R8, SI
   951  	MOVQ	-8(SI), SI
   952  	SHRQ	CX, SI
   953  si_finish:
   954  	SHLQ	CX, SI
   955  
   956  	// load bytes of b into high bytes of DI
   957  	CMPB	DI, $0xf8
   958  	JA	di_high
   959  	MOVQ	(DI), DI
   960  	JMP	di_finish
   961  di_high:
   962  	ADDQ	R8, DI
   963  	MOVQ	-8(DI), DI
   964  	SHRQ	CX, DI
   965  di_finish:
   966  	SHLQ	CX, DI
   967  
   968  	BSWAPQ	SI	// reverse order of bytes
   969  	BSWAPQ	DI
   970  	XORQ	SI, DI	// find bit differences
   971  	JEQ	allsame
   972  	BSRQ	DI, CX	// index of highest bit difference
   973  	SHRQ	CX, SI	// move a's bit to bottom
   974  	ANDQ	$1, SI	// mask bit
   975  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   976  	RET
   977  
        	// common prefix is identical: the result depends on the lengths only
   978  allsame:
   979  	XORQ	AX, AX
   980  	XORQ	CX, CX
   981  	CMPQ	BX, DX
   982  	SETGT	AX	// 1 if alen > blen
   983  	SETEQ	CX	// 1 if alen == blen
   984  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   985  	RET
   986  
   987  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
        	// Returns the index of the first c in the byte slice s, or -1.
        	// The 12-byte slice header puts c at offset 12.
   988  	MOVL s+0(FP), SI
   989  	MOVL s_len+4(FP), BX
   990  	MOVB c+12(FP), AL
   991  	CALL runtime·indexbytebody(SB)
   992  	MOVL AX, ret+16(FP)
   993  	RET
   994  
   995  TEXT strings·IndexByte(SB),NOSPLIT,$0-20
        	// Returns the index of the first c in the string s, or -1.
        	// The 8-byte string header puts c at offset 8.
   996  	MOVL s+0(FP), SI
   997  	MOVL s_len+4(FP), BX
   998  	MOVB c+8(FP), AL
   999  	CALL runtime·indexbytebody(SB)
  1000  	MOVL AX, ret+16(FP)
  1001  	RET
  1002  
  1003  // input:
  1004  //   SI: data
  1005  //   BX: data len
  1006  //   AL: byte sought
  1007  // output:
  1008  //   AX: index of the first matching byte, or -1 if there is none
        // Also overwrites CX, DX, DI, R11, X0 and X1.
  1009  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
  1010  	MOVL SI, DI		// DI = scan cursor, SI stays at the start
  1011  
  1012  	CMPL BX, $16
  1013  	JLT small
  1014  
  1015  	// round up to first 16-byte boundary
  1016  	TESTL $15, SI
  1017  	JZ aligned
  1018  	MOVL SI, CX
  1019  	ANDL $~15, CX
  1020  	ADDL $16, CX
  1021  
  1022  	// search the beginning
  1023  	SUBL SI, CX		// CX = bytes up to the boundary
  1024  	REPN; SCASB
  1025  	JZ success
  1026  
  1027  // DI is 16-byte aligned; get ready to search using SSE instructions
  1028  aligned:
  1029  	// round down to last 16-byte boundary
  1030  	MOVL BX, R11
  1031  	ADDL SI, R11
  1032  	ANDL $~15, R11
  1033  
  1034  	// shuffle X0 around so that each byte contains c
  1035  	MOVD AX, X0
  1036  	PUNPCKLBW X0, X0
  1037  	PUNPCKLBW X0, X0
  1038  	PSHUFL $0, X0, X0
  1039  	JMP condition
  1040  
  1041  sse:
  1042  	// move the next 16-byte chunk of the buffer into X1
  1043  	MOVO (DI), X1
  1044  	// compare bytes in X0 to X1
  1045  	PCMPEQB X0, X1
  1046  	// take the top bit of each byte in X1 and put the result in DX
  1047  	PMOVMSKB X1, DX
  1048  	TESTL DX, DX
  1049  	JNZ ssesuccess
  1050  	ADDL $16, DI
  1051  
  1052  condition:
        	// NOTE(review): JLT is a signed comparison of two addresses;
        	// presumably both stay below 2GB on this target - confirm.
  1053  	CMPL DI, R11
  1054  	JLT sse
  1055  
  1056  	// search the end
  1057  	MOVL SI, CX
  1058  	ADDL BX, CX
  1059  	SUBL R11, CX		// CX = bytes past the last 16-byte boundary
  1060  	// if CX == 0, the zero flag will be set and we'll end up
  1061  	// returning a false success
  1062  	JZ failure
  1063  	REPN; SCASB
  1064  	JZ success
  1065  
  1066  failure:
  1067  	MOVL $-1, AX
  1068  	RET
  1069  
  1070  // handle for lengths < 16
  1071  small:
  1072  	MOVL BX, CX
  1073  	REPN; SCASB
  1074  	JZ success
  1075  	MOVL $-1, AX
  1076  	RET
  1077  
  1078  // we've found the chunk containing the byte
  1079  // now just figure out which specific byte it is
  1080  ssesuccess:
  1081  	// get the index of the least significant set bit
  1082  	BSFW DX, DX
  1083  	SUBL SI, DI		// DI = offset of this chunk from the start
  1084  	ADDL DI, DX
  1085  	MOVL DX, AX
  1086  	RET
  1087  
        // SCASB has already stepped DI one byte past the match
  1088  success:
  1089  	SUBL SI, DI
  1090  	SUBL $1, DI
  1091  	MOVL DI, AX
  1092  	RET
  1093  
  1094  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Reports whether two byte slices have the same length and contents.
        	// Slices of different length return false without reading the data.
  1095  	MOVL	a_len+4(FP), BX
  1096  	MOVL	b_len+16(FP), CX
  1097  	XORL	AX, AX		// result when the lengths differ
  1098  	CMPL	BX, CX
  1099  	JNE	eqret
  1100  	MOVL	a+0(FP), SI
  1101  	MOVL	b+12(FP), DI
  1102  	CALL	runtime·memeqbody(SB)
  1103  eqret:
  1104  	MOVB	AX, ret+24(FP)
  1105  	RET
  1106  
  1107  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
        	// Advances the per-m generator state m.fastrand and returns the new value.
  1108  	get_tls(CX)
  1109  	MOVL	g(CX), AX
  1110  	MOVL	g_m(AX), AX		// AX = m
  1111  	MOVL	m_fastrand(AX), DX
  1112  	ADDL	DX, DX			// DX = 2*x
  1113  	MOVL	DX, BX			// BX = 2*x
  1114  	XORL	$0x88888eef, DX		// DX = 2*x ^ constant; sets the sign flag
  1115  	CMOVLMI	BX, DX			// XOR result negative: keep plain 2*x instead
  1116  	MOVL	DX, m_fastrand(AX)
  1117  	MOVL	DX, ret+0(FP)
  1118  	RET
  1119  
  1120  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns.
  1121  	MOVL	$0, AX
  1122  	RET
  1123  
  1124  // The top-most function running on a goroutine
  1125  // returns to goexit+PCQuantum.
        // The leading NOP is the byte that such a return address skips over.
  1126  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1127  	BYTE	$0x90	// NOP
  1128  	CALL	runtime·goexit1(SB)	// does not return
  1129  	// traceback from goexit1 must hit code range of goexit
  1130  	BYTE	$0x90	// NOP
  1131  
  1132  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT0 hint for the cache line containing addr.
  1133  	MOVL	addr+0(FP), AX
  1134  	PREFETCHT0	(AX)
  1135  	RET
  1136  
  1137  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT1 hint for the cache line containing addr.
  1138  	MOVL	addr+0(FP), AX
  1139  	PREFETCHT1	(AX)
  1140  	RET
  1141  
  1142  
  1143  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT2 hint for the cache line containing addr.
  1144  	MOVL	addr+0(FP), AX
  1145  	PREFETCHT2	(AX)
  1146  	RET
  1147  
  1148  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHNTA (non-temporal) hint for the cache line containing addr.
  1149  	MOVL	addr+0(FP), AX
  1150  	PREFETCHNTA	(AX)
  1151  	RET