github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// runtime entry: set up g0/m0 and TLS, run init, queue runtime·main, start this M
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, CX		// plenty of scratch; adjust CX because CX is what becomes SP below
    16  	ANDL	$~15, CX		// round down to a 16-byte boundary
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)		// stash argc in the scratch area
    20  	MOVL	BX, 24(SP)		// stash argv in the scratch area
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104, used as g0's lower stack bound
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX
    32  	CPUID
    33  	CMPQ	AX, $0			// leaf 0 returned 0 in AX: skip the leaf 1 query
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)	// save leaf 1 CX result
    38  	MOVL	DX, runtime·cpuid_edx(SB)	// save leaf 1 DX result
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·tls0(SB), DI	// DI = &tls0, the argument handed to settls
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)
    48  	MOVQ	runtime·tls0(SB), AX
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)			// value read back correctly: skip the abort
    51  	MOVL	AX, 0	// abort: store to address 0
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)		// current g = g0
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·main·f(SB), AX	// entry
    77  	MOVL	$0, 0(SP)		// first newproc argument: 0
    78  	MOVL	AX, 4(SP)		// second newproc argument: address of main·f
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
    84  	MOVL	$0xf1, 0xf1  // crash: reached only if mstart returns
    85  	RET
    86  
    87  DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
    88  GLOBL	runtime·main·f(SB),RODATA,$4	// read-only, 4 bytes
    89  
    90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// raises interrupt 3, then returns
    91  	INT $3
    92  	RET
    93  
    94  TEXT runtime·asminit(SB),NOSPLIT,$0-0	// no-op on this port
    95  	// No per-thread init.
    96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
   102  // void gosave(Gobuf*)
   103  // save state in Gobuf; setjmp. Records the caller's SP, PC and the current g; zeroes ctxt and ret.
   104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   105  	MOVL	buf+0(FP), AX	// gobuf
   106  	LEAL	buf+0(FP), BX	// caller's SP
   107  	MOVL	BX, gobuf_sp(AX)
   108  	MOVL	0(SP), BX		// caller's PC
   109  	MOVL	BX, gobuf_pc(AX)
   110  	MOVL	$0, gobuf_ctxt(AX)	// no closure context saved
   111  	MOVQ	$0, gobuf_ret(AX)	// 8-byte store clears the saved return value
   112  	get_tls(CX)
   113  	MOVL	g(CX), BX
   114  	MOVL	BX, gobuf_g(AX)	// gobuf.g = current g
   115  	RET
   116  
   117  // void gogo(Gobuf*)
   118  // restore state from Gobuf; longjmp. Installs gobuf.g as the current g, then jumps to gobuf.pc on gobuf.sp.
   119  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   120  	MOVL	buf+0(FP), BX		// gobuf
   121  	MOVL	gobuf_g(BX), DX
   122  	MOVL	0(DX), CX		// make sure g != nil (the load faults if DX is nil; CX is overwritten next)
   123  	get_tls(CX)
   124  	MOVL	DX, g(CX)		// current g = gobuf.g
   125  	MOVL	gobuf_sp(BX), SP	// restore SP
   126  	MOVL	gobuf_ctxt(BX), DX	// DX = saved closure context
   127  	MOVQ	gobuf_ret(BX), AX	// AX = saved return value
   128  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   129  	MOVQ	$0, gobuf_ret(BX)
   130  	MOVL	$0, gobuf_ctxt(BX)
   131  	MOVL	gobuf_pc(BX), BX	// BX no longer points at the gobuf after this load
   132  	JMP	BX			// resume at the saved PC
   133  
   134  // func mcall(fn func(*g))
   135  // Switch to m->g0's stack, call fn(g).
   136  // Fn must never return.  It should gogo(&g->sched)
   137  // to keep running g. Calling mcall while already on g0 jumps to badmcall.
   138  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   139  	MOVL	fn+0(FP), DI	// DI = fn (pointer to the func value)
   140  	
   141  	get_tls(CX)
   142  	MOVL	g(CX), AX	// save state in g->sched
   143  	MOVL	0(SP), BX	// caller's PC
   144  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   145  	LEAL	fn+0(FP), BX	// caller's SP
   146  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   147  	MOVL	AX, (g_sched+gobuf_g)(AX)
   148  
   149  	// switch to m->g0 & its stack, call fn
   150  	MOVL	g(CX), BX
   151  	MOVL	g_m(BX), BX	// BX = g->m
   152  	MOVL	m_g0(BX), SI	// SI = m->g0
   153  	CMPL	SI, AX	// if g == m->g0 call badmcall
   154  	JNE	3(PC)		// different g: skip the two-instruction badmcall jump
   155  	MOVL	$runtime·badmcall(SB), AX
   156  	JMP	AX
   157  	MOVL	SI, g(CX)	// g = m->g0
   158  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   159  	PUSHQ	AX		// argument to fn: the g we switched away from
   160  	MOVL	DI, DX		// DX = func value (closure context)
   161  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   162  	CALL	DI
   163  	POPQ	AX		// reached only if fn returned
   164  	MOVL	$runtime·badmcall2(SB), AX
   165  	JMP	AX
   166  	RET
   167  
   168  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   169  // of the G stack.  We need to distinguish the routine that
   170  // lives at the bottom of the G stack from the one that lives
   171  // at the top of the system stack because the one at the top of
   172  // the system stack terminates the stack walk (see topofstack()).
   173  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// only its address is used (stored as sched.pc by systemstack)
   174  	RET
   175  
   176  // func systemstack(fn func()): call fn on m->g0's stack; call it directly if already on g0 or gsignal.
   177  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   178  	MOVL	fn+0(FP), DI	// DI = fn
   179  	get_tls(CX)
   180  	MOVL	g(CX), AX	// AX = g
   181  	MOVL	g_m(AX), BX	// BX = m
   182  
   183  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   184  	CMPL	AX, DX
   185  	JEQ	noswitch	// running on gsignal: call fn in place
   186  
   187  	MOVL	m_g0(BX), DX	// DX = g0
   188  	CMPL	AX, DX
   189  	JEQ	noswitch	// already on g0: call fn in place
   190  
   191  	MOVL	m_curg(BX), R8
   192  	CMPL	AX, R8
   193  	JEQ	switch		// on the user goroutine m->curg: switch stacks
   194  	
   195  	// Not g0, not curg. Must be gsignal, but that's not allowed.
   196  	// Hide call from linker nosplit analysis.
   197  	MOVL	$runtime·badsystemstack(SB), AX
   198  	CALL	AX		// NOTE(review): if this call returned, execution would fall through into switch
   199  
   200  switch:
   201  	// save our state in g->sched.  Pretend to
   202  	// be systemstack_switch if the G stack is scanned.
   203  	MOVL	$runtime·systemstack_switch(SB), SI
   204  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   205  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   206  	MOVL	AX, (g_sched+gobuf_g)(AX)
   207  
   208  	// switch to g0
   209  	MOVL	DX, g(CX)	// DX still holds g0 from the check above
   210  	MOVL	(g_sched+gobuf_sp)(DX), SP
   211  
   212  	// call target function
   213  	MOVL	DI, DX		// DX = func value (closure context)
   214  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   215  	CALL	DI
   216  
   217  	// switch back to g
   218  	get_tls(CX)
   219  	MOVL	g(CX), AX
   220  	MOVL	g_m(AX), BX
   221  	MOVL	m_curg(BX), AX	// AX = m->curg, the goroutine we left
   222  	MOVL	AX, g(CX)
   223  	MOVL	(g_sched+gobuf_sp)(AX), SP	// restore the SP saved at switch
   224  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP
   225  	RET
   226  
   227  noswitch:
   228  	// already on m stack, just call directly
   229  	MOVL	DI, DX		// DX = func value (closure context)
   230  	MOVL	0(DI), DI	// DI = code pointer
   231  	CALL	DI
   232  	RET
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
   238  // Called during function prolog when more stack is needed.
   239  //
   240  // The traceback routines see morestack on a g0 as being
   241  // the top of a stack (for example, morestack calling newstack
   242  // calling the scheduler calling newm calling gc), so we must
   243  // record an argument size. For that purpose, it has no arguments.
   244  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   245  	get_tls(CX)
   246  	MOVL	g(CX), BX
   247  	MOVL	g_m(BX), BX	// BX = g->m
   248  
   249  	// Cannot grow scheduler stack (m->g0).
   250  	MOVL	m_g0(BX), SI
   251  	CMPL	g(CX), SI
   252  	JNE	2(PC)		// not g0: skip the abort
   253  	MOVL	0, AX		// abort: load from address 0
   254  
   255  	// Cannot grow signal stack (m->gsignal).
   256  	MOVL	m_gsignal(BX), SI
   257  	CMPL	g(CX), SI
   258  	JNE	2(PC)		// not gsignal: skip the abort
   259  	MOVL	0, AX		// abort: load from address 0
   260  
   261  	// Called from f.
   262  	// Set m->morebuf to f's caller.
   263  	MOVL	8(SP), AX	// f's caller's PC
   264  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   265  	LEAL	16(SP), AX	// f's caller's SP
   266  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   267  	get_tls(CX)
   268  	MOVL	g(CX), SI	// SI = current g
   269  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   270  
   271  	// Set g->sched to context in f.
   272  	MOVL	0(SP), AX // f's PC
   273  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   274  	MOVL	SI, (g_sched+gobuf_g)(SI)
   275  	LEAL	8(SP), AX // f's SP
   276  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   277  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as left by the caller (morestack_noctxt zeroes it)
   278  
   279  	// Call newstack on m->g0's stack.
   280  	MOVL	m_g0(BX), BX
   281  	MOVL	BX, g(CX)	// g = m->g0
   282  	MOVL	(g_sched+gobuf_sp)(BX), SP
   283  	CALL	runtime·newstack(SB)
   284  	MOVL	$0, 0x1003	// crash if newstack returns
   285  	RET
   286  
   287  // morestack trampolines: morestack_noctxt zeroes DX, then tail-jumps to morestack.
   288  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   289  	MOVL	$0, DX		// DX = 0; morestack stores DX as sched.ctxt
   290  	JMP	runtime·morestack(SB)
   291  
   292  // reflectcall: call a function with the given argument list
   293  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   294  // we don't have variable-sized frames, so we use a small number
   295  // of constant-sized-frame functions to encode a few bits of size in the pc.
   296  // Caution: ugly multiline assembly macros in your future!
   297  
   298  #define DISPATCH(NAME,MAXSIZE)		\
   299  	CMPL	CX, $MAXSIZE;		\
   300  	JA	3(PC);			\
   301  	MOVL	$NAME(SB), AX;		\
   302  	JMP	AX
   303  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   304  // DISPATCH jumps to NAME when CX <= MAXSIZE (unsigned); otherwise JA 3(PC) skips to the next line.
   305  TEXT reflect·call(SB), NOSPLIT, $0-0	// alias: tail-jumps to ·reflectcall
   306  	JMP	·reflectcall(SB)
   307  
   308  TEXT ·reflectcall(SB), NOSPLIT, $0-20	// jumps to the first callN whose N >= argsize
   309  	MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended
   310  	DISPATCH(runtime·call16, 16)
   311  	DISPATCH(runtime·call32, 32)
   312  	DISPATCH(runtime·call64, 64)
   313  	DISPATCH(runtime·call128, 128)
   314  	DISPATCH(runtime·call256, 256)
   315  	DISPATCH(runtime·call512, 512)
   316  	DISPATCH(runtime·call1024, 1024)
   317  	DISPATCH(runtime·call2048, 2048)
   318  	DISPATCH(runtime·call4096, 4096)
   319  	DISPATCH(runtime·call8192, 8192)
   320  	DISPATCH(runtime·call16384, 16384)
   321  	DISPATCH(runtime·call32768, 32768)
   322  	DISPATCH(runtime·call65536, 65536)
   323  	DISPATCH(runtime·call131072, 131072)
   324  	DISPATCH(runtime·call262144, 262144)
   325  	DISPATCH(runtime·call524288, 524288)
   326  	DISPATCH(runtime·call1048576, 1048576)
   327  	DISPATCH(runtime·call2097152, 2097152)
   328  	DISPATCH(runtime·call4194304, 4194304)
   329  	DISPATCH(runtime·call8388608, 8388608)
   330  	DISPATCH(runtime·call16777216, 16777216)
   331  	DISPATCH(runtime·call33554432, 33554432)
   332  	DISPATCH(runtime·call67108864, 67108864)
   333  	DISPATCH(runtime·call134217728, 134217728)
   334  	DISPATCH(runtime·call268435456, 268435456)
   335  	DISPATCH(runtime·call536870912, 536870912)
   336  	DISPATCH(runtime·call1073741824, 1073741824)
   337  	MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds the largest frame (1073741824)
   338  	JMP	AX
   339  
   340  #define CALLFN(NAME,MAXSIZE)			\
   341  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   342  	NO_LOCAL_POINTERS;			\
   343  	/* copy argsize bytes from argptr to the frame at SP */		\
   344  	MOVL	argptr+8(FP), SI;		\
   345  	MOVL	argsize+12(FP), CX;		\
   346  	MOVL	SP, DI;				\
   347  	REP;MOVSB;				\
   348  	/* call function: DX = f, AX = code pointer in its first word */			\
   349  	MOVL	f+4(FP), DX;			\
   350  	MOVL	(DX), AX;			\
   351  	CALL	AX;				\
   352  	/* copy return values back: bytes [retoffset, argsize) from the frame to argptr */		\
   353  	MOVL	argptr+8(FP), DI;		\
   354  	MOVL	argsize+12(FP), CX;		\
   355  	MOVL	retoffset+16(FP), BX;		\
   356  	MOVL	SP, SI;				\
   357  	ADDL	BX, DI;				\
   358  	ADDL	BX, SI;				\
   359  	SUBL	BX, CX;				\
   360  	REP;MOVSB;				\
   361  	/* execute write barrier updates: callwritebarrier(argtype, argptr, argsize, retoffset) */	\
   362  	MOVL	argtype+0(FP), DX;		\
   363  	MOVL	argptr+8(FP), DI;		\
   364  	MOVL	argsize+12(FP), CX;		\
   365  	MOVL	retoffset+16(FP), BX;		\
   366  	MOVL	DX, 0(SP);			\
   367  	MOVL	DI, 4(SP);			\
   368  	MOVL	CX, 8(SP);			\
   369  	MOVL	BX, 12(SP);			\
   370  	CALL	runtime·callwritebarrier(SB);	\
   371  	RET
   372  
   373  CALLFN(·call16, 16)	// one fixed-frame instance per power-of-two size; reflectcall picks among them
   374  CALLFN(·call32, 32)
   375  CALLFN(·call64, 64)
   376  CALLFN(·call128, 128)
   377  CALLFN(·call256, 256)
   378  CALLFN(·call512, 512)
   379  CALLFN(·call1024, 1024)
   380  CALLFN(·call2048, 2048)
   381  CALLFN(·call4096, 4096)
   382  CALLFN(·call8192, 8192)
   383  CALLFN(·call16384, 16384)
   384  CALLFN(·call32768, 32768)
   385  CALLFN(·call65536, 65536)
   386  CALLFN(·call131072, 131072)
   387  CALLFN(·call262144, 262144)
   388  CALLFN(·call524288, 524288)
   389  CALLFN(·call1048576, 1048576)
   390  CALLFN(·call2097152, 2097152)
   391  CALLFN(·call4194304, 4194304)
   392  CALLFN(·call8388608, 8388608)
   393  CALLFN(·call16777216, 16777216)
   394  CALLFN(·call33554432, 33554432)
   395  CALLFN(·call67108864, 67108864)
   396  CALLFN(·call134217728, 134217728)
   397  CALLFN(·call268435456, 268435456)
   398  CALLFN(·call536870912, 536870912)
   399  CALLFN(·call1073741824, 1073741824)
   400  
   401  // bool cas(int32 *val, int32 old, int32 new)
   402  // Atomically:
   403  //	if(*val == old){
   404  //		*val = new;
   405  //		return 1;
   406  //	} else
   407  //		return 0;
   408  TEXT runtime·cas(SB), NOSPLIT, $0-17
   409  	MOVL	ptr+0(FP), BX
   410  	MOVL	old+4(FP), AX	// CMPXCHG compares against AX
   411  	MOVL	new+8(FP), CX
   412  	LOCK
   413  	CMPXCHGL	CX, 0(BX)	// if 0(BX) == AX, store CX there and set ZF
   414  	SETEQ	ret+16(FP)	// result byte = 1 when the swap happened
   415  	RET
   416  
   417  TEXT runtime·casuintptr(SB), NOSPLIT, $0-17	// tail-jumps to the 32-bit cas
   418  	JMP	runtime·cas(SB)
   419  
   420  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12	// tail-jumps to atomicload (defined outside this file)
   421  	JMP	runtime·atomicload(SB)
   422  
   423  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12	// tail-jumps to atomicload (defined outside this file)
   424  	JMP	runtime·atomicload(SB)
   425  
   426  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12	// tail-jumps to the 32-bit atomicstore
   427  	JMP	runtime·atomicstore(SB)
   428  
   429  // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
   430  // Atomically:
   431  //	if(*val == old){
   432  //		*val = new;
   433  //		return 1;
   434  //	} else {
   435  //		return 0;
   436  //	}
   437  TEXT runtime·cas64(SB), NOSPLIT, $0-25
   438  	MOVL	ptr+0(FP), BX	// pointer is 32 bits wide
   439  	MOVQ	old+8(FP), AX	// old is passed by value; CMPXCHG compares against AX
   440  	MOVQ	new+16(FP), CX
   441  	LOCK
   442  	CMPXCHGQ	CX, 0(BX)	// if 0(BX) == AX, store CX there and set ZF
   443  	SETEQ	ret+24(FP)	// result byte = 1 when the swap happened
   444  	RET
   445  
   446  // bool casp1(void **val, void *old, void *new)
   447  // Atomically:
   448  //	if(*val == old){
   449  //		*val = new;
   450  //		return 1;
   451  //	} else
   452  //		return 0;
   453  TEXT runtime·casp1(SB), NOSPLIT, $0-17
   454  	MOVL	ptr+0(FP), BX
   455  	MOVL	old+4(FP), AX	// CMPXCHG compares against AX
   456  	MOVL	new+8(FP), CX
   457  	LOCK
   458  	CMPXCHGL	CX, 0(BX)	// pointers are 4 bytes here, so the 32-bit form is used
   459  	SETEQ	ret+16(FP)	// result byte = 1 when the swap happened
   460  	RET
   461  
   462  // uint32 xadd(uint32 volatile *val, int32 delta)
   463  // Atomically:
   464  //	*val += delta;
   465  //	return *val;
   466  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   467  	MOVL	ptr+0(FP), BX
   468  	MOVL	delta+4(FP), AX
   469  	MOVL	AX, CX		// keep a copy of delta; XADD overwrites AX
   470  	LOCK
   471  	XADDL	AX, 0(BX)	// 0(BX) += AX; AX = previous value of 0(BX)
   472  	ADDL	CX, AX		// AX = previous value + delta = new value
   473  	MOVL	AX, ret+8(FP)
   474  	RET
   475  
   476  TEXT runtime·xadd64(SB), NOSPLIT, $0-24	// 64-bit counterpart of xadd; returns the new value
   477  	MOVL	ptr+0(FP), BX
   478  	MOVQ	delta+8(FP), AX
   479  	MOVQ	AX, CX		// keep a copy of delta; XADD overwrites AX
   480  	LOCK
   481  	XADDQ	AX, 0(BX)	// 0(BX) += AX; AX = previous value of 0(BX)
   482  	ADDQ	CX, AX		// AX = previous value + delta = new value
   483  	MOVQ	AX, ret+16(FP)
   484  	RET
   485  
   486  TEXT runtime·xchg(SB), NOSPLIT, $0-12	// swaps new into *ptr (32-bit) and returns the previous value
   487  	MOVL	ptr+0(FP), BX
   488  	MOVL	new+4(FP), AX
   489  	XCHGL	AX, 0(BX)	// AX = old *ptr, *ptr = new
   490  	MOVL	AX, ret+8(FP)
   491  	RET
   492  
   493  TEXT runtime·xchg64(SB), NOSPLIT, $0-24	// swaps new into *ptr (64-bit) and returns the previous value
   494  	MOVL	ptr+0(FP), BX
   495  	MOVQ	new+8(FP), AX
   496  	XCHGQ	AX, 0(BX)	// AX = old *ptr, *ptr = new
   497  	MOVQ	AX, ret+16(FP)
   498  	RET
   499  
   500  TEXT runtime·xchgp1(SB), NOSPLIT, $0-12	// same instruction sequence as xchg
   501  	MOVL	ptr+0(FP), BX
   502  	MOVL	new+4(FP), AX
   503  	XCHGL	AX, 0(BX)	// AX = old *ptr, *ptr = new
   504  	MOVL	AX, ret+8(FP)
   505  	RET
   506  
   507  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12	// tail-jumps to the 32-bit xchg
   508  	JMP	runtime·xchg(SB)
   509  
   510  TEXT runtime·procyield(SB),NOSPLIT,$0-4	// executes PAUSE cycles times; the frame declares the 4-byte cycles argument
   511  	MOVL	cycles+0(FP), AX
   512  again:
   513  	PAUSE
   514  	SUBL	$1, AX		// NOTE(review): cycles == 0 wraps here and spins about 2^32 times — confirm callers never pass 0
   515  	JNZ	again
   516  	RET
   517  
   518  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8	// stores val into *ptr with XCHG; the old value is discarded
   519  	MOVL	ptr+0(FP), BX
   520  	MOVL	val+4(FP), AX
   521  	XCHGL	AX, 0(BX)	// store via exchange
   522  	RET
   523  
   524  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8	// stores the 32-bit val into *ptr with XCHG
   525  	MOVL	ptr+0(FP), BX
   526  	MOVL	val+4(FP), AX
   527  	XCHGL	AX, 0(BX)	// store via exchange
   528  	RET
   529  
   530  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16	// stores the 64-bit val into *ptr with XCHG
   531  	MOVL	ptr+0(FP), BX
   532  	MOVQ	val+8(FP), AX
   533  	XCHGQ	AX, 0(BX)	// store via exchange
   534  	RET
   535  
   536  // void	runtime·atomicor8(byte volatile*, byte); atomically ORs the byte val into *ptr.
   537  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
   538  	MOVL	ptr+0(FP), BX
   539  	MOVB	val+4(FP), AX
   540  	LOCK
   541  	ORB	AX, 0(BX)	// *ptr |= val
   542  	RET
   543  
   544  // void jmpdefer(fn, sp);
   545  // called from deferreturn.
   546  // 1. pop the caller
   547  // 2. sub 5 bytes from the callers return
   548  // 3. jmp to the argument
   549  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   550  	MOVL	fv+0(FP), DX	// DX = func value to run (closure context)
   551  	MOVL	argp+4(FP), BX	// BX = argp supplied by the caller
   552  	LEAL	-8(BX), SP	// caller sp after CALL
   553  	SUBL	$5, (SP)	// return to CALL again
   554  	MOVL	0(DX), BX	// BX = code pointer stored in the first word of the func value
   555  	JMP	BX	// but first run the deferred function
   556  
   557  // asmcgocall(void(*fn)(void*), void *arg)
   558  // Not implemented.
   559  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
   560  	MOVL	0, AX		// load from address 0: crashes if ever called
   561  	RET
   562  
   563  // asmcgocall_errno(void(*fn)(void*), void *arg) — NOTE(review): signature inferred from asmcgocall; confirm
   564  // Not implemented.
   565  TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
   566  	MOVL	0, AX		// load from address 0: crashes if ever called
   567  	RET
   568  
   569  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   570  // Not implemented.
   571  TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
   572  	MOVL	0, AX		// load from address 0: crashes if ever called
   573  	RET
   574  
   575  // void setg(G*); set g. for use by needm.
   576  // Not implemented.
   577  TEXT runtime·setg(SB), NOSPLIT, $0-4
   578  	MOVL	0, AX		// load from address 0: crashes if ever called
   579  	RET
   580  
   581  // check that SP is in range [g->stack.lo, g->stack.hi); faults on a load from address 0 otherwise
   582  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   583  	get_tls(CX)
   584  	MOVL	g(CX), AX
   585  	CMPL	(g_stack+stack_hi)(AX), SP
   586  	JHI	2(PC)		// stack.hi > SP (unsigned): skip the abort
   587  	MOVL	0, AX		// abort
   588  	CMPL	SP, (g_stack+stack_lo)(AX)
   589  	JHI	2(PC)		// SP > stack.lo (unsigned): skip the abort
   590  	MOVL	0, AX		// abort
   591  	RET
   592  
   593  TEXT runtime·memclr(SB),NOSPLIT,$0-8	// zeroes n bytes starting at ptr
   594  	MOVL	ptr+0(FP), DI
   595  	MOVL	n+4(FP), CX
   596  	MOVQ	CX, BX
   597  	ANDQ	$7, BX		// BX = n % 8, the tail bytes
   598  	SHRQ	$3, CX		// CX = n / 8, the number of quadwords
   599  	MOVQ	$0, AX		// value to store
   600  	CLD
   601  	REP
   602  	STOSQ			// clear CX quadwords
   603  	MOVQ	BX, CX
   604  	REP
   605  	STOSB			// clear the remaining 0-7 bytes
   606  	RET
   607  
   608  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12	// returns the 32-bit word stored 8 bytes below argp
   609  	MOVL	argp+0(FP),AX		// addr of first arg
   610  	MOVL	-8(AX),AX		// get calling pc
   611  	MOVL	AX, ret+8(FP)
   612  	RET
   613  
   614  TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12	// same body as getcallerpc, with the argument named p
   615  	MOVL	p+0(FP),AX		// addr of first arg
   616  	MOVL	-8(AX),AX		// get calling pc
   617  	MOVL	AX, ret+8(FP)
   618  	RET
   619  
   620  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8	// overwrites the return-PC slot 8 bytes below argp with pc
   621  	MOVL	argp+0(FP),AX		// addr of first arg
   622  	MOVL	pc+4(FP), BX		// pc to set
   623  	MOVQ	BX, -8(AX)		// set calling pc (8-byte store)
   624  	RET
   625  
   626  TEXT runtime·getcallersp(SB),NOSPLIT,$0-12	// returns argp unchanged
   627  	MOVL	argp+0(FP), AX
   628  	MOVL	AX, ret+8(FP)
   629  	RET
   630  
   631  // func gogetcallersp(p unsafe.Pointer) uintptr; returns p unchanged.
   632  TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-12
   633  	MOVL	p+0(FP),AX		// addr of first arg
   634  	MOVL	AX, ret+8(FP)
   635  	RET
   636  
   637  // int64 runtime·cputicks(void); returns the 64-bit RDTSC counter. The frame declares the 8-byte result.
   638  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   639  	RDTSC			// counter returned in DX (high half) and AX (low half)
   640  	SHLQ	$32, DX
   641  	ADDQ	DX, AX		// AX = DX<<32 + AX
   642  	MOVQ	AX, ret+0(FP)
   643  	RET
   644  
   645  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   646  // redirects to memhash(p, h, size) using the size
   647  // stored in the closure.
   648  TEXT runtime·memhash_varlen(SB),NOSPLIT,$20-12
   649  	GO_ARGS
   650  	NO_LOCAL_POINTERS
   651  	MOVL	p+0(FP), AX
   652  	MOVL	h+4(FP), BX
   653  	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
   654  	MOVL	AX, 0(SP)	// memhash arg 0: p
   655  	MOVL	BX, 4(SP)	// memhash arg 1: h
   656  	MOVL	CX, 8(SP)	// memhash arg 2: size
   657  	CALL	runtime·memhash(SB)
   658  	MOVL	16(SP), AX	// memhash's result slot
   659  	MOVL	AX, ret+8(FP)
   660  	RET
   661  
   662  // hash function using AES hardware instructions
   663  // For now, our one amd64p32 system (NaCl) does not
   664  // support using AES instructions, so have not bothered to
   665  // write the implementations. Can copy and adjust the ones
   666  // in asm_amd64.s when the time comes.
   667  
   668  TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// stub: no hashing is performed
   669  	MOVL	AX, ret+16(FP)	// AX is never set in this stub, so the result is whatever AX held on entry
   670  	RET
   671  
   672  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20	// stub: no hashing is performed
   673  	MOVL	AX, ret+16(FP)	// AX is never set in this stub
   674  	RET
   675  
   676  TEXT runtime·aeshash32(SB),NOSPLIT,$0-20	// stub: no hashing is performed
   677  	MOVL	AX, ret+16(FP)	// AX is never set in this stub
   678  	RET
   679  
   680  TEXT runtime·aeshash64(SB),NOSPLIT,$0-20	// stub: no hashing is performed
   681  	MOVL	AX, ret+16(FP)	// AX is never set in this stub
   682  	RET
   683  
   684  TEXT runtime·memeq(SB),NOSPLIT,$0-17	// reports whether the size bytes at a and b are equal
   685  	MOVL	a+0(FP), SI	// memeqbody input: a in SI
   686  	MOVL	b+4(FP), DI	// memeqbody input: b in DI
   687  	MOVL	size+8(FP), BX	// memeqbody input: count in BX
   688  	CALL	runtime·memeqbody(SB)
   689  	MOVB	AX, ret+16(FP)	// store the low byte of AX as the bool result
   690  	RET
   691  
   692  // memequal_varlen(a, b unsafe.Pointer) bool; the length is read from the closure in DX.
   693  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   694  	MOVL    a+0(FP), SI
   695  	MOVL    b+4(FP), DI
   696  	CMPL    SI, DI
   697  	JEQ     eq           // identical pointers: equal without reading memory
   698  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   699  	CALL    runtime·memeqbody(SB)
   700  	MOVB    AX, ret+8(FP)
   701  	RET
   702  eq:
   703  	MOVB    $1, ret+8(FP)
   704  	RET
   705  
   706  // eqstring reports whether two strings hold the same bytes.
   707  // runtime_test.go:eqstring_generic is the equivalent
   708  // Go code.
   709  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   710  	MOVL	s2len+12(FP), BX	// BX doubles as memeqbody's byte count
   711  	MOVL	s1len+4(FP), AX
   712  	CMPL	BX, AX
   713  	JNE	lendiffer		// lengths differ: not equal
   714  	MOVL	s2str+8(FP), DI
   715  	MOVL	s1str+0(FP), SI
   716  	CMPL	DI, SI
   717  	JEQ	samedata		// same data pointer and same length: equal
   718  	CALL	runtime·memeqbody(SB)
   719  	MOVB	AX, v+16(FP)
   720  	RET
   721  lendiffer:
   722  	MOVB	$0, v+16(FP)
   723  	RET
   724  samedata:
   725  	MOVB	$1, v+16(FP)
   726  	RET
   727  
   728  // a in SI
   729  // b in DI
   730  // count in BX; result in AX: 1 if the two regions are equal, 0 otherwise
   731  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   732  	XORQ	AX, AX		// AX = 0; the early RETs below return "not equal"
   733  
   734  	CMPQ	BX, $8
   735  	JB	small
   736  	
   737  	// 64 bytes at a time using xmm registers
   738  hugeloop:
   739  	CMPQ	BX, $64
   740  	JB	bigloop
   741  	MOVOU	(SI), X0
   742  	MOVOU	(DI), X1
   743  	MOVOU	16(SI), X2
   744  	MOVOU	16(DI), X3
   745  	MOVOU	32(SI), X4
   746  	MOVOU	32(DI), X5
   747  	MOVOU	48(SI), X6
   748  	MOVOU	48(DI), X7
   749  	PCMPEQB	X1, X0
   750  	PCMPEQB	X3, X2
   751  	PCMPEQB	X5, X4
   752  	PCMPEQB	X7, X6
   753  	PAND	X2, X0		// fold the four per-byte equality masks into X0
   754  	PAND	X6, X4
   755  	PAND	X4, X0
   756  	PMOVMSKB X0, DX		// DX = one bit per byte of the combined mask
   757  	ADDQ	$64, SI
   758  	ADDQ	$64, DI
   759  	SUBQ	$64, BX
   760  	CMPL	DX, $0xffff	// all 16 bits set means all 64 bytes matched
   761  	JEQ	hugeloop
   762  	RET			// mismatch: AX is still 0
   763  
   764  	// 8 bytes at a time using 64-bit register
   765  bigloop:
   766  	CMPQ	BX, $8
   767  	JBE	leftover
   768  	MOVQ	(SI), CX
   769  	MOVQ	(DI), DX
   770  	ADDQ	$8, SI
   771  	ADDQ	$8, DI
   772  	SUBQ	$8, BX
   773  	CMPQ	CX, DX
   774  	JEQ	bigloop
   775  	RET			// mismatch: AX is still 0
   776  
   777  	// remaining 0-8 bytes
   778  leftover:
   779  	ADDQ	BX, SI		// point just past the end of each region
   780  	ADDQ	BX, DI
   781  	MOVQ	-8(SI), CX	// compare the final 8 bytes (may overlap bytes already checked)
   782  	MOVQ	-8(DI), DX
   783  	CMPQ	CX, DX
   784  	SETEQ	AX
   785  	RET
   786  
   787  small:
   788  	CMPQ	BX, $0
   789  	JEQ	equal		// zero length: ZF is set, so SETEQ at equal yields 1
   790  
   791  	LEAQ	0(BX*8), CX
   792  	NEGQ	CX		// CX = -8*count; shifts use it mod 64, i.e. 64 - 8*count
   793  
   794  	CMPB	SI, $0xf8	// test the low byte of the address
   795  	JA	si_high
   796  
   797  	// load at SI won't cross a page boundary.
   798  	MOVQ	(SI), SI
   799  	JMP	si_finish
   800  si_high:
   801  	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
   802  	MOVQ	BX, DX
   803  	ADDQ	SI, DX
   804  	MOVQ	-8(DX), SI	// load the 8 bytes ending at a+count
   805  	SHRQ	CX, SI		// shift the wanted bytes down to the low end
   806  si_finish:
   807  
   808  	// same for DI.
   809  	CMPB	DI, $0xf8
   810  	JA	di_high
   811  	MOVQ	(DI), DI
   812  	JMP	di_finish
   813  di_high:
   814  	MOVQ	BX, DX
   815  	ADDQ	DI, DX
   816  	MOVQ	-8(DX), DI
   817  	SHRQ	CX, DI
   818  di_finish:
   819  
   820  	SUBQ	SI, DI		// DI = difference of the two loaded quadwords
   821  	SHLQ	CX, DI		// shift out bytes beyond count; ZF set iff the wanted bytes match
   822  equal:
   823  	SETEQ	AX
   824  	RET
   825  
   826  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// three-way string compare; stores cmpbody's result (1/0/-1)
   827  	MOVL	s1_base+0(FP), SI	// cmpbody input: a
   828  	MOVL	s1_len+4(FP), BX	// cmpbody input: alen
   829  	MOVL	s2_base+8(FP), DI	// cmpbody input: b
   830  	MOVL	s2_len+12(FP), DX	// cmpbody input: blen
   831  	CALL	runtime·cmpbody(SB)
   832  	MOVL	AX, ret+16(FP)
   833  	RET
   834  
   835  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// three-way byte-slice compare; stores cmpbody's result (1/0/-1)
   836  	MOVL	s1+0(FP), SI	// cmpbody input: a (data pointer of the first slice)
   837  	MOVL	s1+4(FP), BX	// cmpbody input: alen
   838  	MOVL	s2+12(FP), DI	// cmpbody input: b (data pointer of the second slice)
   839  	MOVL	s2+16(FP), DX	// cmpbody input: blen
   840  	CALL	runtime·cmpbody(SB)
   841  	MOVL	AX, res+24(FP)	// 4-byte store: the result slot is bytes 24..27 of the 28-byte argument area
   842  	RET
   843  
   844  // input:
   845  //   SI = a
   846  //   DI = b
   847  //   BX = alen
   848  //   DX = blen
   849  // output:
   850  //   AX = 1/0/-1
   851  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   852  	CMPQ	SI, DI
   853  	JEQ	allsame		// same start address: only the lengths decide
   854  	CMPQ	BX, DX
   855  	MOVQ	DX, R8
   856  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   857  	CMPQ	R8, $8
   858  	JB	small
   859  
   860  loop:
   861  	CMPQ	R8, $16
   862  	JBE	_0through16
   863  	MOVOU	(SI), X0
   864  	MOVOU	(DI), X1
   865  	PCMPEQB X0, X1
   866  	PMOVMSKB X1, AX
   867  	XORQ	$0xffff, AX	// convert EQ to NE
   868  	JNE	diff16	// branch if at least one byte is not equal
   869  	ADDQ	$16, SI
   870  	ADDQ	$16, DI
   871  	SUBQ	$16, R8
   872  	JMP	loop
   873  	
   874  	// AX = bit mask of differences
   875  diff16:
   876  	BSFQ	AX, BX	// index of first byte that differs
   877  	XORQ	AX, AX
   878  	ADDQ	BX, SI
   879  	MOVB	(SI), CX
   880  	ADDQ	BX, DI
   881  	CMPB	CX, (DI)
   882  	SETHI	AX		// AX = 1 if a's byte is above b's byte (unsigned)
   883  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   884  	RET
   885  
   886  	// 0 through 16 bytes left, alen>=8, blen>=8
   887  _0through16:
   888  	CMPQ	R8, $8
   889  	JBE	_0through8
   890  	MOVQ	(SI), AX
   891  	MOVQ	(DI), CX
   892  	CMPQ	AX, CX
   893  	JNE	diff8
   894  _0through8:
   895  	ADDQ	R8, SI
   896  	ADDQ	R8, DI
   897  	MOVQ	-8(SI), AX	// final 8 bytes of the common prefix (may overlap bytes already compared)
   898  	MOVQ	-8(DI), CX
   899  	CMPQ	AX, CX
   900  	JEQ	allsame
   901  
   902  	// AX and CX contain parts of a and b that differ.
   903  diff8:
   904  	BSWAPQ	AX	// reverse order of bytes
   905  	BSWAPQ	CX
   906  	XORQ	AX, CX
   907  	BSRQ	CX, CX	// index of highest bit difference
   908  	SHRQ	CX, AX	// move a's bit to bottom
   909  	ANDQ	$1, AX	// mask bit
   910  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   911  	RET
   912  
   913  	// 0-7 bytes in common
   914  small:
   915  	LEAQ	(R8*8), CX	// bytes left -> bits left
   916  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   917  	JEQ	allsame
   918  
   919  	// load bytes of a into high bytes of SI
   920  	CMPB	SI, $0xf8
   921  	JA	si_high
   922  	MOVQ	(SI), SI
   923  	JMP	si_finish
   924  si_high:
   925  	ADDQ	R8, SI
   926  	MOVQ	-8(SI), SI
   927  	SHRQ	CX, SI
   928  si_finish:
   929  	SHLQ	CX, SI
   930  
   931  	// load bytes of b into high bytes of DI
   932  	CMPB	DI, $0xf8
   933  	JA	di_high
   934  	MOVQ	(DI), DI
   935  	JMP	di_finish
   936  di_high:
   937  	ADDQ	R8, DI
   938  	MOVQ	-8(DI), DI
   939  	SHRQ	CX, DI
   940  di_finish:
   941  	SHLQ	CX, DI
   942  
   943  	BSWAPQ	SI	// reverse order of bytes
   944  	BSWAPQ	DI
   945  	XORQ	SI, DI	// find bit differences
   946  	JEQ	allsame
   947  	BSRQ	DI, CX	// index of highest bit difference
   948  	SHRQ	CX, SI	// move a's bit to bottom
   949  	ANDQ	$1, SI	// mask bit
   950  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   951  	RET
   952  
   953  allsame:
   954  	XORQ	AX, AX
   955  	XORQ	CX, CX
   956  	CMPQ	BX, DX
   957  	SETGT	AX	// 1 if alen > blen
   958  	SETEQ	CX	// 1 if alen == blen
   959  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   960  	RET
   961  
   962  TEXT bytes·IndexByte(SB),NOSPLIT,$0	// index of the first c in the byte slice s, or -1 (see indexbytebody)
   963  	MOVL s+0(FP), SI	// indexbytebody input: data
   964  	MOVL s_len+4(FP), BX	// indexbytebody input: data len
   965  	MOVB c+12(FP), AL	// indexbytebody input: byte sought
   966  	CALL runtime·indexbytebody(SB)
   967  	MOVL AX, ret+16(FP)
   968  	RET
   969  
   970  TEXT strings·IndexByte(SB),NOSPLIT,$0	// index of the first c in the string s, or -1 (see indexbytebody)
   971  	MOVL s+0(FP), SI	// indexbytebody input: data
   972  	MOVL s_len+4(FP), BX	// indexbytebody input: data len
   973  	MOVB c+8(FP), AL	// indexbytebody input: byte sought
   974  	CALL runtime·indexbytebody(SB)
   975  	MOVL AX, ret+16(FP)
   976  	RET
   977  
   978  // input:
   979  //   SI: data
   980  //   BX: data len
   981  //   AL: byte sought
   982  // output:
   983  //   AX: index of the first matching byte, or -1 if there is none
   984  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
   985  	MOVL SI, DI	// DI is the scan cursor; SI keeps the start for the index computation
   986  
   987  	CMPL BX, $16
   988  	JLT small
   989  
   990  	// round up to first 16-byte boundary
   991  	TESTL $15, SI
   992  	JZ aligned
   993  	MOVL SI, CX
   994  	ANDL $~15, CX
   995  	ADDL $16, CX
   996  
   997  	// search the beginning
   998  	SUBL SI, CX	// CX = number of bytes before the boundary
   999  	REPN; SCASB
  1000  	JZ success
  1001  
  1002  // DI is 16-byte aligned; get ready to search using SSE instructions
  1003  aligned:
  1004  	// round down to last 16-byte boundary
  1005  	MOVL BX, R11
  1006  	ADDL SI, R11
  1007  	ANDL $~15, R11	// R11 = (data + len) rounded down to 16
  1008  
  1009  	// shuffle X0 around so that each byte contains c
  1010  	MOVD AX, X0
  1011  	PUNPCKLBW X0, X0
  1012  	PUNPCKLBW X0, X0
  1013  	PSHUFL $0, X0, X0
  1014  	JMP condition
  1015  
  1016  sse:
  1017  	// move the next 16-byte chunk of the buffer into X1
  1018  	MOVO (DI), X1
  1019  	// compare bytes in X0 to X1
  1020  	PCMPEQB X0, X1
  1021  	// take the top bit of each byte in X1 and put the result in DX
  1022  	PMOVMSKB X1, DX
  1023  	TESTL DX, DX
  1024  	JNZ ssesuccess
  1025  	ADDL $16, DI
  1026  
  1027  condition:
  1028  	CMPL DI, R11
  1029  	JLT sse	// NOTE(review): signed compare of 32-bit addresses; confirm buffers never straddle 0x80000000
  1030  
  1031  	// search the end
  1032  	MOVL SI, CX
  1033  	ADDL BX, CX
  1034  	SUBL R11, CX	// CX = bytes left after the last aligned chunk
  1035  	// if CX == 0, the zero flag will be set and we'll end up
  1036  	// returning a false success
  1037  	JZ failure
  1038  	REPN; SCASB
  1039  	JZ success
  1040  
  1041  failure:
  1042  	MOVL $-1, AX
  1043  	RET
  1044  
  1045  // handle for lengths < 16
  1046  small:
  1047  	MOVL BX, CX
  1048  	REPN; SCASB
  1049  	JZ success
  1050  	MOVL $-1, AX
  1051  	RET
  1052  
  1053  // we've found the chunk containing the byte
  1054  // now just figure out which specific byte it is
  1055  ssesuccess:
  1056  	// get the index of the least significant set bit
  1057  	BSFW DX, DX
  1058  	SUBL SI, DI	// DI = offset of the chunk from the start
  1059  	ADDL DI, DX	// index = chunk offset + byte position within the chunk
  1060  	MOVL DX, AX
  1061  	RET
  1062  
  1063  success:
  1064  	SUBL SI, DI
  1065  	SUBL $1, DI	// SCASB left DI one past the match
  1066  	MOVL DI, AX
  1067  	RET
  1068  
  1069  TEXT bytes·Equal(SB),NOSPLIT,$0-25	// reports whether byte slices a and b have equal length and contents
  1070  	XORL	AX, AX			// default result: not equal
  1071  	MOVL	b_len+16(FP), CX
  1072  	MOVL	a_len+4(FP), BX		// BX doubles as memeqbody's byte count
  1073  	CMPL	CX, BX
  1074  	JNE	done			// lengths differ: keep AX = 0
  1075  	MOVL	b+12(FP), DI
  1076  	MOVL	a+0(FP), SI
  1077  	CALL	runtime·memeqbody(SB)
  1078  done:
  1079  	MOVB	AX, ret+24(FP)
  1080  	RET
  1081  
  1082  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4	// advances m->fastrand one step and returns the new value
  1083  	get_tls(CX)
  1084  	MOVL	g(CX), AX
  1085  	MOVL	g_m(AX), AX		// AX = g->m
  1086  	MOVL	m_fastrand(AX), DX
  1087  	ADDL	DX, DX			// DX = 2*x
  1088  	MOVL	DX, BX			// BX = 2*x, kept un-XORed
  1089  	XORL	$0x88888eef, DX		// DX = 2*x ^ 0x88888eef; sets the sign flag
  1090  	CMOVLMI	BX, DX			// XOR result negative: take the un-XORed 2*x instead
  1091  	MOVL	DX, m_fastrand(AX)
  1092  	MOVL	DX, ret+0(FP)
  1093  	RET
  1094  
  1095  TEXT runtime·return0(SB), NOSPLIT, $0	// sets AX to 0 and returns
  1096  	MOVL	$0, AX
  1097  	RET
  1098  
  1099  // The top-most function running on a goroutine
  1100  // returns to goexit+PCQuantum.
  1101  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1102  	BYTE	$0x90	// NOP
  1103  	CALL	runtime·goexit1(SB)	// does not return
  1104  
  1105  TEXT runtime·getg(SB),NOSPLIT,$0-4	// returns the current g read from thread-local storage
  1106  	get_tls(CX)
  1107  	MOVL	g(CX), AX
  1108  	MOVL	AX, ret+0(FP)
  1109  	RET
  1110  
  1111  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4	// issues PREFETCHT0 on addr
  1112  	MOVL	addr+0(FP), AX
  1113  	PREFETCHT0	(AX)
  1114  	RET
  1115  
  1116  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4	// issues PREFETCHT1 on addr
  1117  	MOVL	addr+0(FP), AX
  1118  	PREFETCHT1	(AX)
  1119  	RET
  1120  
  1121  
  1122  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4	// issues PREFETCHT2 on addr
  1123  	MOVL	addr+0(FP), AX
  1124  	PREFETCHT2	(AX)
  1125  	RET
  1126  
  1127  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4	// issues PREFETCHNTA on addr
  1128  	MOVL	addr+0(FP), AX
  1129  	PREFETCHNTA	(AX)
  1130  	RET
  1130  	RET