github.com/fenixara/go@v0.0.0-20170127160404-96ea0918e670/src/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry point. In order: stash argc/argv in an aligned
        	// scratch area, give g0 provisional stack bounds, probe the CPU,
        	// set up TLS (through _cgo_init or ldt0setup), link g0 and m0,
        	// run check/args/osinit/schedinit, queue runtime·mainPC with
        	// newproc and enter mstart. BP holds &g0 until the _cgo_init call.
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	SUBL	$128, SP		// plenty of scratch
    15  	ANDL	$~15, SP
    16  	MOVL	AX, 120(SP)		// save argc, argv away
    17  	MOVL	BX, 124(SP)
    18  
    19  	// set default stack bounds.
    20  	// _cgo_init may update stackguard.
    21  	MOVL	$runtime·g0(SB), BP
    22  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104, used as guard and as stack.lo
    23  	MOVL	BX, g_stackguard0(BP)
    24  	MOVL	BX, g_stackguard1(BP)
    25  	MOVL	BX, (g_stack+stack_lo)(BP)
    26  	MOVL	SP, (g_stack+stack_hi)(BP)
    27  	
    28  	// find out information about the processor we're on
    29  #ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
    30  	JMP 	has_cpuid
    31  #else
    32  	// first see if CPUID instruction is supported.
        	// Two copies of EFLAGS are pushed: the top one gets its ID bit
        	// flipped and is loaded back, the lower one is the pristine value
        	// used for the comparison and for the final restore.
    33  	PUSHFL
    34  	PUSHFL
    35  	XORL	$(1<<21), 0(SP) // flip ID bit
    36  	POPFL
    37  	PUSHFL
    38  	POPL	AX
    39  	XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
    40  	POPFL	// restore EFLAGS
    41  	TESTL	$(1<<21), AX
    42  	JNE 	has_cpuid	// the ID bit could be toggled
    43  #endif
    44  
    45  bad_proc: // show that the program requires MMX.
        	// Reached by falling through when the ID bit cannot be toggled,
        	// or by the JZ after the MMX test below.
    46  	MOVL	$2, 0(SP)	// presumably file descriptor 2 (stderr) - confirm against runtime·write
    47  	MOVL	$bad_proc_msg<>(SB), 4(SP)
    48  	MOVL	$0x3d, 8(SP)	// same size as the GLOBL declaration of bad_proc_msg<>
    49  	CALL	runtime·write(SB)
    50  	MOVL	$1, 0(SP)
    51  	CALL	runtime·exit(SB)
    52  	INT	$3
    53  
    54  has_cpuid:
    55  	MOVL	$0, AX
    56  	CPUID
    57  	MOVL	AX, SI	// SI = AX result of leaf 0, compared with 7 further down
    58  	CMPL	AX, $0
    59  	JE	nocpuinfo
    60  
    61  	// Figure out how to serialize RDTSC.
    62  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
    63  	// Don't know about the rest, so let's do MFENCE.
    64  	CMPL	BX, $0x756E6547  // "Genu"
    65  	JNE	notintel
    66  	CMPL	DX, $0x49656E69  // "ineI"
    67  	JNE	notintel
    68  	CMPL	CX, $0x6C65746E  // "ntel"
    69  	JNE	notintel
    70  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
    71  notintel:
    72  
    73  	// Load EAX=1 cpuid flags
    74  	MOVL	$1, AX
    75  	CPUID
    76  	MOVL	CX, AX // Move to global variable clobbers CX when generating PIC
    77  	MOVL	AX, runtime·cpuid_ecx(SB)
    78  	MOVL	DX, runtime·cpuid_edx(SB)
    79  
    80  	// Check for MMX support
    81  	TESTL	$(1<<23), DX	// MMX
    82  	JZ 	bad_proc
    83  
    84  	// Load EAX=7/ECX=0 cpuid flags
    85  	CMPL	SI, $7
    86  	JLT	nocpuinfo	// leaf 7 is not queried when SI < 7
    87  	MOVL	$7, AX
    88  	MOVL	$0, CX
    89  	CPUID
    90  	MOVL	BX, runtime·cpuid_ebx7(SB)
    91  
    92  nocpuinfo:	
    93  
    94  	// if there is an _cgo_init, call it to let it
    95  	// initialize and to set up GS.  if not,
    96  	// we set up GS ourselves.
    97  	MOVL	_cgo_init(SB), AX
    98  	TESTL	AX, AX
    99  	JZ	needtls
   100  	MOVL	$setg_gcc<>(SB), BX
   101  	MOVL	BX, 4(SP)	// second argument: address of setg_gcc<>
   102  	MOVL	BP, 0(SP)	// first argument: &g0 (BP was loaded above)
   103  	CALL	AX
   104  
   105  	// update stackguard after _cgo_init
   106  	MOVL	$runtime·g0(SB), CX
   107  	MOVL	(g_stack+stack_lo)(CX), AX
   108  	ADDL	$const__StackGuard, AX
   109  	MOVL	AX, g_stackguard0(CX)
   110  	MOVL	AX, g_stackguard1(CX)
   111  
   112  #ifndef GOOS_windows
   113  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   114  	JMP ok
   115  #endif
   116  needtls:
   117  #ifdef GOOS_plan9
   118  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   119  	JMP	ok
   120  #endif
   121  
   122  	// set up %gs
   123  	CALL	runtime·ldt0setup(SB)
   124  
   125  	// store through it, to make sure it works
        	// The marker written through the TLS g slot must be readable back
        	// at runtime·m0+m_tls.
   126  	get_tls(BX)
   127  	MOVL	$0x123, g(BX)
   128  	MOVL	runtime·m0+m_tls(SB), AX
   129  	CMPL	AX, $0x123
   130  	JEQ	ok
   131  	MOVL	AX, 0	// abort
   132  ok:
   133  	// set up m and g "registers"
   134  	get_tls(BX)
   135  	LEAL	runtime·g0(SB), DX
   136  	MOVL	DX, g(BX)
   137  	LEAL	runtime·m0(SB), AX
   138  
   139  	// save m->g0 = g0
   140  	MOVL	DX, m_g0(AX)
   141  	// save g0->m = m0
   142  	MOVL	AX, g_m(DX)
   143  
   144  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   145  
   146  	// convention is D is always cleared
   147  	CLD
   148  
   149  	CALL	runtime·check(SB)
   150  
   151  	// saved argc, argv
        	// Reload them from the scratch slots written at the top and pass
        	// them to runtime·args as its two stack arguments.
   152  	MOVL	120(SP), AX
   153  	MOVL	AX, 0(SP)
   154  	MOVL	124(SP), AX
   155  	MOVL	AX, 4(SP)
   156  	CALL	runtime·args(SB)
   157  	CALL	runtime·osinit(SB)
   158  	CALL	runtime·schedinit(SB)
   159  
   160  	// create a new goroutine to start program
   161  	PUSHL	$runtime·mainPC(SB)	// entry
   162  	PUSHL	$0	// arg size
   163  	CALL	runtime·newproc(SB)
   164  	POPL	AX	// drop the two words pushed for newproc
   165  	POPL	AX
   166  
   167  	// start this M
   168  	CALL	runtime·mstart(SB)
   169  
   170  	INT $3	// only reached if mstart returns
   171  	RET
   172  
   173  DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// text written by rt0_go's bad_proc path
   174  DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   175  DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   176  DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   177  DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   178  DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   179  DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   180  DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   181  DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   182  GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 7*8 + 4 + 1 = 61 bytes, file-local and read-only
   183  
   184  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// one word holding the address of runtime·main
   185  GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes the address of this word as newproc's entry argument
   186  
   187  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes a single INT $3 (the x86 breakpoint trap) and returns.
   188  	INT $3
   189  	RET
   190  
   191  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   192  	// Linux and MinGW start the FPU in extended double precision.
   193  	// Other operating systems use double precision.
   194  	// Change to double precision to match them,
   195  	// and to match other hardware that only has double.
   196  	FLDCW	runtime·controlWord64(SB)	// load the x87 control word from the runtime·controlWord64 variable
   197  	RET
   198  
   199  /*
   200   *  go-routine
   201   */
   202  
   203  // void gosave(Gobuf*)
   204  // save state in Gobuf; setjmp
        // Writes the caller's SP, the caller's PC and the current g into *buf
        // and zeroes buf.ret. buf.ctxt is only checked, never written: a
        // non-zero value leads to a call to runtime·badctxt.
   205  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   206  	MOVL	buf+0(FP), AX		// gobuf
   207  	LEAL	buf+0(FP), BX		// caller's SP
   208  	MOVL	BX, gobuf_sp(AX)
   209  	MOVL	0(SP), BX		// caller's PC
   210  	MOVL	BX, gobuf_pc(AX)
   211  	MOVL	$0, gobuf_ret(AX)
   212  	// Assert ctxt is zero. See func save.
   213  	MOVL	gobuf_ctxt(AX), BX
   214  	TESTL	BX, BX
   215  	JZ	2(PC)	// ctxt is zero: step over the CALL
   216  	CALL	runtime·badctxt(SB)
   217  	get_tls(CX)
   218  	MOVL	g(CX), BX
   219  	MOVL	BX, gobuf_g(AX)
   220  	RET
   221  
   222  // void gogo(Gobuf*)
   223  // restore state from Gobuf; longjmp
        // Makes buf.g the current g, loads SP from buf.sp, AX from buf.ret and
        // DX from buf.ctxt, zeroes those three Gobuf fields and jumps to buf.pc.
        // The 8-byte frame holds the two arguments of the write-barrier call.
   224  TEXT runtime·gogo(SB), NOSPLIT, $8-4
   225  	MOVL	buf+0(FP), BX		// gobuf
   226  
   227  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   228  	MOVL	gobuf_ctxt(BX), DX
   229  	TESTL	DX, DX
   230  	JZ	nilctxt
   231  	LEAL	gobuf_ctxt(BX), AX
   232  	MOVL	AX, 0(SP)	// first argument: address of buf.ctxt
   233  	MOVL	$0, 4(SP)	// second argument: zero, the value stored into buf.ctxt below
   234  	CALL	runtime·writebarrierptr_prewrite(SB)
   235  	MOVL	buf+0(FP), BX	// reload the gobuf pointer after the call
   236  
   237  nilctxt:
   238  	MOVL	gobuf_g(BX), DX
   239  	MOVL	0(DX), CX		// make sure g != nil
   240  	get_tls(CX)
   241  	MOVL	DX, g(CX)
   242  	MOVL	gobuf_sp(BX), SP	// restore SP
   243  	MOVL	gobuf_ret(BX), AX
   244  	MOVL	gobuf_ctxt(BX), DX
   245  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   246  	MOVL	$0, gobuf_ret(BX)
   247  	MOVL	$0, gobuf_ctxt(BX)
   248  	MOVL	gobuf_pc(BX), BX
   249  	JMP	BX
   250  
   251  // func mcall(fn func(*g))
   252  // Switch to m->g0's stack, call fn(g).
   253  // Fn must never return. It should gogo(&g->sched)
   254  // to keep running g.
        // Register use below: DI = fn, AX = the calling g, SI = m->g0,
        // DX = TLS base until it is reloaded with fn just before the call.
   255  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   256  	MOVL	fn+0(FP), DI
   257  
   258  	get_tls(DX)
   259  	MOVL	g(DX), AX	// save state in g->sched
   260  	MOVL	0(SP), BX	// caller's PC
   261  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   262  	LEAL	fn+0(FP), BX	// caller's SP
   263  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   264  	MOVL	AX, (g_sched+gobuf_g)(AX)
   265  
   266  	// switch to m->g0 & its stack, call fn
   267  	MOVL	g(DX), BX
   268  	MOVL	g_m(BX), BX
   269  	MOVL	m_g0(BX), SI
   270  	CMPL	SI, AX	// if g == m->g0 call badmcall
   271  	JNE	3(PC)	// different g: step over the two-instruction badmcall jump
   272  	MOVL	$runtime·badmcall(SB), AX
   273  	JMP	AX
   274  	MOVL	SI, g(DX)	// g = m->g0
   275  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   276  	PUSHL	AX	// argument for fn: the g that called mcall
   277  	MOVL	DI, DX	// DX = fn, the closure pointer
   278  	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
   279  	CALL	DI
   280  	POPL	AX
   281  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   282  	JMP	AX
   283  	RET
   284  
   285  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   286  // of the G stack. We need to distinguish the routine that
   287  // lives at the bottom of the G stack from the one that lives
   288  // at the top of the system stack because the one at the top of
   289  // the system stack terminates the stack walk (see topofstack()).
        // systemstack stores this symbol's address in g->sched.pc before it
        // switches stacks.
   290  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   291  	RET
   292  
   293  // func systemstack(fn func())
        // Calls fn on the g0 stack. If the current g is already m->gsignal or
        // m->g0, fn is called in place (noswitch). If it is m->curg, the state is
        // saved in g->sched, the stack is switched to g0's for the call and
        // switched back afterwards. Any other g is reported via badsystemstack.
   294  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   295  	MOVL	fn+0(FP), DI	// DI = fn
   296  	get_tls(CX)
   297  	MOVL	g(CX), AX	// AX = g
   298  	MOVL	g_m(AX), BX	// BX = m
   299  
   300  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   301  	CMPL	AX, DX
   302  	JEQ	noswitch
   303  
   304  	MOVL	m_g0(BX), DX	// DX = g0
   305  	CMPL	AX, DX
   306  	JEQ	noswitch
   307  
   308  	MOVL	m_curg(BX), BP
   309  	CMPL	AX, BP
   310  	JEQ	switch
   311  	
   312  	// Bad: g is not gsignal, not g0, not curg. What is it?
   313  	// Hide call from linker nosplit analysis.
   314  	MOVL	$runtime·badsystemstack(SB), AX
   315  	CALL	AX
   316  
   317  switch:
   318  	// save our state in g->sched. Pretend to
   319  	// be systemstack_switch if the G stack is scanned.
   320  	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   321  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   322  	MOVL	AX, (g_sched+gobuf_g)(AX)
   323  
   324  	// switch to g0
   325  	get_tls(CX)
   326  	MOVL	DX, g(CX)	// DX still holds g0 from the comparison above
   327  	MOVL	(g_sched+gobuf_sp)(DX), BX
   328  	// make it look like mstart called systemstack on g0, to stop traceback
   329  	SUBL	$4, BX
   330  	MOVL	$runtime·mstart(SB), DX
   331  	MOVL	DX, 0(BX)	// address of mstart sits where a return PC would be
   332  	MOVL	BX, SP
   333  
   334  	// call target function
   335  	MOVL	DI, DX	// DX = fn, the closure pointer
   336  	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
   337  	CALL	DI
   338  
   339  	// switch back to g
   340  	get_tls(CX)
   341  	MOVL	g(CX), AX
   342  	MOVL	g_m(AX), BX
   343  	MOVL	m_curg(BX), AX
   344  	MOVL	AX, g(CX)
   345  	MOVL	(g_sched+gobuf_sp)(AX), SP	// SP saved at the switch label
   346  	MOVL	$0, (g_sched+gobuf_sp)(AX)
   347  	RET
   348  
   349  noswitch:
   350  	// already on system stack, just call directly
   351  	MOVL	DI, DX
   352  	MOVL	0(DI), DI
   353  	CALL	DI
   354  	RET
   355  
   356  /*
   357   * support for morestack
   358   */
   359  
   360  // Called during function prolog when more stack is needed.
   361  //
   362  // The traceback routines see morestack on a g0 as being
   363  // the top of a stack (for example, morestack calling newstack
   364  // calling the scheduler calling newm calling gc), so we must
   365  // record an argument size. For that purpose, it has no arguments.
        //
        // DX is not written here; it is pushed unchanged as the ctxt argument
        // of newstack (morestack_noctxt zeroes it before jumping here).
   366  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   367  	// Cannot grow scheduler stack (m->g0).
   368  	get_tls(CX)
   369  	MOVL	g(CX), BX
   370  	MOVL	g_m(BX), BX
   371  	MOVL	m_g0(BX), SI
   372  	CMPL	g(CX), SI
   373  	JNE	3(PC)	// not g0: step over the CALL and the INT
   374  	CALL	runtime·badmorestackg0(SB)
   375  	INT	$3
   376  
   377  	// Cannot grow signal stack.
   378  	MOVL	m_gsignal(BX), SI
   379  	CMPL	g(CX), SI
   380  	JNE	3(PC)	// not gsignal: step over the CALL and the INT
   381  	CALL	runtime·badmorestackgsignal(SB)
   382  	INT	$3
   383  
   384  	// Called from f.
   385  	// Set m->morebuf to f's caller.
   386  	MOVL	4(SP), DI	// f's caller's PC
   387  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   388  	LEAL	8(SP), CX	// f's caller's SP
   389  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   390  	get_tls(CX)	// CX was used as scratch just above, so reload the TLS base
   391  	MOVL	g(CX), SI
   392  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   393  
   394  	// Set g->sched to context in f.
   395  	MOVL	0(SP), AX	// f's PC
   396  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   397  	MOVL	SI, (g_sched+gobuf_g)(SI)
   398  	LEAL	4(SP), AX	// f's SP
   399  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   400  	// newstack will fill gobuf.ctxt.
   401  
   402  	// Call newstack on m->g0's stack.
   403  	MOVL	m_g0(BX), BP
   404  	MOVL	BP, g(CX)
   405  	MOVL	(g_sched+gobuf_sp)(BP), AX
   406  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   407  	MOVL	AX, SP
   408  	PUSHL	DX	// ctxt argument
   409  	CALL	runtime·newstack(SB)
   410  	MOVL	$0, 0x1003	// crash if newstack returns
   411  	POPL	DX	// keep balance check happy
   412  	RET
   413  
   414  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
        	// Same as morestack but with a zero context: morestack pushes DX
        	// as the ctxt argument of newstack.
   415  	MOVL	$0, DX
   416  	JMP runtime·morestack(SB)
   417  
   418  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   419  	// We came here via a RET to an overwritten return PC.
   420  	// AX may be live. Other registers are available.
        	// AX is therefore left untouched; only CX, DX and BX are used.
   421  
   422  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   423  	get_tls(CX)
   424  	MOVL	g(CX), CX	// CX = g
   425  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the g.stkbar array
   426  	MOVL	g_stkbarPos(CX), BX	// BX = index of the barrier that was hit
   427  	IMULL	$stkbar__size, BX	// Too big for SIB.
   428  	MOVL	stkbar_savedLRVal(DX)(BX*1), BX	// BX = savedLRVal of that entry
   429  	// Record that this stack barrier was hit.
   430  	ADDL	$1, g_stkbarPos(CX)
   431  	// Jump to the original return PC.
   432  	JMP	BX
   433  
   434  // reflectcall: call a function with the given argument list
   435  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   436  // we don't have variable-sized frames, so we use a small number
   437  // of constant-sized-frame functions to encode a few bits of size in the pc.
   438  // Caution: ugly multiline assembly macros in your future!
   439  
        // DISPATCH(NAME, MAXSIZE) expects the argument size in CX. When CX is
        // above MAXSIZE (unsigned compare) the JA steps over the two-instruction
        // jump and execution continues with whatever follows the macro;
        // otherwise control is transferred to NAME through AX.
   440  #define DISPATCH(NAME,MAXSIZE)		\
   441  	CMPL	CX, $MAXSIZE;		\
   442  	JA	3(PC);			\
   443  	MOVL	$NAME(SB), AX;		\
   444  	JMP	AX
   445  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   446  
   447  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: jumps straight to ·reflectcall without
        	// creating a frame, so the arguments are seen unchanged.
   448  	JMP	·reflectcall(SB)
   449  
   450  TEXT ·reflectcall(SB), NOSPLIT, $0-20
        	// Loads argsize into CX and walks the DISPATCH list in ascending
        	// order, so control goes to the first callN with argsize <= N.
        	// Sizes above 1073741824 fall through to badreflectcall.
   451  	MOVL	argsize+12(FP), CX
   452  	DISPATCH(runtime·call16, 16)
   453  	DISPATCH(runtime·call32, 32)
   454  	DISPATCH(runtime·call64, 64)
   455  	DISPATCH(runtime·call128, 128)
   456  	DISPATCH(runtime·call256, 256)
   457  	DISPATCH(runtime·call512, 512)
   458  	DISPATCH(runtime·call1024, 1024)
   459  	DISPATCH(runtime·call2048, 2048)
   460  	DISPATCH(runtime·call4096, 4096)
   461  	DISPATCH(runtime·call8192, 8192)
   462  	DISPATCH(runtime·call16384, 16384)
   463  	DISPATCH(runtime·call32768, 32768)
   464  	DISPATCH(runtime·call65536, 65536)
   465  	DISPATCH(runtime·call131072, 131072)
   466  	DISPATCH(runtime·call262144, 262144)
   467  	DISPATCH(runtime·call524288, 524288)
   468  	DISPATCH(runtime·call1048576, 1048576)
   469  	DISPATCH(runtime·call2097152, 2097152)
   470  	DISPATCH(runtime·call4194304, 4194304)
   471  	DISPATCH(runtime·call8388608, 8388608)
   472  	DISPATCH(runtime·call16777216, 16777216)
   473  	DISPATCH(runtime·call33554432, 33554432)
   474  	DISPATCH(runtime·call67108864, 67108864)
   475  	DISPATCH(runtime·call134217728, 134217728)
   476  	DISPATCH(runtime·call268435456, 268435456)
   477  	DISPATCH(runtime·call536870912, 536870912)
   478  	DISPATCH(runtime·call1073741824, 1073741824)
   479  	MOVL	$runtime·badreflectcall(SB), AX
   480  	JMP	AX
   481  
   482  #define CALLFN(NAME,MAXSIZE)			\
   483  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   484  	NO_LOCAL_POINTERS;			\
   485  	/* copy argsize bytes from argptr to the bottom of this MAXSIZE-byte frame */		\
   486  	MOVL	argptr+8(FP), SI;		\
   487  	MOVL	argsize+12(FP), CX;		\
   488  	MOVL	SP, DI;				\
   489  	REP;MOVSB;				\
   490  	/* call function: DX = f, AX = code address held in the first word of f */			\
   491  	MOVL	f+4(FP), DX;			\
   492  	MOVL	(DX), AX; 			\
   493  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   494  	CALL	AX;				\
   495  	/* copy return values back: hand callRet the type in DX, destination in DI, source in SI and byte count in CX, the last three adjusted by retoffset */		\
   496  	MOVL	argtype+0(FP), DX;		\
   497  	MOVL	argptr+8(FP), DI;		\
   498  	MOVL	argsize+12(FP), CX;		\
   499  	MOVL	retoffset+16(FP), BX;		\
   500  	MOVL	SP, SI;				\
   501  	ADDL	BX, DI;				\
   502  	ADDL	BX, SI;				\
   503  	SUBL	BX, CX;				\
   504  	CALL	callRet<>(SB);			\
   505  	RET
   506  
   507  // callRet copies return values back at the end of call*. This is a
   508  // separate function so it can allocate stack space for the arguments
   509  // to reflectcallmove. It does not follow the Go ABI; it expects its
   510  // arguments in registers.
        // Registers on entry, as set up by CALLFN: DX = argtype, DI = destination,
        // SI = source, CX = byte count. They are spilled, in that order, into
        // the four outgoing argument slots for runtime·reflectcallmove.
   511  TEXT callRet<>(SB), NOSPLIT, $16-0
   512  	MOVL	DX, 0(SP)
   513  	MOVL	DI, 4(SP)
   514  	MOVL	SI, 8(SP)
   515  	MOVL	CX, 12(SP)
   516  	CALL	runtime·reflectcallmove(SB)
   517  	RET
   518  
   519  CALLFN(·call16, 16)
        // One fixed-frame wrapper per size listed in ·reflectcall's DISPATCH
        // chain, from 16 bytes doubling up to 1073741824 bytes.
   520  CALLFN(·call32, 32)
   521  CALLFN(·call64, 64)
   522  CALLFN(·call128, 128)
   523  CALLFN(·call256, 256)
   524  CALLFN(·call512, 512)
   525  CALLFN(·call1024, 1024)
   526  CALLFN(·call2048, 2048)
   527  CALLFN(·call4096, 4096)
   528  CALLFN(·call8192, 8192)
   529  CALLFN(·call16384, 16384)
   530  CALLFN(·call32768, 32768)
   531  CALLFN(·call65536, 65536)
   532  CALLFN(·call131072, 131072)
   533  CALLFN(·call262144, 262144)
   534  CALLFN(·call524288, 524288)
   535  CALLFN(·call1048576, 1048576)
   536  CALLFN(·call2097152, 2097152)
   537  CALLFN(·call4194304, 4194304)
   538  CALLFN(·call8388608, 8388608)
   539  CALLFN(·call16777216, 16777216)
   540  CALLFN(·call33554432, 33554432)
   541  CALLFN(·call67108864, 67108864)
   542  CALLFN(·call134217728, 134217728)
   543  CALLFN(·call268435456, 268435456)
   544  CALLFN(·call536870912, 536870912)
   545  CALLFN(·call1073741824, 1073741824)
   546  
   547  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Executes PAUSE once per unit of the cycles argument.
        	// NOTE(review): the count is decremented before it is tested, so
        	// cycles == 0 would keep looping until AX wraps around to zero.
        	// NOTE(review): the TEXT line declares an argument size of 0 even
        	// though a 4-byte argument is read - confirm this is intended.
   548  	MOVL	cycles+0(FP), AX
   549  again:
   550  	PAUSE
   551  	SUBL	$1, AX
   552  	JNZ	again
   553  	RET
   554  
   555  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   556  	// Stores are already ordered on x86, so this is just a
   557  	// compile barrier.
        	// No fence instruction is emitted; the body is a bare RET.
   558  	RET
   559  
   560  // void jmpdefer(fn, sp);
   561  // called from deferreturn.
   562  // 1. pop the caller
   563  // 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   564  //    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   565  //    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   566  //    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   567  // 3. jmp to the argument
        // The return PC that gets rewound is the word at argp-4. DX is left
        // holding fv when the jump to the deferred function is taken.
   568  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   569  	MOVL	fv+0(FP), DX	// fn
   570  	MOVL	argp+4(FP), BX	// caller sp
   571  	LEAL	-4(BX), SP	// caller sp after CALL
   572  #ifdef GOBUILDMODE_shared
   573  	SUBL	$16, (SP)	// return to CALL again
   574  #else
   575  	SUBL	$5, (SP)	// return to CALL again
   576  #endif
   577  	MOVL	0(DX), BX	// BX = code address held in the first word of fv
   578  	JMP	BX	// but first run the deferred function
   579  
   580  // Save state of caller into g->sched.
        // File-local helper used by asmcgocall. AX and BX are saved on entry and
        // restored on exit. Writes g->sched.sp, g->sched.pc and zeroes
        // g->sched.ret; calls runtime·badctxt when g->sched.ctxt is non-zero.
   581  TEXT gosave<>(SB),NOSPLIT,$0
   582  	PUSHL	AX
   583  	PUSHL	BX
   584  	get_tls(BX)
   585  	MOVL	g(BX), BX	// BX = current g
   586  	LEAL	arg+0(FP), AX	// AX = address of the first argument slot, stored as the saved SP
   587  	MOVL	AX, (g_sched+gobuf_sp)(BX)
   588  	MOVL	-4(AX), AX	// word just below that slot, stored as the saved PC
   589  	MOVL	AX, (g_sched+gobuf_pc)(BX)
   590  	MOVL	$0, (g_sched+gobuf_ret)(BX)
   591  	// Assert ctxt is zero. See func save.
   592  	MOVL	(g_sched+gobuf_ctxt)(BX), AX
   593  	TESTL	AX, AX
   594  	JZ	2(PC)	// ctxt is zero: step over the CALL
   595  	CALL	runtime·badctxt(SB)
   596  	POPL	BX
   597  	POPL	AX
   598  	RET
   599  
   600  // func asmcgocall(fn, arg unsafe.Pointer) int32
   601  // Call fn(arg) on the scheduler stack,
   602  // aligned appropriately for the gcc ABI.
   603  // See cgocall.go for more details.
        // Register use before the call: AX = fn, BX = arg, DX = SP on entry,
        // SI = m->g0, DI = the calling g. The value fn leaves in AX is stored
        // as the int32 result.
   604  TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   605  	MOVL	fn+0(FP), AX
   606  	MOVL	arg+4(FP), BX
   607  
   608  	MOVL	SP, DX
   609  
   610  	// Figure out if we need to switch to m->g0 stack.
   611  	// We get called to create new OS threads too, and those
   612  	// come in on the m->g0 stack already.
   613  	get_tls(CX)
   614  	MOVL	g(CX), BP
   615  	MOVL	g_m(BP), BP
   616  	MOVL	m_g0(BP), SI
   617  	MOVL	g(CX), DI
   618  	CMPL	SI, DI
   619  	JEQ	noswitch
   620  	CALL	gosave<>(SB)	// record the caller's SP/PC in g->sched before leaving this stack
   621  	get_tls(CX)
   622  	MOVL	SI, g(CX)
   623  	MOVL	(g_sched+gobuf_sp)(SI), SP
   624  
   625  noswitch:
   626  	// Now on a scheduling stack (a pthread-created stack).
   627  	SUBL	$32, SP
   628  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   629  	MOVL	DI, 8(SP)	// save g
   630  	MOVL	(g_stack+stack_hi)(DI), DI
   631  	SUBL	DX, DI	// DI = g.stack.hi - SP on entry
   632  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   633  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   634  	CALL	AX
   635  
   636  	// Restore registers, g, stack pointer.
   637  	get_tls(CX)
   638  	MOVL	8(SP), DI	// DI = saved g
   639  	MOVL	(g_stack+stack_hi)(DI), SI
   640  	SUBL	4(SP), SI	// SI = current g.stack.hi - saved depth = SP to return on
   641  	MOVL	DI, g(CX)
   642  	MOVL	SI, SP
   643  
   644  	MOVL	AX, ret+8(FP)
   645  	RET
   646  
   647  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   648  // Turn the fn into a Go func (by taking its address) and call
   649  // cgocallback_gofunc.
        // The 16-byte frame holds the four outgoing arguments. The first is the
        // address of the fn argument slot; the other three are copied through.
   650  TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   651  	LEAL	fn+0(FP), AX	// &fn: a pointer whose first word is the code address
   652  	MOVL	AX, 0(SP)
   653  	MOVL	frame+4(FP), AX
   654  	MOVL	AX, 4(SP)
   655  	MOVL	framesize+8(FP), AX
   656  	MOVL	AX, 8(SP)
   657  	MOVL	ctxt+12(FP), AX
   658  	MOVL	AX, 12(SP)
   659  	MOVL	$runtime·cgocallback_gofunc(SB), AX	// called indirectly through AX
   660  	CALL	AX
   661  	RET
   662  
   663  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   664  // See cgocall.go for more details.
        // DX carries the m found on entry ("oldm"); it is zero when needm had to
        // be called, and that is what triggers dropm at the end.
   665  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   666  	NO_LOCAL_POINTERS
   667  
   668  	// If g is nil, Go did not create the current thread.
   669  	// Call needm to obtain one for temporary use.
   670  	// In this case, we're running on the thread stack, so there's
   671  	// lots of space, but the linker doesn't know. Hide the call from
   672  	// the linker analysis by using an indirect call through AX.
   673  	get_tls(CX)
   674  #ifdef GOOS_windows
        	// With a zero TLS base the load of g is skipped and BP stays 0,
        	// which takes the needm path below.
   675  	MOVL	$0, BP
   676  	CMPL	CX, $0
   677  	JEQ	2(PC) // TODO
   678  #endif
   679  	MOVL	g(CX), BP
   680  	CMPL	BP, $0
   681  	JEQ	needm
   682  	MOVL	g_m(BP), BP
   683  	MOVL	BP, DX // saved copy of oldm
   684  	JMP	havem
   685  needm:
   686  	MOVL	$0, 0(SP)
   687  	MOVL	$runtime·needm(SB), AX
   688  	CALL	AX
   689  	MOVL	0(SP), DX	// reload the slot zeroed before the call; used as oldm
   690  	get_tls(CX)
   691  	MOVL	g(CX), BP
   692  	MOVL	g_m(BP), BP
   693  
   694  	// Set m->g0->sched.sp = SP, so that if a panic happens
   695  	// during the function we are about to execute, it will
   696  	// have a valid SP to run on the g0 stack.
   697  	// The next few lines (after the havem label)
   698  	// will save this SP onto the stack and then write
   699  	// the same SP back to m->g0->sched.sp. That seems redundant,
   700  	// but if an unrecovered panic happens, unwindm will
   701  	// restore the g->sched.sp from the stack location
   702  	// and then systemstack will try to use it. If we don't set it here,
   703  	// that restored SP will be uninitialized (typically 0) and
   704  	// will not be usable.
   705  	MOVL	m_g0(BP), SI
   706  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   707  
   708  havem:
   709  	// Now there's a valid m, and we're running on its m->g0.
   710  	// Save current m->g0->sched.sp on stack and then set it to SP.
   711  	// Save current sp in m->g0->sched.sp in preparation for
   712  	// switch back to m->curg stack.
   713  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   714  	MOVL	m_g0(BP), SI
   715  	MOVL	(g_sched+gobuf_sp)(SI), AX
   716  	MOVL	AX, 0(SP)
   717  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   718  
   719  	// Switch to m->curg stack and call runtime.cgocallbackg.
   720  	// Because we are taking over the execution of m->curg
   721  	// but *not* resuming what had been running, we need to
   722  	// save that information (m->curg->sched) so we can restore it.
   723  	// We can restore m->curg->sched.sp easily, because calling
   724  	// runtime.cgocallbackg leaves SP unchanged upon return.
   725  	// To save m->curg->sched.pc, we push it onto the stack.
   726  	// This has the added benefit that it looks to the traceback
   727  	// routine like cgocallbackg is going to return to that
   728  	// PC (because the frame we allocate below has the same
   729  	// size as cgocallback_gofunc's frame declared above)
   730  	// so that the traceback will seamlessly trace back into
   731  	// the earlier calls.
   732  	//
   733  	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   734  	// 8(SP) is unused.
   735  	MOVL	m_curg(BP), SI
   736  	MOVL	SI, g(CX)
   737  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   738  	MOVL	(g_sched+gobuf_pc)(SI), BP
   739  	MOVL	BP, -4(DI)	// saved curg->sched.pc sits where a return PC would be
   740  	MOVL	ctxt+12(FP), CX	// read ctxt before SP is moved to the other stack
   741  	LEAL	-(4+12)(DI), SP	// 4 bytes for the saved PC plus a 12-byte frame
   742  	MOVL	DX, 4(SP)
   743  	MOVL	CX, 0(SP)	// ctxt is the argument in slot 0 for cgocallbackg
   744  	CALL	runtime·cgocallbackg(SB)
   745  	MOVL	4(SP), DX	// reload oldm
   746  
   747  	// Restore g->sched (== m->curg->sched) from saved values.
   748  	get_tls(CX)
   749  	MOVL	g(CX), SI
   750  	MOVL	12(SP), BP	// the PC stored at -4(DI) above
   751  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   752  	LEAL	(12+4)(SP), DI	// the original curg->sched.sp
   753  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   754  
   755  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   756  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   757  	// so we do not have to restore it.)
   758  	MOVL	g(CX), BP
   759  	MOVL	g_m(BP), BP
   760  	MOVL	m_g0(BP), SI
   761  	MOVL	SI, g(CX)
   762  	MOVL	(g_sched+gobuf_sp)(SI), SP
   763  	MOVL	0(SP), AX	// previous g0->sched.sp, saved at havem
   764  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   765  	
   766  	// If the m on entry was nil, we called needm above to borrow an m
   767  	// for the duration of the call. Since the call is over, return it with dropm.
   768  	CMPL	DX, $0
   769  	JNE 3(PC)	// had an m on entry: step over the dropm call
   770  	MOVL	$runtime·dropm(SB), AX
   771  	CALL	AX
   772  
   773  	// Done!
   774  	RET
   775  
   776  // void setg(G*); set g. for use by needm.
        // Stores gg into the TLS g slot. On Windows the word at 0x14(FS) is first
        // pointed at gg.m.tls, or cleared (with an early return that skips the
        // TLS store) when gg is nil.
   777  TEXT runtime·setg(SB), NOSPLIT, $0-4
   778  	MOVL	gg+0(FP), BX
   779  #ifdef GOOS_windows
   780  	CMPL	BX, $0
   781  	JNE	settls
   782  	MOVL	$0, 0x14(FS)
   783  	RET
   784  settls:
   785  	MOVL	g_m(BX), AX
   786  	LEAL	m_tls(AX), AX	// AX = &gg.m.tls
   787  	MOVL	AX, 0x14(FS)
   788  #endif
   789  	get_tls(CX)
   790  	MOVL	BX, g(CX)
   791  	RET
   792  
   793  // void setg_gcc(G*); set g. for use by gcc
        // rt0_go passes the address of this function to _cgo_init. Only AX and
        // DX are written, besides the TLS g slot.
   794  TEXT setg_gcc<>(SB), NOSPLIT, $0
   795  	get_tls(AX)
   796  	MOVL	gg+0(FP), DX
   797  	MOVL	DX, g(AX)
   798  	RET
   799  
   800  // check that SP is in range [g->stack.lo, g->stack.hi)
        // As written, both comparisons are strict and unsigned: INT $3 is executed
        // unless stack.lo < SP < stack.hi.
   801  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   802  	get_tls(CX)
   803  	MOVL	g(CX), AX
   804  	CMPL	(g_stack+stack_hi)(AX), SP
   805  	JHI	2(PC)	// stack.hi > SP: step over the trap
   806  	INT	$3
   807  	CMPL	SP, (g_stack+stack_lo)(AX)
   808  	JHI	2(PC)	// SP > stack.lo: step over the trap
   809  	INT	$3
   810  	RET
   811  
   812  TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
        	// Returns the word stored 4 bytes below the address passed in argp.
        	// If that word equals runtime·stackBarrierPC, the value returned by
        	// runtime·nextBarrierPC (read from 0(SP)) is used instead.
   813  	MOVL	argp+0(FP),AX		// addr of first arg
   814  	MOVL	-4(AX),AX		// get calling pc
   815  	CMPL	AX, runtime·stackBarrierPC(SB)
   816  	JNE	nobar
   817  	// Get original return PC.
   818  	CALL	runtime·nextBarrierPC(SB)
   819  	MOVL	0(SP), AX	// result slot of the call, in this function's 4-byte frame
   820  nobar:
   821  	MOVL	AX, ret+4(FP)
   822  	RET
   823  
   824  TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
        	// Overwrites the word stored 4 bytes below argp with pc. If that
        	// word currently equals runtime·stackBarrierPC it is left alone and
        	// pc is handed to runtime·setNextBarrierPC instead.
   825  	MOVL	argp+0(FP),AX		// addr of first arg
   826  	MOVL	pc+4(FP), BX
   827  	MOVL	-4(AX), DX	// DX = current return PC
   828  	CMPL	DX, runtime·stackBarrierPC(SB)
   829  	JEQ	setbar
   830  	MOVL	BX, -4(AX)		// set calling pc
   831  	RET
   832  setbar:
   833  	// Set the stack barrier return PC.
   834  	MOVL	BX, 0(SP)	// argument slot in this function's 4-byte frame
   835  	CALL	runtime·setNextBarrierPC(SB)
   836  	RET
   837  
   838  // func cputicks() int64
        // Returns the RDTSC counter. A fence is issued first unless bit 26 of the
        // saved CPUID EDX word is clear: LFENCE when runtime·lfenceBeforeRdtsc is
        // 1 (set by rt0_go for "GenuineIntel"), MFENCE otherwise.
   839  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   840  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
   841  	JEQ	done
   842  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   843  	JNE	mfence
   844  	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
   845  	JMP	done
   846  mfence:
   847  	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
   848  done:
   849  	RDTSC
   850  	MOVL	AX, ret_lo+0(FP)	// low 32 bits of the counter
   851  	MOVL	DX, ret_hi+4(FP)	// high 32 bits of the counter
   852  	RET
   853  
   854  TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
   855  	// set up ldt 7 to point at m0.tls
   856  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   857  	// the entry number is just a hint.  setldt will set up GS with what it used.
        	// Passes three stack arguments to runtime·setldt: the entry number,
        	// the address of m0.tls and the size 32.
   858  	MOVL	$7, 0(SP)
   859  	LEAL	runtime·m0+m_tls(SB), AX
   860  	MOVL	AX, 4(SP)
   861  	MOVL	$32, 8(SP)	// sizeof(tls array)
   862  	CALL	runtime·setldt(SB)
   863  	RET
   864  
   865  TEXT runtime·emptyfunc(SB),0,$0-0
        	// Declared with flags 0 rather than NOSPLIT, unlike the other
        	// functions in this file. rt0_go calls it with the remark
        	// "fault if stack check is wrong".
   866  	RET
   867  
   868  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   869  // redirects to memhash(p, h, size) using the size
   870  // stored in the closure.
        // DX is expected to hold the closure pointer on entry; the size is read
        // from the word at offset 4 of it.
   871  TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
   872  	GO_ARGS
   873  	NO_LOCAL_POINTERS
   874  	MOVL	p+0(FP), AX
   875  	MOVL	h+4(FP), BX
   876  	MOVL	4(DX), CX	// size stored in the closure
   877  	MOVL	AX, 0(SP)
   878  	MOVL	BX, 4(SP)
   879  	MOVL	CX, 8(SP)
   880  	CALL	runtime·memhash(SB)
   881  	MOVL	12(SP), AX	// memhash's result slot, after its three arguments
   882  	MOVL	AX, ret+8(FP)
   883  	RET
   884  
   885  // hash function using AES hardware instructions
        // Loads the registers aeshashbody documents (AX = data, BX = length,
        // DX = address of the result slot) and jumps there without creating a
        // frame; aeshashbody reads the seed itself from h+4(FP).
   886  TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   887  	MOVL	p+0(FP), AX	// ptr to data
   888  	MOVL	s+8(FP), BX	// size
   889  	LEAL	ret+12(FP), DX
   890  	JMP	runtime·aeshashbody(SB)
   891  
   892  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
        	// String variant of aeshash: p points at a string header whose
        	// first word is the data pointer and second word the length.
        	// Jumps to aeshashbody with AX = data, BX = length and
        	// DX = address of the result slot.
   893  	MOVL	p+0(FP), AX	// ptr to string object
   894  	MOVL	4(AX), BX	// length of string
   895  	MOVL	(AX), AX	// string data
   896  	LEAL	ret+8(FP), DX
   897  	JMP	runtime·aeshashbody(SB)
   898  
   899  // AX: data
   900  // BX: length
   901  // DX: address to put return value
        //
        // Common body of the AES-based hash. It is reached by JMP (see the
        // wrapper directly above), which is why the frame is $0-0 and why
        // h+4(FP) can still be read here; presumably that slot is the
        // wrapper's seed argument -- confirm against the callers.
        // The input is handled by length class (0, 1-15, 16, 17-32, 33-64,
        // 65+) and the low 32 bits of the final XMM state are stored to (DX).
   902  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   903  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   904  	PINSRW	$4, BX, X0	            // 16 bits of length
   905  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   906  	MOVO	X0, X1                      // save unscrambled seed
   907  	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   908  	AESENC	X0, X0                      // scramble seed
   909  
        	// Dispatch on length; the comparisons are unsigned (JB/JBE).
   910  	CMPL	BX, $16
   911  	JB	aes0to15
   912  	JE	aes16
   913  	CMPL	BX, $32
   914  	JBE	aes17to32
   915  	CMPL	BX, $64
   916  	JBE	aes33to64
   917  	JMP	aes65plus
   918  	
   919  aes0to15:
   920  	TESTL	BX, BX
   921  	JE	aes0
   922  
        	// AX becomes data+16. If bits 4-11 of that address are all zero,
        	// data lies in the last 16 bytes of a 4096-byte-aligned region,
        	// so a 16-byte load starting at data could touch the next page.
   923  	ADDL	$16, AX
   924  	TESTW	$0xff0, AX
   925  	JE	endofpage
   926  
   927  	// 16 bytes loaded at this address won't cross
   928  	// a page boundary, so we can load it directly.
   929  	MOVOU	-16(AX), X1	// AX was advanced by 16, so this reads from the original data pointer
   930  	ADDL	BX, BX	// BX = 2*length; with the *8 scale below the table offset is length*16
   931  	PAND	masks<>(SB)(BX*8), X1	// zero every byte above the low `length` bytes
   932  
        // Shared tail for inputs of 1-16 bytes: X1 holds the data, X0 the scrambled seed.
   933  final1:	
   934  	AESENC	X0, X1  // scramble input, xor in seed
   935  	AESENC	X1, X1  // scramble combo 2 times
   936  	AESENC	X1, X1
   937  	MOVL	X1, (DX)	// result is the low 32 bits of X1
   938  	RET
   939  
   940  endofpage:
   941  	// address ends in 1111xxxx. Might be up against
   942  	// a page boundary, so load ending at last byte.
   943  	// Then shift bytes down using pshufb.
   944  	MOVOU	-32(AX)(BX*1), X1	// (data+16)-32+length: the 16 bytes ending at the last data byte
   945  	ADDL	BX, BX	// BX = 2*length, so the scaled index below is length*16
   946  	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top `length` bytes to the bottom, zero the rest
   947  	JMP	final1
   948  
   949  aes0:
   950  	// Return scrambled input seed
   951  	AESENC	X0, X0
   952  	MOVL	X0, (DX)
   953  	RET
   954  
   955  aes16:
   956  	MOVOU	(AX), X1	// exactly 16 bytes: no masking needed
   957  	JMP	final1
   958  
   959  aes17to32:
   960  	// make second starting seed
   961  	PXOR	runtime·aeskeysched+16(SB), X1	// X1 still holds the unscrambled seed saved at entry
   962  	AESENC	X1, X1
   963  	
   964  	// load data to be hashed
   965  	MOVOU	(AX), X2	// first 16 bytes
   966  	MOVOU	-16(AX)(BX*1), X3	// last 16 bytes; overlaps X2 when length < 32
   967  
   968  	// scramble 3 times
   969  	AESENC	X0, X2
   970  	AESENC	X1, X3
   971  	AESENC	X2, X2
   972  	AESENC	X3, X3
   973  	AESENC	X2, X2
   974  	AESENC	X3, X3
   975  
   976  	// combine results
   977  	PXOR	X3, X2
   978  	MOVL	X2, (DX)
   979  	RET
   980  
   981  aes33to64:
   982  	// make 3 more starting seeds
   983  	MOVO	X1, X2
   984  	MOVO	X1, X3
   985  	PXOR	runtime·aeskeysched+16(SB), X1
   986  	PXOR	runtime·aeskeysched+32(SB), X2
   987  	PXOR	runtime·aeskeysched+48(SB), X3
   988  	AESENC	X1, X1
   989  	AESENC	X2, X2
   990  	AESENC	X3, X3
   991  	
        	// First 32 and last 32 bytes; the two halves overlap when length < 64.
   992  	MOVOU	(AX), X4
   993  	MOVOU	16(AX), X5
   994  	MOVOU	-32(AX)(BX*1), X6
   995  	MOVOU	-16(AX)(BX*1), X7
   996  	
        	// One round keyed by the four seeds, then two self-keyed rounds per lane.
   997  	AESENC	X0, X4
   998  	AESENC	X1, X5
   999  	AESENC	X2, X6
  1000  	AESENC	X3, X7
  1001  	
  1002  	AESENC	X4, X4
  1003  	AESENC	X5, X5
  1004  	AESENC	X6, X6
  1005  	AESENC	X7, X7
  1006  	
  1007  	AESENC	X4, X4
  1008  	AESENC	X5, X5
  1009  	AESENC	X6, X6
  1010  	AESENC	X7, X7
  1011  
        	// Fold the four lanes into X4.
  1012  	PXOR	X6, X4
  1013  	PXOR	X7, X5
  1014  	PXOR	X5, X4
  1015  	MOVL	X4, (DX)
  1016  	RET
  1017  
  1018  aes65plus:
  1019  	// make 3 more starting seeds
  1020  	MOVO	X1, X2
  1021  	MOVO	X1, X3
  1022  	PXOR	runtime·aeskeysched+16(SB), X1
  1023  	PXOR	runtime·aeskeysched+32(SB), X2
  1024  	PXOR	runtime·aeskeysched+48(SB), X3
  1025  	AESENC	X1, X1
  1026  	AESENC	X2, X2
  1027  	AESENC	X3, X3
  1028  	
  1029  	// start with last (possibly overlapping) block
  1030  	MOVOU	-64(AX)(BX*1), X4
  1031  	MOVOU	-48(AX)(BX*1), X5
  1032  	MOVOU	-32(AX)(BX*1), X6
  1033  	MOVOU	-16(AX)(BX*1), X7
  1034  
  1035  	// scramble state once
  1036  	AESENC	X0, X4
  1037  	AESENC	X1, X5
  1038  	AESENC	X2, X6
  1039  	AESENC	X3, X7
  1040  
  1041  	// compute number of remaining 64-byte blocks
        	// BX = (length-1)/64, which is at least 1 here because length >= 65.
  1042  	DECL	BX
  1043  	SHRL	$6, BX
  1044  	
  1045  aesloop:
  1046  	// scramble state, xor in a block
        	// X0-X3 are reused as data registers; the seeds are no longer needed.
  1047  	MOVOU	(AX), X0
  1048  	MOVOU	16(AX), X1
  1049  	MOVOU	32(AX), X2
  1050  	MOVOU	48(AX), X3
  1051  	AESENC	X0, X4
  1052  	AESENC	X1, X5
  1053  	AESENC	X2, X6
  1054  	AESENC	X3, X7
  1055  
  1056  	// scramble state
  1057  	AESENC	X4, X4
  1058  	AESENC	X5, X5
  1059  	AESENC	X6, X6
  1060  	AESENC	X7, X7
  1061  
  1062  	ADDL	$64, AX	// advance to the next 64-byte block
  1063  	DECL	BX
  1064  	JNE	aesloop
  1065  
  1066  	// 2 more scrambles to finish
  1067  	AESENC	X4, X4
  1068  	AESENC	X5, X5
  1069  	AESENC	X6, X6
  1070  	AESENC	X7, X7
  1071  	
  1072  	AESENC	X4, X4
  1073  	AESENC	X5, X5
  1074  	AESENC	X6, X6
  1075  	AESENC	X7, X7
  1076  
        	// Fold the four lanes into X4 and return its low 32 bits.
  1077  	PXOR	X6, X4
  1078  	PXOR	X7, X5
  1079  	PXOR	X5, X4
  1080  	MOVL	X4, (DX)
  1081  	RET
  1082  
  1083  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
        	// Hashes the 4 bytes at p with seed h: seed and data are packed into
        	// X0, three AES rounds are applied using the first three 16-byte
        	// entries of aeskeysched as round keys, and the low 32 bits of X0
        	// are returned.
  1084  	MOVL	p+0(FP), AX	// ptr to data
  1085  	MOVL	h+4(FP), X0	// seed goes into dword 0 of X0
  1086  	PINSRD	$1, (AX), X0	// data goes into dword 1 of X0
  1087  	AESENC	runtime·aeskeysched+0(SB), X0
  1088  	AESENC	runtime·aeskeysched+16(SB), X0
  1089  	AESENC	runtime·aeskeysched+32(SB), X0
  1090  	MOVL	X0, ret+8(FP)	// low 32 bits of X0 are the result
  1091  	RET
  1092  
  1093  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
        	// Hashes the 8 bytes at p with seed h: data occupies the low 8 bytes
        	// of X0 and the seed dword 2, then three AES rounds are applied
        	// using the first three 16-byte entries of aeskeysched as round
        	// keys. The low 32 bits of X0 are returned.
  1094  	MOVL	p+0(FP), AX	// ptr to data
  1095  	MOVQ	(AX), X0	// data: 8 bytes into the low half of X0
  1096  	PINSRD	$2, h+4(FP), X0	// seed goes into dword 2 of X0
  1097  	AESENC	runtime·aeskeysched+0(SB), X0
  1098  	AESENC	runtime·aeskeysched+16(SB), X0
  1099  	AESENC	runtime·aeskeysched+32(SB), X0
  1100  	MOVL	X0, ret+8(FP)	// low 32 bits of X0 are the result
  1101  	RET
  1102  
  1103  // simple mask to get rid of data in the high part of the register.
        // The table holds 16 entries of 16 bytes each (256 bytes, read-only).
        // Entry i, at offset i*16, has its low i bytes set to 0xff and all
        // other bytes zero, so PAND with entry i keeps only the low i bytes
        // of a 16-byte register. aeshashbody indexes it with length*16.
  1104  DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0: keep nothing
  1105  DATA masks<>+0x04(SB)/4, $0x00000000
  1106  DATA masks<>+0x08(SB)/4, $0x00000000
  1107  DATA masks<>+0x0c(SB)/4, $0x00000000
  1108  	
  1109  DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1: keep low 1 byte
  1110  DATA masks<>+0x14(SB)/4, $0x00000000
  1111  DATA masks<>+0x18(SB)/4, $0x00000000
  1112  DATA masks<>+0x1c(SB)/4, $0x00000000
  1113  	
  1114  DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2: keep low 2 bytes
  1115  DATA masks<>+0x24(SB)/4, $0x00000000
  1116  DATA masks<>+0x28(SB)/4, $0x00000000
  1117  DATA masks<>+0x2c(SB)/4, $0x00000000
  1118  	
  1119  DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3: keep low 3 bytes
  1120  DATA masks<>+0x34(SB)/4, $0x00000000
  1121  DATA masks<>+0x38(SB)/4, $0x00000000
  1122  DATA masks<>+0x3c(SB)/4, $0x00000000
  1123  	
  1124  DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4: keep low 4 bytes
  1125  DATA masks<>+0x44(SB)/4, $0x00000000
  1126  DATA masks<>+0x48(SB)/4, $0x00000000
  1127  DATA masks<>+0x4c(SB)/4, $0x00000000
  1128  	
  1129  DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5: keep low 5 bytes
  1130  DATA masks<>+0x54(SB)/4, $0x000000ff
  1131  DATA masks<>+0x58(SB)/4, $0x00000000
  1132  DATA masks<>+0x5c(SB)/4, $0x00000000
  1133  	
  1134  DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6: keep low 6 bytes
  1135  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1136  DATA masks<>+0x68(SB)/4, $0x00000000
  1137  DATA masks<>+0x6c(SB)/4, $0x00000000
  1138  	
  1139  DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7: keep low 7 bytes
  1140  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1141  DATA masks<>+0x78(SB)/4, $0x00000000
  1142  DATA masks<>+0x7c(SB)/4, $0x00000000
  1143  	
  1144  DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8: keep low 8 bytes
  1145  DATA masks<>+0x84(SB)/4, $0xffffffff
  1146  DATA masks<>+0x88(SB)/4, $0x00000000
  1147  DATA masks<>+0x8c(SB)/4, $0x00000000
  1148  	
  1149  DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9: keep low 9 bytes
  1150  DATA masks<>+0x94(SB)/4, $0xffffffff
  1151  DATA masks<>+0x98(SB)/4, $0x000000ff
  1152  DATA masks<>+0x9c(SB)/4, $0x00000000
  1153  	
  1154  DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10: keep low 10 bytes
  1155  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1156  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1157  DATA masks<>+0xac(SB)/4, $0x00000000
  1158  	
  1159  DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11: keep low 11 bytes
  1160  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1161  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1162  DATA masks<>+0xbc(SB)/4, $0x00000000
  1163  	
  1164  DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12: keep low 12 bytes
  1165  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1166  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1167  DATA masks<>+0xcc(SB)/4, $0x00000000
  1168  	
  1169  DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13: keep low 13 bytes
  1170  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1171  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1172  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1173  	
  1174  DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14: keep low 14 bytes
  1175  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1176  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1177  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1178  	
  1179  DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15: keep low 15 bytes
  1180  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1181  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1182  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1183  
  1184  GLOBL masks<>(SB),RODATA,$256
  1185  
  1186  // these are arguments to pshufb. They move data down from
  1187  // the high bytes of the register to the low bytes of the register.
  1188  // index is how many bytes to move.
        // The table holds 16 entries of 16 bytes each (256 bytes, read-only).
        // In entry i (offset i*16) the low i control bytes select source
        // bytes 16-i..15 and the remaining control bytes are 0xff, whose set
        // high bit makes PSHUFB write zero. Entry 0 is all zero; the code
        // visible in aeshashbody never indexes it, because a zero length
        // branches to aes0 before the table is used.
  1189  DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0: placeholder
  1190  DATA shifts<>+0x04(SB)/4, $0x00000000
  1191  DATA shifts<>+0x08(SB)/4, $0x00000000
  1192  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1193  	
  1194  DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1: byte 15 -> byte 0
  1195  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1196  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1197  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1198  	
  1199  DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2: bytes 14-15 -> bytes 0-1
  1200  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1201  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1202  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1203  	
  1204  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3: bytes 13-15 -> bytes 0-2
  1205  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1206  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1207  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1208  	
  1209  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4: bytes 12-15 -> bytes 0-3
  1210  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1211  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1212  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1213  	
  1214  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5: bytes 11-15 -> bytes 0-4
  1215  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1216  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1217  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1218  	
  1219  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6: bytes 10-15 -> bytes 0-5
  1220  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1221  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1222  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1223  	
  1224  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7: bytes 9-15 -> bytes 0-6
  1225  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1226  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1227  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1228  	
  1229  DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8: bytes 8-15 -> bytes 0-7
  1230  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1231  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1232  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1233  	
  1234  DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9: bytes 7-15 -> bytes 0-8
  1235  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1236  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1237  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1238  	
  1239  DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10: bytes 6-15 -> bytes 0-9
  1240  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1241  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1242  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1243  	
  1244  DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11: bytes 5-15 -> bytes 0-10
  1245  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1246  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1247  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1248  	
  1249  DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12: bytes 4-15 -> bytes 0-11
  1250  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1251  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1252  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1253  	
  1254  DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13: bytes 3-15 -> bytes 0-12
  1255  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1256  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1257  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1258  	
  1259  DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14: bytes 2-15 -> bytes 0-13
  1260  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1261  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1262  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1263  	
  1264  DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15: bytes 1-15 -> bytes 0-14
  1265  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1266  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1267  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1268  
  1269  GLOBL shifts<>(SB),RODATA,$256
  1270  
  1271  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1272  	// Report whether masks<>(SB) and shifts<>(SB) are both 16-byte aligned:
        	// the OR of the two addresses has its low four bits clear exactly
        	// when each address does.
  1273  	MOVL	$shifts<>(SB), BX
  1274  	MOVL	$masks<>(SB), AX
  1275  	ORL	BX, AX
  1276  	TESTL	$15, AX
  1277  	SETEQ	ret+0(FP)	// true when no low bit is set in either address
  1278  	RET
  1279  
  1280  // memequal(p, q unsafe.Pointer, size uintptr) bool
        // Identical pointers are reported equal without reading memory; any
        // other pair is handed to memeqbody, which writes the result byte.
  1281  TEXT runtime·memequal(SB),NOSPLIT,$0-13
  1282  	MOVL	b+4(FP), DI
  1283  	MOVL	a+0(FP), SI
  1284  	CMPL	SI, DI
  1285  	JNE	compare		// distinct pointers: the contents must be compared
  1286  	MOVB	$1, ret+12(FP)	// same pointer: trivially equal
  1287  	RET
  1288  compare:
  1289  	MOVL	size+8(FP), BX
  1290  	LEAL	ret+12(FP), AX	// memeqbody stores its answer through AX
  1291  	JMP	runtime·memeqbody(SB)
  1292  
  1293  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, except that the byte count is not an argument: it is
        // read from offset 4 of the closure that DX points to.
  1294  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1295  	MOVL	b+4(FP), DI
  1296  	MOVL	a+0(FP), SI
  1297  	CMPL	SI, DI
  1298  	JNE	compare		// distinct pointers: the contents must be compared
  1299  	MOVB	$1, ret+8(FP)	// same pointer: trivially equal
  1300  	RET
  1301  compare:
  1302  	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
  1303  	LEAL	ret+8(FP), AX	// memeqbody stores its answer through AX
  1304  	JMP	runtime·memeqbody(SB)
  1305  
  1306  // eqstring reports whether two strings hold the same bytes.
  1307  // Only s1's length is read: the compiler guarantees
  1308  // that strings passed to eqstring have equal length.
  1309  // See runtime_test.go:eqstring_generic for
  1310  // equivalent Go code.
  1311  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1312  	MOVL	s2_base+8(FP), DI
  1313  	MOVL	s1_base+0(FP), SI
  1314  	CMPL	SI, DI
  1315  	JNE	compare		// different backing storage: compare the bytes
  1316  	MOVB	$1, ret+16(FP)	// same data pointer (and equal length): equal
  1317  	RET
  1318  compare:
  1319  	MOVL	s1_len+4(FP), BX
  1320  	LEAL	ret+16(FP), AX	// memeqbody stores its answer through AX
  1321  	JMP	runtime·memeqbody(SB)
  1322  
  1323  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Slices of different length are unequal; otherwise memeqbody
        	// compares a_len bytes and writes the result byte.
  1324  	MOVL	b_len+16(FP), CX
  1325  	MOVL	a_len+4(FP), BX
  1326  	CMPL	BX, CX
  1327  	JEQ	samelen
  1328  	MOVB	$0, ret+24(FP)	// lengths differ
  1329  	RET
  1330  samelen:
  1331  	MOVL	b+12(FP), DI
  1332  	MOVL	a+0(FP), SI
  1333  	LEAL	ret+24(FP), AX	// memeqbody stores its answer through AX
  1334  	JMP	runtime·memeqbody(SB)
  1335  
  1336  // a in SI
  1337  // b in DI
  1338  // count in BX
  1339  // address of result byte in AX
        //
        // Stores 1 to (AX) if the BX bytes at SI and at DI are equal, else 0.
        // It is reached by JMP from the wrappers above, hence the $0-0 frame.
  1340  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1341  	CMPL	BX, $4
  1342  	JB	small	// fewer than 4 bytes: handled with a single masked word compare
  1343  
  1344  	// 64 bytes at a time using xmm registers
  1345  hugeloop:
  1346  	CMPL	BX, $64
  1347  	JB	bigloop
  1348  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1349  	JE	bigloop	// no SSE2: fall back to the 4-byte loop
  1350  	MOVOU	(SI), X0
  1351  	MOVOU	(DI), X1
  1352  	MOVOU	16(SI), X2
  1353  	MOVOU	16(DI), X3
  1354  	MOVOU	32(SI), X4
  1355  	MOVOU	32(DI), X5
  1356  	MOVOU	48(SI), X6
  1357  	MOVOU	48(DI), X7
  1358  	PCMPEQB	X1, X0	// each byte becomes 0xff where a and b match, 0x00 otherwise
  1359  	PCMPEQB	X3, X2
  1360  	PCMPEQB	X5, X4
  1361  	PCMPEQB	X7, X6
  1362  	PAND	X2, X0	// AND the four results: a byte stays 0xff only if all four lanes matched
  1363  	PAND	X6, X4
  1364  	PAND	X4, X0
  1365  	PMOVMSKB X0, DX	// DX = one bit per byte of X0
  1366  	ADDL	$64, SI
  1367  	ADDL	$64, DI
  1368  	SUBL	$64, BX
  1369  	CMPL	DX, $0xffff	// all 16 bits set means all 64 bytes were equal
  1370  	JEQ	hugeloop
  1371  	MOVB	$0, (AX)
  1372  	RET
  1373  
  1374  	// 4 bytes at a time using 32-bit register
  1375  bigloop:
  1376  	CMPL	BX, $4
  1377  	JBE	leftover
  1378  	MOVL	(SI), CX
  1379  	MOVL	(DI), DX
  1380  	ADDL	$4, SI
  1381  	ADDL	$4, DI
  1382  	SUBL	$4, BX
  1383  	CMPL	CX, DX
  1384  	JEQ	bigloop
  1385  	MOVB	$0, (AX)
  1386  	RET
  1387  
  1388  	// remaining 0-4 bytes
        	// Compare the 4 bytes that end at the last byte of each buffer. When
        	// BX < 4 this re-reads bytes already found equal, which is in bounds
        	// because this path is only taken for an original count of at least 4.
  1389  leftover:
  1390  	MOVL	-4(SI)(BX*1), CX
  1391  	MOVL	-4(DI)(BX*1), DX
  1392  	CMPL	CX, DX
  1393  	SETEQ	(AX)
  1394  	RET
  1395  
  1396  small:
  1397  	CMPL	BX, $0
  1398  	JEQ	equal	// ZF is set here, so SETEQ at equal stores 1
  1399  
        	// CX = -(8*BX). Shifts use only the low 5 bits of CX, so the
        	// effective shift count is 32-8*BX (24, 16 or 8 for BX = 1, 2, 3).
  1400  	LEAL	0(BX*8), CX
  1401  	NEGL	CX
  1402  
  1403  	MOVL	SI, DX
  1404  	CMPB	DX, $0xfc	// low address byte above 0xfc: a 4-byte load would run past a 256-byte boundary
  1405  	JA	si_high
  1406  
  1407  	// load at SI won't cross a page boundary.
  1408  	MOVL	(SI), SI
  1409  	JMP	si_finish
  1410  si_high:
  1411  	// address ends in 111111xx. Load up to bytes we want, move to correct position.
  1412  	MOVL	-4(SI)(BX*1), SI
  1413  	SHRL	CX, SI	// bring the BX wanted bytes down to the low end of SI
  1414  si_finish:
  1415  
  1416  	// same for DI.
  1417  	MOVL	DI, DX
  1418  	CMPB	DX, $0xfc
  1419  	JA	di_high
  1420  	MOVL	(DI), DI
  1421  	JMP	di_finish
  1422  di_high:
  1423  	MOVL	-4(DI)(BX*1), DI
  1424  	SHRL	CX, DI
  1425  di_finish:
  1426  
        	// The difference shifted left by 32-8*BX discards the bytes beyond
        	// the count; ZF ends up set exactly when the low BX bytes agree.
  1427  	SUBL	SI, DI
  1428  	SHLL	CX, DI
  1429  equal:
  1430  	SETEQ	(AX)	// uses ZF from SHLL above, or from CMPL BX, $0 on the empty path
  1431  	RET
  1432  
  1433  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Load cmpbody's register arguments (SI/BX = s1 data and length,
        	// DI/DX = s2 data and length, AX = address of the result word)
        	// and tail-jump to it.
  1434  	LEAL	ret+16(FP), AX
  1435  	MOVL	s2_len+12(FP), DX
  1436  	MOVL	s2_base+8(FP), DI
  1437  	MOVL	s1_len+4(FP), BX
  1438  	MOVL	s1_base+0(FP), SI
  1439  	JMP	runtime·cmpbody(SB)
  1440  
  1441  TEXT bytes·Compare(SB),NOSPLIT,$0-28
        	// Load cmpbody's register arguments (SI/BX = first slice's data and
        	// length, DI/DX = second slice's data and length, AX = address of
        	// the result word) and tail-jump to it.
  1442  	LEAL	ret+24(FP), AX
  1443  	MOVL	s2+16(FP), DX
  1444  	MOVL	s2+12(FP), DI
  1445  	MOVL	s1+4(FP), BX
  1446  	MOVL	s1+0(FP), SI
  1447  	JMP	runtime·cmpbody(SB)
  1448  
  1449  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
        	// Returns the index of the first byte in s equal to c, or -1 if
        	// there is none. With s_len == 0 the scan does not run and ZF is
        	// whatever it was on entry; both paths then produce -1 because
        	// DI still equals SI.
  1450  	MOVL	s+0(FP), SI
  1451  	MOVL	s_len+4(FP), CX	// CX = maximum number of bytes to scan
  1452  	MOVB	c+12(FP), AL
  1453  	MOVL	SI, DI	// SCASB scans at DI; SI keeps the start for the index computation
  1454  	CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1455  	JZ 3(PC)	// match: skip the two not-found instructions below
  1456  	MOVL	$-1, ret+16(FP)
  1457  	RET
  1458  	SUBL	SI, DI	// DI stopped one past the matching byte
  1459  	SUBL	$1, DI
  1460  	MOVL	DI, ret+16(FP)
  1461  	RET
  1462  
  1463  TEXT strings·IndexByte(SB),NOSPLIT,$0-16
        	// Returns the index of the first byte in s equal to c, or -1 if
        	// there is none. With s_len == 0 the scan does not run and ZF is
        	// whatever it was on entry; both paths then produce -1 because
        	// DI still equals SI.
  1464  	MOVL	s+0(FP), SI
  1465  	MOVL	s_len+4(FP), CX	// CX = maximum number of bytes to scan
  1466  	MOVB	c+8(FP), AL
  1467  	MOVL	SI, DI	// SCASB scans at DI; SI keeps the start for the index computation
  1468  	CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1469  	JZ 3(PC)	// match: skip the two not-found instructions below
  1470  	MOVL	$-1, ret+12(FP)
  1471  	RET
  1472  	SUBL	SI, DI	// DI stopped one past the matching byte
  1473  	SUBL	$1, DI
  1474  	MOVL	DI, ret+12(FP)
  1475  	RET
  1476  
  1477  // input:
  1478  //   SI = a
  1479  //   DI = b
  1480  //   BX = alen
  1481  //   DX = blen
  1482  //   AX = address of return word (set to 1/0/-1)
        //
        // Three-way comparison of two byte sequences. The first differing byte
        // in the common prefix decides the result (compared as unsigned
        // bytes: +1 if a's byte is larger, -1 otherwise); if the common
        // prefix is identical the lengths decide. It is reached by JMP from
        // the wrappers above, hence the $0-0 frame.
  1483  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1484  	MOVL	DX, BP
  1485  	SUBL	BX, DX // DX = blen-alen
  1486  	JLE	2(PC)	// blen <= alen: keep BP = blen and skip the next instruction
  1487  	MOVL	BX, BP // BP = min(alen, blen)
  1488  	CMPL	SI, DI
  1489  	JEQ	allsame	// same data pointer: only the lengths can differ
  1490  	CMPL	BP, $4
  1491  	JB	small
  1492  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1493  	JE	mediumloop
        // 16 bytes at a time using xmm registers
  1494  largeloop:
  1495  	CMPL	BP, $16
  1496  	JB	mediumloop
  1497  	MOVOU	(SI), X0
  1498  	MOVOU	(DI), X1
  1499  	PCMPEQB X0, X1
  1500  	PMOVMSKB X1, BX	// BX = one bit per byte, set where the bytes are equal
  1501  	XORL	$0xffff, BX	// convert EQ to NE
  1502  	JNE	diff16	// branch if at least one byte is not equal
  1503  	ADDL	$16, SI
  1504  	ADDL	$16, DI
  1505  	SUBL	$16, BP
  1506  	JMP	largeloop
  1507  
  1508  diff16:
  1509  	BSFL	BX, BX	// index of first byte that differs
  1510  	XORL	DX, DX
  1511  	MOVB	(SI)(BX*1), CX
  1512  	CMPB	CX, (DI)(BX*1)
  1513  	SETHI	DX	// 1 if a's byte is larger (unsigned compare)
  1514  	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
  1515  	MOVL	DX, (AX)
  1516  	RET
  1517  
        // 4 bytes at a time using 32-bit registers
  1518  mediumloop:
  1519  	CMPL	BP, $4
  1520  	JBE	_0through4
  1521  	MOVL	(SI), BX
  1522  	MOVL	(DI), CX
  1523  	CMPL	BX, CX
  1524  	JNE	diff4
  1525  	ADDL	$4, SI
  1526  	ADDL	$4, DI
  1527  	SUBL	$4, BP
  1528  	JMP	mediumloop
  1529  
        // At most 4 bytes remain: compare the 4 bytes that end at the last
        // common byte. They may overlap bytes already found equal; the load
        // is in bounds because this path needs a common length of at least 4.
  1530  _0through4:
  1531  	MOVL	-4(SI)(BP*1), BX
  1532  	MOVL	-4(DI)(BP*1), CX
  1533  	CMPL	BX, CX
  1534  	JEQ	allsame
  1535  
        // BX and CX hold 4 bytes of a and b that differ somewhere.
  1536  diff4:
  1537  	BSWAPL	BX	// reverse order of bytes
  1538  	BSWAPL	CX
  1539  	XORL	BX, CX	// find bit differences
  1540  	BSRL	CX, CX	// index of highest bit difference
  1541  	SHRL	CX, BX	// move a's bit to bottom
  1542  	ANDL	$1, BX	// mask bit
  1543  	LEAL	-1(BX*2), BX // 1/0 => +1/-1
  1544  	MOVL	BX, (AX)
  1545  	RET
  1546  
  1547  	// 0-3 bytes in common
  1548  small:
        	// CX = -(8*BP). Shifts use only the low 5 bits of CX, so the
        	// effective shift count is 32-8*BP. NEGL sets ZF when BP is 0.
  1549  	LEAL	(BP*8), CX
  1550  	NEGL	CX
  1551  	JEQ	allsame
  1552  
  1553  	// load si
  1554  	CMPB	SI, $0xfc	// low address byte above 0xfc: load the 4 bytes ending at the last byte instead
  1555  	JA	si_high
  1556  	MOVL	(SI), SI
  1557  	JMP	si_finish
  1558  si_high:
  1559  	MOVL	-4(SI)(BP*1), SI
  1560  	SHRL	CX, SI	// bring the BP wanted bytes down to the low end
  1561  si_finish:
  1562  	SHLL	CX, SI	// push the wanted bytes to the top, discarding the bytes beyond the common length
  1563  
  1564  	// same for di
  1565  	CMPB	DI, $0xfc
  1566  	JA	di_high
  1567  	MOVL	(DI), DI
  1568  	JMP	di_finish
  1569  di_high:
  1570  	MOVL	-4(DI)(BP*1), DI
  1571  	SHRL	CX, DI
  1572  di_finish:
  1573  	SHLL	CX, DI
  1574  
  1575  	BSWAPL	SI	// reverse order of bytes
  1576  	BSWAPL	DI
  1577  	XORL	SI, DI	// find bit differences
  1578  	JEQ	allsame
  1579  	BSRL	DI, CX	// index of highest bit difference
  1580  	SHRL	CX, SI	// move a's bit to bottom
  1581  	ANDL	$1, SI	// mask bit
  1582  	LEAL	-1(SI*2), BX // 1/0 => +1/-1
  1583  	MOVL	BX, (AX)
  1584  	RET
  1585  
  1586  	// all the bytes in common are the same, so we just need
  1587  	// to compare the lengths.
  1588  allsame:
  1589  	XORL	BX, BX
  1590  	XORL	CX, CX
  1591  	TESTL	DX, DX	// DX = blen-alen, computed at entry
  1592  	SETLT	BX	// 1 if alen > blen
  1593  	SETEQ	CX	// 1 if alen == blen
  1594  	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
  1595  	MOVL	BX, (AX)
  1596  	RET
  1597  
  1598  TEXT runtime·fastrand(SB), NOSPLIT, $0-4
        	// Advances the generator state kept in the current m's fastrand
        	// field and returns the new state. Step: x = 2*x, then x is
        	// XORed with 0x88888eef if the doubled value had bit 31 set.
  1599  	get_tls(CX)
  1600  	MOVL	g(CX), AX
  1601  	MOVL	g_m(AX), AX	// AX = current g's m
  1602  	MOVL	m_fastrand(AX), DX
  1603  	ADDL	DX, DX	// DX = 2*x
  1604  	MOVL	DX, BX	// keep the un-XORed value
  1605  	XORL	$0x88888eef, DX
  1606  	JPL	2(PC)	// sign bit clear after the XOR: keep the XORed value
  1607  	MOVL	BX, DX	// otherwise fall back to the plain doubled value
  1608  	MOVL	DX, m_fastrand(AX)
  1609  	MOVL	DX, ret+0(FP)
  1610  	RET
  1611  
  1612  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns. MOVL leaves the flags untouched.
  1613  	MOVL	$0, AX
  1614  	RET
  1615  
  1616  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1617  // Must obey the gcc calling convention.
        // The result is returned in AX; only AX and CX are written.
  1618  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1619  	get_tls(CX)
  1620  	MOVL	g(CX), AX	// AX = g
  1621  	MOVL	g_m(AX), AX	// AX = g->m
  1622  	MOVL	m_curg(AX), AX	// AX = g->m->curg
  1623  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  1624  	RET
  1625  
  1626  // The top-most function running on a goroutine
  1627  // returns to goexit+PCQuantum.
        // The leading NOP byte puts the CALL one byte past the function entry.
  1628  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1629  	BYTE	$0x90	// NOP
  1630  	CALL	runtime·goexit1(SB)	// does not return
  1631  	// traceback from goexit1 must hit code range of goexit
        	// (the trailing NOP keeps the CALL's return address inside goexit)
  1632  	BYTE	$0x90	// NOP
  1633  
  1634  // Prefetching doesn't seem to help.
        // Each of the four prefetch entry points below is therefore a bare
        // RET: the 4-byte argument is ignored and no instruction is issued.
  1635  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  1636  	RET
  1637  
  1638  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  1639  	RET
  1640  
  1641  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  1642  	RET
  1643  
  1644  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  1645  	RET
  1646  
  1647  // Add a module's moduledata to the linked list of moduledata objects. This
  1648  // is called from .init_array by a function generated in the linker and so
  1649  // follows the platform ABI wrt register preservation -- it only touches AX,
  1650  // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1651  // instead the pointer to the moduledata is passed in AX.
        // The new moduledata is appended after the current tail and then
        // becomes the tail (runtime·lastmoduledatap).
  1652  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1653         MOVL    runtime·lastmoduledatap(SB), DX // DX = current tail
  1654         MOVL    AX, moduledata_next(DX)         // tail.next = new moduledata
  1655         MOVL    AX, runtime·lastmoduledatap(SB) // new moduledata is now the tail
  1656         RET
  1657  
  1658  TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
        	// Converts the uint32 argument to float64 on the x87 unit. The
        	// value is widened to a 64-bit integer with a zero high word so
        	// that the 64-bit integer load treats it as non-negative.
  1659  	MOVL	a+0(FP), AX
  1660  	MOVL	AX, 0(SP)	// low 32 bits = a
  1661  	MOVL	$0, 4(SP)	// high 32 bits = 0
  1662  	FMOVV	0(SP), F0	// load the 64-bit integer onto the x87 stack
  1663  	FMOVDP	F0, ret+4(FP)	// store it as a float64 and pop
  1664  	RET
  1665  
  1666  TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
        	// Converts the float64 argument to uint32 on the x87 unit: it is
        	// stored as a 64-bit integer and the low 32 bits are returned.
        	// The x87 control word is replaced by runtime·controlWord64trunc
        	// for the store (presumably selecting truncation, going by the
        	// name -- confirm at its definition) and restored afterwards.
        	// Frame layout: 0(SP) saved control word, 4(SP) 64-bit integer.
  1667  	FMOVD	a+0(FP), F0
  1668  	FSTCW	0(SP)	// save the caller's control word
  1669  	FLDCW	runtime·controlWord64trunc(SB)
  1670  	FMOVVP	F0, 4(SP)	// store as a 64-bit integer and pop
  1671  	FLDCW	0(SP)	// restore the saved control word
  1672  	MOVL	4(SP), AX	// low 32 bits of the integer
  1673  	MOVL	AX, ret+8(FP)
  1674  	RET