github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_386 is common startup code for most 386 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
        //
        // The 8-byte frame declared below supplies the 0(SP) and 4(SP) slots in
        // which argc and argv are handed to runtime·rt0_go, so the values left
        // on the stack at entry are addressed starting at 8(SP).
    14  TEXT _rt0_386(SB),NOSPLIT,$8
    15  	MOVL	8(SP), AX	// argc
    16  	LEAL	12(SP), BX	// argv
    17  	MOVL	AX, 0(SP)	// rt0_go reads argc from 0(SP)
    18  	MOVL	BX, 4(SP)	// rt0_go reads argv from 4(SP)
    19  	JMP	runtime·rt0_go(SB)	// rt0_go is entered by jump, not by call
    20  
    21  // _rt0_386_lib is common startup code for most 386 systems when
    22  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    23  // arrange to invoke this function as a global constructor (for
    24  // c-archive) or when the shared library is loaded (for c-shared).
    25  // We expect argc and argv to be passed on the stack following the
    26  // usual C ABI.
        //
        // It records argc/argv in file-local globals, calls runtime·libpreinit,
        // and then starts _rt0_386_lib_go on a new thread: through the function
        // pointer stored in _cgo_sys_thread_create when that is non-nil,
        // otherwise through runtime·newosproc0. BP, BX, SI and DI are pushed on
        // entry and popped again before returning to the caller.
    27  TEXT _rt0_386_lib(SB),NOSPLIT,$0
    28  	PUSHL	BP
    29  	MOVL	SP, BP
    30  	PUSHL	BX
    31  	PUSHL	SI
    32  	PUSHL	DI
    33    
    34  	MOVL	8(BP), AX	// argc: 0(BP) is the saved BP, 4(BP) the return address
    35  	MOVL	AX, _rt0_386_lib_argc<>(SB)
    36  	MOVL	12(BP), AX	// argv
    37  	MOVL	AX, _rt0_386_lib_argv<>(SB)
    38    
    39  	// Synchronous initialization.
    40  	CALL	runtime·libpreinit(SB)
    41    
    42  	SUBL	$8, SP	// two 4-byte argument slots for the call below; released at restore
    43    
    44  	// Create a new thread to do the runtime initialization.
    45  	MOVL	_cgo_sys_thread_create(SB), AX
    46  	TESTL	AX, AX
    47  	JZ	nocgo	// no cgo thread-creation hook installed
    48    
    49  	// Align stack to call C function.
    50  	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
    51  	MOVL	SP, BP	// remember the unaligned SP so it can be restored after the call
    52  	ANDL	$~15, SP	// round SP down to a 16-byte boundary
    53    
    54  	MOVL	$_rt0_386_lib_go(SB), BX
    55  	MOVL	BX, 0(SP)	// first argument: function to run on the new thread
    56  	MOVL	$0, 4(SP)	// second argument: zero (presumably the thread argument - confirm against the cgo hook)
    57    
    58  	CALL	AX
    59    
    60  	MOVL	BP, SP	// undo the alignment
    61    
    62  	JMP	restore
    63    
    64  nocgo:
    65  	MOVL	$0x800000, 0(SP)                    // stacksize = 8192KB
    66  	MOVL	$_rt0_386_lib_go(SB), AX
    67  	MOVL	AX, 4(SP)                           // fn
    68  	CALL	runtime·newosproc0(SB)
    69    
    70  restore:
    71  	ADDL	$8, SP	// drop the two argument slots reserved above
    72  	POPL	DI
    73  	POPL	SI
    74  	POPL	BX
    75  	POPL	BP
    76  	RET
    77  
    78  // _rt0_386_lib_go initializes the Go runtime.
    79  // This is started in a separate thread by _rt0_386_lib.
        // It reloads the argc/argv values that _rt0_386_lib stored in the
        // file-local globals, places them in the 0(SP)/4(SP) slots of its 8-byte
        // frame, and jumps to runtime·rt0_go.
    80  TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
    81  	MOVL	_rt0_386_lib_argc<>(SB), AX
    82  	MOVL	AX, 0(SP)	// argc where rt0_go expects it
    83  	MOVL	_rt0_386_lib_argv<>(SB), AX
    84  	MOVL	AX, 4(SP)	// argv where rt0_go expects it
    85  	JMP	runtime·rt0_go(SB)	// rt0_go is entered by jump, not by call
    86  
    87  DATA _rt0_386_lib_argc<>(SB)/4, $0	// argc saved by _rt0_386_lib, read back by _rt0_386_lib_go
    88  GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4	// 4-byte file-local symbol, initially zero
    89  DATA _rt0_386_lib_argv<>(SB)/4, $0	// argv saved by _rt0_386_lib, read back by _rt0_386_lib_go
    90  GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4	// 4-byte file-local symbol, initially zero
    91  
    92  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// rt0_go is the common bootstrap. In order, it:
        	//   1. moves argc/argv onto a freshly aligned scratch area,
        	//   2. gives g0 provisional stack bounds,
        	//   3. probes the CPU with CPUID (and insists on MMX),
        	//   4. sets up TLS, via _cgo_init if present, else ldt0setup,
        	//   5. links g0 and m0 together,
        	//   6. runs check, args, osinit and schedinit,
        	//   7. queues runtime·main with newproc and enters mstart.
        	// On entry 0(SP) holds argc and 4(SP) holds argv.
    93  	// Copy arguments forward on an even stack.
    94  	// Users of this function jump to it, they don't call it.
    95  	MOVL	0(SP), AX
    96  	MOVL	4(SP), BX
    97  	SUBL	$128, SP		// plenty of scratch
    98  	ANDL	$~15, SP	// round SP down to a 16-byte boundary
    99  	MOVL	AX, 120(SP)		// save argc, argv away
   100  	MOVL	BX, 124(SP)
   101    
   102  	// set default stack bounds.
   103  	// _cgo_init may update stackguard.
   104  	MOVL	$runtime·g0(SB), BP
   105  	LEAL	(-64*1024+104)(SP), BX	// provisional low bound: roughly 64KB below the current SP
   106  	MOVL	BX, g_stackguard0(BP)
   107  	MOVL	BX, g_stackguard1(BP)
   108  	MOVL	BX, (g_stack+stack_lo)(BP)
   109  	MOVL	SP, (g_stack+stack_hi)(BP)
   110    
   111  	// find out information about the processor we're on
   112  #ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
   113  	JMP 	has_cpuid
   114  #else
   115  	// first see if CPUID instruction is supported.
        	// The test toggles bit 21 of EFLAGS and checks whether the change sticks.
   116  	PUSHFL	// copy kept on the stack so the original EFLAGS can be restored
   117  	PUSHFL	// working copy
   118  	XORL	$(1<<21), 0(SP) // flip ID bit
   119  	POPFL	// try to load the modified flags
   120  	PUSHFL
   121  	POPL	AX	// AX = flags as the CPU actually holds them now
   122  	XORL	0(SP), AX	// AX = bits that differ from the original flags
   123  	POPFL	// restore EFLAGS
   124  	TESTL	$(1<<21), AX
   125  	JNE 	has_cpuid	// the ID bit could be changed, so CPUID is available
   126  #endif
   127    
   128  bad_proc: // show that the program requires MMX.
   129  	MOVL	$2, 0(SP)	// file descriptor 2
   130  	MOVL	$bad_proc_msg<>(SB), 4(SP)
   131  	MOVL	$0x3d, 8(SP)	// message length; matches the GLOBL size of bad_proc_msg
   132  	CALL	runtime·write(SB)
   133  	MOVL	$1, 0(SP)	// exit status 1
   134  	CALL	runtime·exit(SB)
   135  	CALL	runtime·abort(SB)	// only reached if exit returns
   136    
   137  has_cpuid:
   138  	MOVL	$0, AX	// CPUID leaf 0: AX = highest leaf, BX/DX/CX = vendor string
   139  	CPUID
   140  	MOVL	AX, SI
   141  	CMPL	AX, $0
   142  	JE	nocpuinfo	// no leaf beyond 0, nothing more to query
   143    
   144  	// Figure out how to serialize RDTSC.
   145  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   146  	// Don't know about the rest, so let's do MFENCE.
   147  	CMPL	BX, $0x756E6547  // "Genu"
   148  	JNE	notintel
   149  	CMPL	DX, $0x49656E69  // "ineI"
   150  	JNE	notintel
   151  	CMPL	CX, $0x6C65746E  // "ntel"
   152  	JNE	notintel
   153  	MOVB	$1, runtime·isIntel(SB)
   154  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)	// consulted by runtime·cputicks
   155  notintel:
   156    
   157  	// Load EAX=1 cpuid flags
   158  	MOVL	$1, AX
   159  	CPUID
   160  	MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
   161  	MOVL	AX, runtime·processorVersionInfo(SB)
   162    
   163  	// Check for MMX support
   164  	TESTL	$(1<<23), DX // MMX
   165  	JZ	bad_proc
   166    
   167  nocpuinfo:
   168  	// if there is a _cgo_init, call it to let it
   169  	// initialize and to set up GS.  if not,
   170  	// we set up GS ourselves.
   171  	MOVL	_cgo_init(SB), AX
   172  	TESTL	AX, AX
   173  	JZ	needtls
   174  	MOVL	$setg_gcc<>(SB), BX
   175  	MOVL	BX, 4(SP)	// second argument: the setg_gcc helper
   176  	MOVL	BP, 0(SP)	// first argument: BP still holds &runtime·g0 from above
   177  	CALL	AX
   178    
   179  	// update stackguard after _cgo_init
   180  	MOVL	$runtime·g0(SB), CX
   181  	MOVL	(g_stack+stack_lo)(CX), AX
   182  	ADDL	$const__StackGuard, AX
   183  	MOVL	AX, g_stackguard0(CX)
   184  	MOVL	AX, g_stackguard1(CX)
   185    
   186  #ifndef GOOS_windows
   187  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   188  	JMP ok
   189  #endif
   190  needtls:
   191  #ifdef GOOS_plan9
   192  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   193  	JMP	ok
   194  #endif
   195  #ifdef GOOS_darwin
   196  	// skip runtime·ldt0setup(SB) on Darwin
   197  	JMP	ok
   198  #endif
   199    
   200  	// set up %gs
   201  	CALL	runtime·ldt0setup(SB)
   202    
   203  	// store through it, to make sure it works
   204  	get_tls(BX)
   205  	MOVL	$0x123, g(BX)	// write a marker through the TLS slot...
   206  	MOVL	runtime·m0+m_tls(SB), AX	// ...and read it back directly from m0.tls
   207  	CMPL	AX, $0x123
   208  	JEQ	ok
   209  	MOVL	AX, 0	// abort
   210  ok:
   211  	// set up m and g "registers"
   212  	get_tls(BX)
   213  	LEAL	runtime·g0(SB), DX
   214  	MOVL	DX, g(BX)	// current g = g0
   215  	LEAL	runtime·m0(SB), AX
   216    
   217  	// save m->g0 = g0
   218  	MOVL	DX, m_g0(AX)
   219  	// save g0->m = m0
   220  	MOVL	AX, g_m(DX)
   221    
   222  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   223    
   224  	// convention is D is always cleared
   225  	CLD
   226    
   227  	CALL	runtime·check(SB)
   228    
   229  	// saved argc, argv
   230  	MOVL	120(SP), AX
   231  	MOVL	AX, 0(SP)
   232  	MOVL	124(SP), AX
   233  	MOVL	AX, 4(SP)
   234  	CALL	runtime·args(SB)
   235  	CALL	runtime·osinit(SB)
   236  	CALL	runtime·schedinit(SB)
   237    
   238  	// create a new goroutine to start program
   239  	PUSHL	$runtime·mainPC(SB)	// entry
   240  	PUSHL	$0	// arg size
   241  	CALL	runtime·newproc(SB)
   242  	POPL	AX	// discard the two words pushed for newproc
   243  	POPL	AX
   244    
   245  	// start this M
   246  	CALL	runtime·mstart(SB)
   247    
   248  	CALL	runtime·abort(SB)	// only reached if mstart returns
   249  	RET
   250  
   251  DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// message written to fd 2 by rt0_go at its bad_proc label
   252  DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   253  DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   254  DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   255  DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   256  DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   257  DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   258  DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   259  DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   260  GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 0x3d = 7*8 + 4 + 1 bytes; rt0_go passes the same length to write
   261  
   262  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// one word holding the address of runtime·main
   263  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only; rt0_go pushes its address as the newproc entry
   264  
   265  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Raises software interrupt 3 (the x86 breakpoint trap) and then
        	// returns to the caller. Takes no arguments and returns nothing.
   266  	INT $3
   267  	RET
   268  
   269  TEXT runtime·asminit(SB),NOSPLIT,$0-0
        	// Loads the x87 FPU control word from runtime·controlWord64.
        	// Takes no arguments and returns nothing.
   270  	// Linux and MinGW start the FPU in extended double precision.
   271  	// Other operating systems use double precision.
   272  	// Change to double precision to match them,
   273  	// and to match other hardware that only has double.
        	// NOTE(review): controlWord64 is defined outside this view; its
        	// exact bit pattern should be confirmed there.
   274  	FLDCW	runtime·controlWord64(SB)
   275  	RET
   276  
   277  /*
   278   *  go-routine
   279   */
   280  
   281  // void gosave(Gobuf*)
   282  // save state in Gobuf; setjmp
        // Records the caller's SP and PC and the current g in the Gobuf and
        // zeroes its ret field. If the Gobuf's ctxt field is non-zero,
        // runtime·badctxt is called.
   283  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   284  	MOVL	buf+0(FP), AX		// gobuf
   285  	LEAL	buf+0(FP), BX		// caller's SP
   286  	MOVL	BX, gobuf_sp(AX)
   287  	MOVL	0(SP), BX		// caller's PC
   288  	MOVL	BX, gobuf_pc(AX)
   289  	MOVL	$0, gobuf_ret(AX)
   290  	// Assert ctxt is zero. See func save.
   291  	MOVL	gobuf_ctxt(AX), BX
   292  	TESTL	BX, BX
   293  	JZ	2(PC)	// ctxt is zero: skip the badctxt call
   294  	CALL	runtime·badctxt(SB)
   295  	get_tls(CX)
   296  	MOVL	g(CX), BX
   297  	MOVL	BX, gobuf_g(AX)	// gobuf.g = current g
   298  	RET
   299  
   300  // void gogo(Gobuf*)
   301  // restore state from Gobuf; longjmp
        // Makes gobuf.g the current g, loads SP, AX (from ret) and DX (from
        // ctxt) out of the Gobuf, zeroes those three Gobuf fields, and jumps
        // to gobuf.pc. Control does not come back to the caller of gogo.
   302  TEXT runtime·gogo(SB), NOSPLIT, $8-4
   303  	MOVL	buf+0(FP), BX		// gobuf
   304  	MOVL	gobuf_g(BX), DX
   305  	MOVL	0(DX), CX		// make sure g != nil
   306  	get_tls(CX)	// CX is reused; the value loaded above was only a probe
   307  	MOVL	DX, g(CX)	// current g = gobuf.g
   308  	MOVL	gobuf_sp(BX), SP	// restore SP
   309  	MOVL	gobuf_ret(BX), AX
   310  	MOVL	gobuf_ctxt(BX), DX
   311  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   312  	MOVL	$0, gobuf_ret(BX)
   313  	MOVL	$0, gobuf_ctxt(BX)
   314  	MOVL	gobuf_pc(BX), BX	// BX is overwritten last, once the Gobuf is no longer needed
   315  	JMP	BX
   316  
   317  // func mcall(fn func(*g))
   318  // Switch to m->g0's stack, call fn(g).
   319  // Fn must never return. It should gogo(&g->sched)
   320  // to keep running g.
        // The caller's PC and SP and the g itself are stored in g->sched before
        // the switch. If the current g already is m->g0, control jumps to
        // runtime·badmcall; if fn does return, to runtime·badmcall2.
   321  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   322  	MOVL	fn+0(FP), DI
   323    
   324  	get_tls(DX)
   325  	MOVL	g(DX), AX	// save state in g->sched
   326  	MOVL	0(SP), BX	// caller's PC
   327  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   328  	LEAL	fn+0(FP), BX	// caller's SP
   329  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   330  	MOVL	AX, (g_sched+gobuf_g)(AX)
   331    
   332  	// switch to m->g0 & its stack, call fn
   333  	MOVL	g(DX), BX
   334  	MOVL	g_m(BX), BX
   335  	MOVL	m_g0(BX), SI
   336  	CMPL	SI, AX	// if g == m->g0 call badmcall
   337  	JNE	3(PC)	// different g: skip the two-instruction badmcall jump
   338  	MOVL	$runtime·badmcall(SB), AX
   339  	JMP	AX
   340  	MOVL	SI, g(DX)	// g = m->g0
   341  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   342  	PUSHL	AX	// argument to fn: the g we just left
   343  	MOVL	DI, DX	// DX = fn, the closure pointer
   344  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   345  	CALL	DI
   346  	POPL	AX
   347  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   348  	JMP	AX
   349  	RET
   350  
   351  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   352  // of the G stack. We need to distinguish the routine that
   353  // lives at the bottom of the G stack from the one that lives
   354  // at the top of the system stack because the one at the top of
   355  // the system stack terminates the stack walk (see topofstack()).
        // Its address is what systemstack stores in g->sched.pc before switching
        // stacks; the body itself is a bare RET.
   356  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   357  	RET
   358  
   359  // func systemstack(fn func())
        // Runs fn on the system (g0) stack.
        //   - If the current g is m->gsignal or m->g0, fn is tail-called in place.
        //   - If the current g is m->curg, its state is saved in g->sched, the
        //     stack is switched to g0, fn is called, and the stack is switched
        //     back before returning.
        //   - Any other g is an error: runtime·badsystemstack is called.
   360  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   361  	MOVL	fn+0(FP), DI	// DI = fn
   362  	get_tls(CX)
   363  	MOVL	g(CX), AX	// AX = g
   364  	MOVL	g_m(AX), BX	// BX = m
   365    
   366  	CMPL	AX, m_gsignal(BX)
   367  	JEQ	noswitch	// running on the signal g: no switch
   368    
   369  	MOVL	m_g0(BX), DX	// DX = g0
   370  	CMPL	AX, DX
   371  	JEQ	noswitch	// already running on g0
   372    
   373  	CMPL	AX, m_curg(BX)
   374  	JNE	bad
   375    
   376  	// switch stacks
   377  	// save our state in g->sched. Pretend to
   378  	// be systemstack_switch if the G stack is scanned.
   379  	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   380  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   381  	MOVL	AX, (g_sched+gobuf_g)(AX)
   382    
   383  	// switch to g0
   384  	get_tls(CX)
   385  	MOVL	DX, g(CX)
   386  	MOVL	(g_sched+gobuf_sp)(DX), BX
   387  	// make it look like mstart called systemstack on g0, to stop traceback
   388  	SUBL	$4, BX	// make room for one fake return-address word
   389  	MOVL	$runtime·mstart(SB), DX
   390  	MOVL	DX, 0(BX)
   391  	MOVL	BX, SP
   392    
   393  	// call target function
   394  	MOVL	DI, DX	// DX = fn, the closure pointer
   395  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   396  	CALL	DI
   397    
   398  	// switch back to g
   399  	get_tls(CX)
   400  	MOVL	g(CX), AX
   401  	MOVL	g_m(AX), BX
   402  	MOVL	m_curg(BX), AX
   403  	MOVL	AX, g(CX)
   404  	MOVL	(g_sched+gobuf_sp)(AX), SP	// back to the SP saved before the switch
   405  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// saved SP is no longer needed
   406  	RET
   407    
   408  noswitch:
   409  	// already on system stack; tail call the function
   410  	// Using a tail call here cleans up tracebacks since we won't stop
   411  	// at an intermediate systemstack.
   412  	MOVL	DI, DX
   413  	MOVL	0(DI), DI
   414  	JMP	DI
   415    
   416  bad:
   417  	// Bad: g is not gsignal, not g0, not curg. What is it?
   418  	// Hide call from linker nosplit analysis.
   419  	MOVL	$runtime·badsystemstack(SB), AX
   420  	CALL	AX
   421  	INT	$3	// trap if badsystemstack returns
   422  
   423  /*
   424   * support for morestack
   425   */
   426  
   427  // Called during function prolog when more stack is needed.
   428  //
   429  // The traceback routines see morestack on a g0 as being
   430  // the top of a stack (for example, morestack calling newstack
   431  // calling the scheduler calling newm calling gc), so we must
   432  // record an argument size. For that purpose, it has no arguments.
        //
        // Inputs are implicit: 0(SP) is the return address into the function f
        // that needs more stack, 4(SP) is the return address into f's caller,
        // and DX is stored as f's context. It records f's caller in m->morebuf
        // and f itself in g->sched, then calls runtime·newstack on m->g0's stack.
   433  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   434  	// Cannot grow scheduler stack (m->g0).
   435  	get_tls(CX)
   436  	MOVL	g(CX), BX
   437  	MOVL	g_m(BX), BX	// BX = m, kept for the rest of the function
   438  	MOVL	m_g0(BX), SI
   439  	CMPL	g(CX), SI
   440  	JNE	3(PC)	// not g0: skip the two error calls
   441  	CALL	runtime·badmorestackg0(SB)
   442  	CALL	runtime·abort(SB)
   443    
   444  	// Cannot grow signal stack.
   445  	MOVL	m_gsignal(BX), SI
   446  	CMPL	g(CX), SI
   447  	JNE	3(PC)	// not gsignal: skip the two error calls
   448  	CALL	runtime·badmorestackgsignal(SB)
   449  	CALL	runtime·abort(SB)
   450    
   451  	// Called from f.
   452  	// Set m->morebuf to f's caller.
   453  	MOVL	4(SP), DI	// f's caller's PC
   454  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   455  	LEAL	8(SP), CX	// f's caller's SP
   456  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   457  	get_tls(CX)	// CX was used as scratch above; reload the TLS base
   458  	MOVL	g(CX), SI
   459  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   460    
   461  	// Set g->sched to context in f.
   462  	MOVL	0(SP), AX	// f's PC
   463  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   464  	MOVL	SI, (g_sched+gobuf_g)(SI)
   465  	LEAL	4(SP), AX	// f's SP
   466  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   467  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as received from f; morestack_noctxt zeroes it first
   468    
   469  	// Call newstack on m->g0's stack.
   470  	MOVL	m_g0(BX), BP
   471  	MOVL	BP, g(CX)	// current g = m->g0
   472  	MOVL	(g_sched+gobuf_sp)(BP), AX
   473  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   474  	MOVL	AX, SP
   475  	CALL	runtime·newstack(SB)
   476  	CALL	runtime·abort(SB)	// crash if newstack returns
   477  	RET
   478  
   479  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
        	// Variant of morestack with no context: it zeroes DX, which
        	// morestack stores into g->sched.ctxt, and then jumps to morestack.
   480  	MOVL	$0, DX
   481  	JMP runtime·morestack(SB)
   482  
   483  // reflectcall: call a function with the given argument list
   484  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   485  // we don't have variable-sized frames, so we use a small number
   486  // of constant-sized-frame functions to encode a few bits of size in the pc.
   487  // Caution: ugly multiline assembly macros in your future!
        //
        // DISPATCH(NAME, MAXSIZE) expects the argument size in CX. If CX is at
        // most MAXSIZE (unsigned compare) it jumps to NAME through AX; otherwise
        // JA 3(PC) skips the MOVL/JMP pair and execution falls through to
        // whatever follows the macro. AX is clobbered when the jump is taken.
   488    
   489  #define DISPATCH(NAME,MAXSIZE)		\
   490  	CMPL	CX, $MAXSIZE;		\
   491  	JA	3(PC);			\
   492  	MOVL	$NAME(SB), AX;		\
   493  	JMP	AX
   494  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   495  
   496  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: jumps straight to ·reflectcall, which
        	// therefore sees the same arguments on the stack.
   497  	JMP	·reflectcall(SB)
   498  
   499  TEXT ·reflectcall(SB), NOSPLIT, $0-20
        	// Loads argsize into CX and jumps to the first runtime·callN stub
        	// whose N is at least argsize. The DISPATCH lines are tried in
        	// ascending order, N doubling from 16 up to 1073741824. If argsize
        	// exceeds the largest N, control jumps to runtime·badreflectcall.
        	// The chosen stub is entered by JMP, so it sees reflectcall's own
        	// 20 bytes of arguments.
   500  	MOVL	argsize+12(FP), CX
   501  	DISPATCH(runtime·call16, 16)
   502  	DISPATCH(runtime·call32, 32)
   503  	DISPATCH(runtime·call64, 64)
   504  	DISPATCH(runtime·call128, 128)
   505  	DISPATCH(runtime·call256, 256)
   506  	DISPATCH(runtime·call512, 512)
   507  	DISPATCH(runtime·call1024, 1024)
   508  	DISPATCH(runtime·call2048, 2048)
   509  	DISPATCH(runtime·call4096, 4096)
   510  	DISPATCH(runtime·call8192, 8192)
   511  	DISPATCH(runtime·call16384, 16384)
   512  	DISPATCH(runtime·call32768, 32768)
   513  	DISPATCH(runtime·call65536, 65536)
   514  	DISPATCH(runtime·call131072, 131072)
   515  	DISPATCH(runtime·call262144, 262144)
   516  	DISPATCH(runtime·call524288, 524288)
   517  	DISPATCH(runtime·call1048576, 1048576)
   518  	DISPATCH(runtime·call2097152, 2097152)
   519  	DISPATCH(runtime·call4194304, 4194304)
   520  	DISPATCH(runtime·call8388608, 8388608)
   521  	DISPATCH(runtime·call16777216, 16777216)
   522  	DISPATCH(runtime·call33554432, 33554432)
   523  	DISPATCH(runtime·call67108864, 67108864)
   524  	DISPATCH(runtime·call134217728, 134217728)
   525  	DISPATCH(runtime·call268435456, 268435456)
   526  	DISPATCH(runtime·call536870912, 536870912)
   527  	DISPATCH(runtime·call1073741824, 1073741824)
   528  	MOVL	$runtime·badreflectcall(SB), AX	// argsize is larger than every stub
   529  	JMP	AX
   530  
   531  #define CALLFN(NAME,MAXSIZE)			\
   532  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   533  	NO_LOCAL_POINTERS;			\
   534  	/* copy argsize bytes from argptr into this frame starting at SP */		\
   535  	MOVL	argptr+8(FP), SI;		\
   536  	MOVL	argsize+12(FP), CX;		\
   537  	MOVL	SP, DI;				\
   538  	REP;MOVSB;				\
   539  	/* call function with DX holding f and AX the code pointer read from f */			\
   540  	MOVL	f+4(FP), DX;			\
   541  	MOVL	(DX), AX; 			\
   542  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   543  	CALL	AX;				\
   544  	/* copy return values back - skip the first retoffset bytes of both buffers and pass argtype dst src size to callRet in DX DI SI CX */		\
   545  	MOVL	argtype+0(FP), DX;		\
   546  	MOVL	argptr+8(FP), DI;		\
   547  	MOVL	argsize+12(FP), CX;		\
   548  	MOVL	retoffset+16(FP), BX;		\
   549  	MOVL	SP, SI;				\
   550  	ADDL	BX, DI;				\
   551  	ADDL	BX, SI;				\
   552  	SUBL	BX, CX;				\
   553  	CALL	callRet<>(SB);			\
   554  	RET
   555  
   556  // callRet copies return values back at the end of call*. This is a
   557  // separate function so it can allocate stack space for the arguments
   558  // to reflectcallmove. It does not follow the Go ABI; it expects its
   559  // arguments in registers.
        //
        // Register inputs, spilled in this order to the four slots of the
        // 16-byte frame before calling runtime·reflectcallmove:
        //   DX -> 0(SP)   argtype, as loaded by CALLFN
        //   DI -> 4(SP)   destination (argptr + retoffset)
        //   SI -> 8(SP)   source (callee frame + retoffset)
        //   CX -> 12(SP)  byte count (argsize - retoffset)
   560  TEXT callRet<>(SB), NOSPLIT, $16-0
   561  	MOVL	DX, 0(SP)
   562  	MOVL	DI, 4(SP)
   563  	MOVL	SI, 8(SP)
   564  	MOVL	CX, 12(SP)
   565  	CALL	runtime·reflectcallmove(SB)
   566  	RET
   567  
   568  CALLFN(·call16, 16)	// one fixed-frame call stub per DISPATCH line in reflectcall; frame sizes double from 16 bytes to 1073741824
   569  CALLFN(·call32, 32)
   570  CALLFN(·call64, 64)
   571  CALLFN(·call128, 128)
   572  CALLFN(·call256, 256)
   573  CALLFN(·call512, 512)
   574  CALLFN(·call1024, 1024)
   575  CALLFN(·call2048, 2048)
   576  CALLFN(·call4096, 4096)
   577  CALLFN(·call8192, 8192)
   578  CALLFN(·call16384, 16384)
   579  CALLFN(·call32768, 32768)
   580  CALLFN(·call65536, 65536)
   581  CALLFN(·call131072, 131072)
   582  CALLFN(·call262144, 262144)
   583  CALLFN(·call524288, 524288)
   584  CALLFN(·call1048576, 1048576)
   585  CALLFN(·call2097152, 2097152)
   586  CALLFN(·call4194304, 4194304)
   587  CALLFN(·call8388608, 8388608)
   588  CALLFN(·call16777216, 16777216)
   589  CALLFN(·call33554432, 33554432)
   590  CALLFN(·call67108864, 67108864)
   591  CALLFN(·call134217728, 134217728)
   592  CALLFN(·call268435456, 268435456)
   593  CALLFN(·call536870912, 536870912)
   594  CALLFN(·call1073741824, 1073741824)
   595  
   596  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Spins for `cycles` iterations, executing one PAUSE per iteration.
        	// NOTE(review): the TEXT line declares an argument size of 0 even
        	// though a 4-byte cycles argument is read from 0(FP).
        	// NOTE(review): the counter is decremented before it is tested, so
        	// cycles == 0 would wrap around and spin 2^32 times; callers
        	// presumably always pass a positive count - confirm.
   597  	MOVL	cycles+0(FP), AX
   598  again:
   599  	PAUSE
   600  	SUBL	$1, AX
   601  	JNZ	again
   602  	RET
   603  
   604  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
        	// No instructions are needed here; the body is a bare RET.
   605  	// Stores are already ordered on x86, so this is just a
   606  	// compile barrier.
   607  	RET
   608  
   609  // void jmpdefer(fn, sp);
   610  // called from deferreturn.
   611  // 1. pop the caller
   612  // 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   613  //    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   614  //    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   615  //    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   616  // 3. jmp to the argument
        //
        // The deferred function is entered by JMP with DX holding fv, so when it
        // returns it uses the rewound return address and the CALL that led here
        // is executed again.
   617  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   618  	MOVL	fv+0(FP), DX	// fn
   619  	MOVL	argp+4(FP), BX	// caller sp
   620  	LEAL	-4(BX), SP	// caller sp after CALL
   621  #ifdef GOBUILDMODE_shared
   622  	SUBL	$16, (SP)	// return to CALL again
   623  #else
   624  	SUBL	$5, (SP)	// return to CALL again
   625  #endif
   626  	MOVL	0(DX), BX	// BX = code pointer stored in the first word of fv
   627  	JMP	BX	// but first run the deferred function
   628  
   629  // Save state of caller into g->sched.
        // File-local helper, called from asmcgocall. It stores the caller's SP
        // and PC into the current g's sched, zeroes sched.ret, and calls
        // runtime·badctxt if sched.ctxt is non-zero. AX and BX are saved on
        // entry and restored before returning, so only flags change.
   630  TEXT gosave<>(SB),NOSPLIT,$0
   631  	PUSHL	AX
   632  	PUSHL	BX
   633  	get_tls(BX)
   634  	MOVL	g(BX), BX	// BX = current g
   635  	LEAL	arg+0(FP), AX	// caller's SP as it was before the CALL to gosave
   636  	MOVL	AX, (g_sched+gobuf_sp)(BX)
   637  	MOVL	-4(AX), AX	// the word just below that is our return address, i.e. the caller's PC
   638  	MOVL	AX, (g_sched+gobuf_pc)(BX)
   639  	MOVL	$0, (g_sched+gobuf_ret)(BX)
   640  	// Assert ctxt is zero. See func save.
   641  	MOVL	(g_sched+gobuf_ctxt)(BX), AX
   642  	TESTL	AX, AX
   643  	JZ	2(PC)	// ctxt is zero: skip the badctxt call
   644  	CALL	runtime·badctxt(SB)
   645  	POPL	BX
   646  	POPL	AX
   647  	RET
   648  
   649  // func asmcgocall(fn, arg unsafe.Pointer) int32
   650  // Call fn(arg) on the scheduler stack,
   651  // aligned appropriately for the gcc ABI.
   652  // See cgocall.go for more details.
        //
        // Three entry situations are handled:
        //   - no g yet (nosave): stay on the current stack and remember the raw SP;
        //   - already on m->g0 or m->gsignal (noswitch): stay on the current stack;
        //   - otherwise: save state with gosave, then switch to m->g0's stack.
        // The value fn leaves in AX is stored as the int32 result.
   653  TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   654  	MOVL	fn+0(FP), AX
   655  	MOVL	arg+4(FP), BX
   656    
   657  	MOVL	SP, DX	// DX = SP at entry, needed to get back after the call
   658    
   659  	// Figure out if we need to switch to m->g0 stack.
   660  	// We get called to create new OS threads too, and those
   661  	// come in on the m->g0 stack already.
   662  	get_tls(CX)
   663  	MOVL	g(CX), BP
   664  	CMPL	BP, $0
   665  	JEQ	nosave	// Don't even have a G yet.
   666  	MOVL	g_m(BP), BP
   667  	MOVL	m_g0(BP), SI
   668  	MOVL	g(CX), DI	// DI = g at entry; it stays in DI across gosave and the switch
   669  	CMPL	SI, DI
   670  	JEQ	noswitch
   671  	CMPL	DI, m_gsignal(BP)
   672  	JEQ	noswitch
   673  	CALL	gosave<>(SB)
   674  	get_tls(CX)
   675  	MOVL	SI, g(CX)	// current g = m->g0
   676  	MOVL	(g_sched+gobuf_sp)(SI), SP
   677    
   678  noswitch:
   679  	// Now on a scheduling stack (a pthread-created stack).
   680  	SUBL	$32, SP
   681  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   682  	MOVL	DI, 8(SP)	// save g
   683  	MOVL	(g_stack+stack_hi)(DI), DI
   684  	SUBL	DX, DI	// depth = g.stack.hi - SP at entry
   685  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   686  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   687  	CALL	AX
   688    
   689  	// Restore registers, g, stack pointer.
   690  	get_tls(CX)
   691  	MOVL	8(SP), DI	// DI = saved g
   692  	MOVL	(g_stack+stack_hi)(DI), SI
   693  	SUBL	4(SP), SI	// SI = g.stack.hi - saved depth, i.e. the SP to return on
   694  	MOVL	DI, g(CX)
   695  	MOVL	SI, SP
   696    
   697  	MOVL	AX, ret+8(FP)
   698  	RET
   699  nosave:
   700  	// Now on a scheduling stack (a pthread-created stack).
   701  	SUBL	$32, SP
   702  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   703  	MOVL	DX, 4(SP)	// save original stack pointer
   704  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   705  	CALL	AX
   706    
   707  	MOVL	4(SP), CX	// restore original stack pointer
   708  	MOVL	CX, SP
   709  	MOVL	AX, ret+8(FP)
   710  	RET
   711  
   712  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   713  // Turn the fn into a Go func (by taking its address) and call
   714  // cgocallback_gofunc.
        // The four words of the 16-byte frame carry the outgoing arguments:
        // &fn, frame, framesize and ctxt, in that order.
   715  TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   716  	LEAL	fn+0(FP), AX	// address of the fn slot, passed where a FuncVal* is expected
   717  	MOVL	AX, 0(SP)
   718  	MOVL	frame+4(FP), AX
   719  	MOVL	AX, 4(SP)
   720  	MOVL	framesize+8(FP), AX
   721  	MOVL	AX, 8(SP)
   722  	MOVL	ctxt+12(FP), AX
   723  	MOVL	AX, 12(SP)
   724  	MOVL	$runtime·cgocallback_gofunc(SB), AX	// called indirectly through AX
   725  	CALL	AX
   726  	RET
   727  
   728  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   729  // See cgocall.go for more details.
        //
        // Outline: obtain an m (borrowing one with needm if the thread has no
        // g), save m->g0->sched.sp, switch to m->curg's stack, call
        // runtime·cgocallbackg, restore m->curg->sched and m->g0->sched.sp,
        // switch back to g0, and finally call dropm if an m was borrowed.
        // DX carries the "m on entry" value throughout (zero if it was borrowed).
   730  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   731  	NO_LOCAL_POINTERS
   732    
   733  	// If g is nil, Go did not create the current thread.
   734  	// Call needm to obtain one for temporary use.
   735  	// In this case, we're running on the thread stack, so there's
   736  	// lots of space, but the linker doesn't know. Hide the call from
   737  	// the linker analysis by using an indirect call through AX.
   738  	get_tls(CX)
   739  #ifdef GOOS_windows
   740  	MOVL	$0, BP
   741  	CMPL	CX, $0
   742  	JEQ	2(PC) // TODO
        	// (when the TLS base in CX is zero, the jump above skips the load
        	// below and BP stays 0, which selects the needm path)
   743  #endif
   744  	MOVL	g(CX), BP
   745  	CMPL	BP, $0
   746  	JEQ	needm
   747  	MOVL	g_m(BP), BP
   748  	MOVL	BP, DX // saved copy of oldm
   749  	JMP	havem
   750  needm:
   751  	MOVL	$0, 0(SP)
   752  	MOVL	$runtime·needm(SB), AX
   753  	CALL	AX
   754  	MOVL	0(SP), DX	// DX = the word at 0(SP), stored as 0 before the call; tested against 0 before dropm
   755  	get_tls(CX)
   756  	MOVL	g(CX), BP
   757  	MOVL	g_m(BP), BP
   758    
   759  	// Set m->sched.sp = SP, so that if a panic happens
   760  	// during the function we are about to execute, it will
   761  	// have a valid SP to run on the g0 stack.
   762  	// The next few lines (after the havem label)
   763  	// will save this SP onto the stack and then write
   764  	// the same SP back to m->sched.sp. That seems redundant,
   765  	// but if an unrecovered panic happens, unwindm will
   766  	// restore the g->sched.sp from the stack location
   767  	// and then systemstack will try to use it. If we don't set it here,
   768  	// that restored SP will be uninitialized (typically 0) and
   769  	// will not be usable.
   770  	MOVL	m_g0(BP), SI
   771  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   772    
   773  havem:
   774  	// Now there's a valid m, and we're running on its m->g0.
   775  	// Save current m->g0->sched.sp on stack and then set it to SP.
   776  	// Save current sp in m->g0->sched.sp in preparation for
   777  	// switch back to m->curg stack.
   778  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   779  	MOVL	m_g0(BP), SI
   780  	MOVL	(g_sched+gobuf_sp)(SI), AX
   781  	MOVL	AX, 0(SP)
   782  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   783    
   784  	// Switch to m->curg stack and call runtime.cgocallbackg.
   785  	// Because we are taking over the execution of m->curg
   786  	// but *not* resuming what had been running, we need to
   787  	// save that information (m->curg->sched) so we can restore it.
   788  	// We can restore m->curg->sched.sp easily, because calling
   789  	// runtime.cgocallbackg leaves SP unchanged upon return.
   790  	// To save m->curg->sched.pc, we push it onto the stack.
   791  	// This has the added benefit that it looks to the traceback
   792  	// routine like cgocallbackg is going to return to that
   793  	// PC (because the frame we allocate below has the same
   794  	// size as cgocallback_gofunc's frame declared above)
   795  	// so that the traceback will seamlessly trace back into
   796  	// the earlier calls.
   797  	//
   798  	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   799  	// 8(SP) is unused.
   800  	MOVL	m_curg(BP), SI
   801  	MOVL	SI, g(CX)
   802  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   803  	MOVL	(g_sched+gobuf_pc)(SI), BP
   804  	MOVL	BP, -4(DI)	// saved curg pc sits where a return address would be
   805  	MOVL	ctxt+12(FP), CX	// read through FP before SP is replaced on the next line
   806  	LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc plus a 12-byte frame
   807  	MOVL	DX, 4(SP)
   808  	MOVL	CX, 0(SP)	// ctxt is the argument passed to cgocallbackg
   809  	CALL	runtime·cgocallbackg(SB)
   810  	MOVL	4(SP), DX	// reload the saved oldm
   811    
   812  	// Restore g->sched (== m->curg->sched) from saved values.
   813  	get_tls(CX)
   814  	MOVL	g(CX), SI
   815  	MOVL	12(SP), BP	// the pc stored at -4(DI) above
   816  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   817  	LEAL	(12+4)(SP), DI	// the original curg sched.sp
   818  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   819    
   820  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   821  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   822  	// so we do not have to restore it.)
   823  	MOVL	g(CX), BP
   824  	MOVL	g_m(BP), BP
   825  	MOVL	m_g0(BP), SI
   826  	MOVL	SI, g(CX)
   827  	MOVL	(g_sched+gobuf_sp)(SI), SP
   828  	MOVL	0(SP), AX	// the previous g0 sched.sp saved at havem
   829  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   830    
   831  	// If the m on entry was nil, we called needm above to borrow an m
   832  	// for the duration of the call. Since the call is over, return it with dropm.
   833  	CMPL	DX, $0
   834  	JNE 3(PC)	// had an m on entry: skip the two-instruction dropm call
   835  	MOVL	$runtime·dropm(SB), AX
   836  	CALL	AX
   837    
   838  	// Done!
   839  	RET
   840  
   841  // void setg(G*); set g. for use by needm.
        // Stores gg into the TLS g slot. On Windows the word at 0x14(FS) is
        // updated first: it is zeroed when gg is nil (and the function returns
        // without touching the g slot), otherwise it is pointed at gg.m.tls.
   842  TEXT runtime·setg(SB), NOSPLIT, $0-4
   843  	MOVL	gg+0(FP), BX
   844  #ifdef GOOS_windows
   845  	CMPL	BX, $0
   846  	JNE	settls
   847  	MOVL	$0, 0x14(FS)
   848  	RET
   849  settls:
   850  	MOVL	g_m(BX), AX
   851  	LEAL	m_tls(AX), AX	// AX = &gg.m.tls
   852  	MOVL	AX, 0x14(FS)
   853  #endif
   854  	get_tls(CX)
   855  	MOVL	BX, g(CX)
   856  	RET
   857  
   858  // void setg_gcc(G*); set g. for use by gcc
        // File-local helper whose address rt0_go passes to _cgo_init. It stores
        // its single stack argument into the TLS g slot, using only AX and DX.
   859  TEXT setg_gcc<>(SB), NOSPLIT, $0
   860  	get_tls(AX)
   861  	MOVL	gg+0(FP), DX
   862  	MOVL	DX, g(AX)
   863  	RET
   864  
   865  TEXT runtime·abort(SB),NOSPLIT,$0-0
        	// Raises software interrupt 3; if execution continues past the trap,
        	// spins forever. There is no RET, so abort never returns.
   866  	INT	$3
   867  loop:
   868  	JMP	loop
   869  
   870  // check that SP is in range [g->stack.lo, g->stack.hi)
        // Calls runtime·abort if either bound is violated.
        // NOTE(review): both branches below use JHI, a strict unsigned
        // "greater than", so SP == g->stack.lo also aborts; the range actually
        // accepted appears to be (lo, hi) rather than [lo, hi) - confirm.
   871  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   872  	get_tls(CX)
   873  	MOVL	g(CX), AX
   874  	CMPL	(g_stack+stack_hi)(AX), SP
   875  	JHI	2(PC)	// stack.hi > SP: upper bound holds, skip abort
   876  	CALL	runtime·abort(SB)
   877  	CMPL	SP, (g_stack+stack_lo)(AX)
   878  	JHI	2(PC)	// SP > stack.lo: lower bound holds, skip abort
   879  	CALL	runtime·abort(SB)
   880  	RET
   881  
   882  // func cputicks() int64
        // Returns the RDTSC counter, low half from AX and high half from DX.
        // A fence is issued first only when the X86.HasSSE2 flag is 1: LFENCE
        // if runtime·lfenceBeforeRdtsc is 1 (set in rt0_go for "GenuineIntel"),
        // otherwise MFENCE. Without SSE2 no fence is issued at all.
   883  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   884  	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
   885  	JNE	done	// no SSE2: go straight to RDTSC
   886  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   887  	JNE	mfence
   888  	LFENCE
   889  	JMP	done
   890  mfence:
   891  	MFENCE
   892  done:
   893  	RDTSC
   894  	MOVL	AX, ret_lo+0(FP)
   895  	MOVL	DX, ret_hi+4(FP)
   896  	RET
   897  
   898  TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
        	// Calls runtime·setldt with three stack arguments: entry number 7,
        	// the address of m0.tls, and a size of 32 bytes. Called from rt0_go
        	// when no _cgo_init hook sets up TLS.
   899  	// set up ldt 7 to point at m0.tls
   900  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   901  	// the entry number is just a hint.  setldt will set up GS with what it used.
   902  	MOVL	$7, 0(SP)
   903  	LEAL	runtime·m0+m_tls(SB), AX
   904  	MOVL	AX, 4(SP)
   905  	MOVL	$32, 8(SP)	// sizeof(tls array)
   906  	CALL	runtime·setldt(SB)
   907  	RET
   908  
   909  TEXT runtime·emptyfunc(SB),0,$0-0
        	// Does nothing but return. rt0_go calls it right after wiring up
        	// g0 and m0, with the comment "fault if stack check is wrong".
        	// NOTE(review): the flags field is 0 rather than NOSPLIT, presumably
        	// so that a stack-check prologue can be emitted here - confirm.
   910  	RET
   911  
   912  // hash function using AES hardware instructions
        // Stack layout: p at 0(FP), a seed at 4(FP) (read by aeshashbody as
        // h+4(FP)), s at 8(FP), result at 12(FP). This stub only loads the
        // registers aeshashbody expects and jumps to it.
   913  TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   914  	MOVL	p+0(FP), AX	// ptr to data
   915  	MOVL	s+8(FP), BX	// size
   916  	LEAL	ret+12(FP), DX	// address of the result slot
   917  	JMP	runtime·aeshashbody(SB)
   918  
   919  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
        	// String variant of aeshash: p points at a string header whose first
        	// word is the data pointer and second word the length. The seed is
        	// at 4(FP) (read by aeshashbody as h+4(FP)), the result at 8(FP).
   920  	MOVL	p+0(FP), AX	// ptr to string object
   921  	MOVL	4(AX), BX	// length of string
   922  	MOVL	(AX), AX	// string data
   923  	LEAL	ret+8(FP), DX	// address of the result slot
   924  	JMP	runtime·aeshashbody(SB)
   925  
   926  // Inputs to aeshashbody, set up by the functions that JMP here — AX: pointer to the data to hash
   927  // BX: length of the data in bytes
   928  // DX: address to put return value (every exit path stores 32 bits there with MOVL)
   929  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0	// aeshash and aeshashstr enter by JMP, not CALL, so h+4(FP) below reads the seed from their argument frame
   930  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   931  	PINSRW	$4, BX, X0	            // 16 bits of length
   932  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   933  	MOVO	X0, X1                      // save unscrambled seed
   934  	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   935  	AESENC	X0, X0                      // scramble seed
   936  
   937  	CMPL	BX, $16	// dispatch on the length using unsigned comparisons
   938  	JB	aes0to15
   939  	JE	aes16
   940  	CMPL	BX, $32
   941  	JBE	aes17to32
   942  	CMPL	BX, $64
   943  	JBE	aes33to64
   944  	JMP	aes65plus
   945  
   946  aes0to15:
   947  	TESTL	BX, BX
   948  	JE	aes0
   949  
   950  	ADDL	$16, AX	// AX = data+16, the end of a 16-byte load that starts at data
   951  	TESTW	$0xff0, AX	// bits 4-11 of data+16 are all zero exactly when data lies in the last 16 bytes of a 4096-byte-aligned region
   952  	JE	endofpage
   953  
   954  	// 16 bytes loaded at this address won't cross
   955  	// a page boundary, so we can load it directly.
   956  	MOVOU	-16(AX), X1	// load the 16 bytes starting at data
   957  	ADDL	BX, BX	// BX = 2*length, so BX*8 below steps through 16-byte table entries
   958  	PAND	masks<>(SB)(BX*8), X1	// keep only the first length bytes of the load
   959  
   960  final1:
   961  	AESENC	X0, X1  // scramble input, xor in seed
   962  	AESENC	X1, X1  // scramble combo 2 times
   963  	AESENC	X1, X1
   964  	MOVL	X1, (DX)	// store the low 32 bits as the hash
   965  	RET
   966  
   967  endofpage:
   968  	// address ends in 1111xxxx. Might be up against
   969  	// a page boundary, so load ending at last byte.
   970  	// Then shift bytes down using pshufb.
   971  	MOVOU	-32(AX)(BX*1), X1	// AX is data+16 here, so this loads the 16 bytes that end at data+length
   972  	ADDL	BX, BX	// BX = 2*length, so BX*8 below steps through 16-byte table entries
   973  	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top length bytes to the bottom and zero the rest
   974  	JMP	final1
   975  
   976  aes0:
   977  	// Return scrambled input seed
   978  	AESENC	X0, X0
   979  	MOVL	X0, (DX)
   980  	RET
   981  
   982  aes16:
   983  	MOVOU	(AX), X1	// exactly 16 bytes: a single full load, no masking needed
   984  	JMP	final1
   985  
   986  aes17to32:
   987  	// make second starting seed
   988  	PXOR	runtime·aeskeysched+16(SB), X1
   989  	AESENC	X1, X1
   990  
   991  	// load data to be hashed
   992  	MOVOU	(AX), X2	// first 16 bytes
   993  	MOVOU	-16(AX)(BX*1), X3	// last 16 bytes (overlaps the first load when length < 32)
   994  
   995  	// scramble 3 times
   996  	AESENC	X0, X2
   997  	AESENC	X1, X3
   998  	AESENC	X2, X2
   999  	AESENC	X3, X3
  1000  	AESENC	X2, X2
  1001  	AESENC	X3, X3
  1002  
  1003  	// combine results
  1004  	PXOR	X3, X2
  1005  	MOVL	X2, (DX)
  1006  	RET
  1007  
  1008  aes33to64:
  1009  	// make 3 more starting seeds
  1010  	MOVO	X1, X2
  1011  	MOVO	X1, X3
  1012  	PXOR	runtime·aeskeysched+16(SB), X1
  1013  	PXOR	runtime·aeskeysched+32(SB), X2
  1014  	PXOR	runtime·aeskeysched+48(SB), X3
  1015  	AESENC	X1, X1
  1016  	AESENC	X2, X2
  1017  	AESENC	X3, X3
  1018  
  1019  	MOVOU	(AX), X4	// first 32 bytes in X4, X5
  1020  	MOVOU	16(AX), X5
  1021  	MOVOU	-32(AX)(BX*1), X6	// last 32 bytes in X6, X7 (overlap the first 32 when length < 64)
  1022  	MOVOU	-16(AX)(BX*1), X7
  1023  
  1024  	AESENC	X0, X4	// mix each data lane with its own seed
  1025  	AESENC	X1, X5
  1026  	AESENC	X2, X6
  1027  	AESENC	X3, X7
  1028  
  1029  	AESENC	X4, X4	// two more scrambles of each lane
  1030  	AESENC	X5, X5
  1031  	AESENC	X6, X6
  1032  	AESENC	X7, X7
  1033  
  1034  	AESENC	X4, X4
  1035  	AESENC	X5, X5
  1036  	AESENC	X6, X6
  1037  	AESENC	X7, X7
  1038  
  1039  	PXOR	X6, X4	// fold the four lanes into X4
  1040  	PXOR	X7, X5
  1041  	PXOR	X5, X4
  1042  	MOVL	X4, (DX)
  1043  	RET
  1044  
  1045  aes65plus:
  1046  	// make 3 more starting seeds
  1047  	MOVO	X1, X2
  1048  	MOVO	X1, X3
  1049  	PXOR	runtime·aeskeysched+16(SB), X1
  1050  	PXOR	runtime·aeskeysched+32(SB), X2
  1051  	PXOR	runtime·aeskeysched+48(SB), X3
  1052  	AESENC	X1, X1
  1053  	AESENC	X2, X2
  1054  	AESENC	X3, X3
  1055  
  1056  	// start with last (possibly overlapping) block
  1057  	MOVOU	-64(AX)(BX*1), X4
  1058  	MOVOU	-48(AX)(BX*1), X5
  1059  	MOVOU	-32(AX)(BX*1), X6
  1060  	MOVOU	-16(AX)(BX*1), X7
  1061  
  1062  	// scramble state once
  1063  	AESENC	X0, X4
  1064  	AESENC	X1, X5
  1065  	AESENC	X2, X6
  1066  	AESENC	X3, X7
  1067  
  1068  	// compute number of remaining 64-byte blocks
  1069  	DECL	BX
  1070  	SHRL	$6, BX	// BX = (length-1)/64, which is at least 1 because length >= 65 on this path
  1071  
  1072  aesloop:
  1073  	// scramble state, xor in a block
  1074  	MOVOU	(AX), X0	// X0-X3 are reused for data from here on; the seeds are already mixed into X4-X7
  1075  	MOVOU	16(AX), X1
  1076  	MOVOU	32(AX), X2
  1077  	MOVOU	48(AX), X3
  1078  	AESENC	X0, X4
  1079  	AESENC	X1, X5
  1080  	AESENC	X2, X6
  1081  	AESENC	X3, X7
  1082  
  1083  	// scramble state
  1084  	AESENC	X4, X4
  1085  	AESENC	X5, X5
  1086  	AESENC	X6, X6
  1087  	AESENC	X7, X7
  1088  
  1089  	ADDL	$64, AX	// advance to the next 64-byte block
  1090  	DECL	BX
  1091  	JNE	aesloop	// loop until the block count reaches zero
  1092  
  1093  	// 2 more scrambles to finish
  1094  	AESENC	X4, X4
  1095  	AESENC	X5, X5
  1096  	AESENC	X6, X6
  1097  	AESENC	X7, X7
  1098  
  1099  	AESENC	X4, X4
  1100  	AESENC	X5, X5
  1101  	AESENC	X6, X6
  1102  	AESENC	X7, X7
  1103  
  1104  	PXOR	X6, X4	// fold the four lanes into X4
  1105  	PXOR	X7, X5
  1106  	PXOR	X5, X4
  1107  	MOVL	X4, (DX)
  1108  	RET
  1109  
  1110  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
  1111  	MOVL	h+4(FP), X0	// dword 0 of X0 = seed
  1112  	MOVL	p+0(FP), AX	// AX = address of the 32-bit value to hash
  1113  	PINSRD	$1, (AX), X0	// dword 1 of X0 = the value
  1114  	AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed by successive 16-byte blocks of aeskeysched
  1115  	AESENC	runtime·aeskeysched+16(SB), X0
  1116  	AESENC	runtime·aeskeysched+32(SB), X0
  1117  	MOVL	X0, ret+8(FP)	// result = low 32 bits of the state
  1118  	RET
  1119  
  1120  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12	// hashes the 8 bytes at p together with seed h; the 32-bit result is stored to ret
  1121  	MOVL	p+0(FP), AX	// ptr to data
  1122  	MOVQ	(AX), X0	// data
  1123  	PINSRD	$2, h+4(FP), X0	// seed
  1124  	AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed by successive 16-byte blocks of aeskeysched
  1125  	AESENC	runtime·aeskeysched+16(SB), X0
  1126  	AESENC	runtime·aeskeysched+32(SB), X0
  1127  	MOVL	X0, ret+8(FP)	// result = low 32 bits of the state
  1128  	RET
  1129  
  1130  // masks<> holds 16 masks of 16 bytes each: entry i (at offset 16*i) has 0xff in its low i bytes and zero elsewhere, so PAND with it gets rid of the data in the high 16-i bytes of the register.
  1131  DATA masks<>+0x00(SB)/4, $0x00000000
  1132  DATA masks<>+0x04(SB)/4, $0x00000000
  1133  DATA masks<>+0x08(SB)/4, $0x00000000
  1134  DATA masks<>+0x0c(SB)/4, $0x00000000
  1135  
  1136  DATA masks<>+0x10(SB)/4, $0x000000ff
  1137  DATA masks<>+0x14(SB)/4, $0x00000000
  1138  DATA masks<>+0x18(SB)/4, $0x00000000
  1139  DATA masks<>+0x1c(SB)/4, $0x00000000
  1140  
  1141  DATA masks<>+0x20(SB)/4, $0x0000ffff
  1142  DATA masks<>+0x24(SB)/4, $0x00000000
  1143  DATA masks<>+0x28(SB)/4, $0x00000000
  1144  DATA masks<>+0x2c(SB)/4, $0x00000000
  1145  
  1146  DATA masks<>+0x30(SB)/4, $0x00ffffff
  1147  DATA masks<>+0x34(SB)/4, $0x00000000
  1148  DATA masks<>+0x38(SB)/4, $0x00000000
  1149  DATA masks<>+0x3c(SB)/4, $0x00000000
  1150  
  1151  DATA masks<>+0x40(SB)/4, $0xffffffff
  1152  DATA masks<>+0x44(SB)/4, $0x00000000
  1153  DATA masks<>+0x48(SB)/4, $0x00000000
  1154  DATA masks<>+0x4c(SB)/4, $0x00000000
  1155  
  1156  DATA masks<>+0x50(SB)/4, $0xffffffff
  1157  DATA masks<>+0x54(SB)/4, $0x000000ff
  1158  DATA masks<>+0x58(SB)/4, $0x00000000
  1159  DATA masks<>+0x5c(SB)/4, $0x00000000
  1160  
  1161  DATA masks<>+0x60(SB)/4, $0xffffffff
  1162  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1163  DATA masks<>+0x68(SB)/4, $0x00000000
  1164  DATA masks<>+0x6c(SB)/4, $0x00000000
  1165  
  1166  DATA masks<>+0x70(SB)/4, $0xffffffff
  1167  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1168  DATA masks<>+0x78(SB)/4, $0x00000000
  1169  DATA masks<>+0x7c(SB)/4, $0x00000000
  1170  
  1171  DATA masks<>+0x80(SB)/4, $0xffffffff
  1172  DATA masks<>+0x84(SB)/4, $0xffffffff
  1173  DATA masks<>+0x88(SB)/4, $0x00000000
  1174  DATA masks<>+0x8c(SB)/4, $0x00000000
  1175  
  1176  DATA masks<>+0x90(SB)/4, $0xffffffff
  1177  DATA masks<>+0x94(SB)/4, $0xffffffff
  1178  DATA masks<>+0x98(SB)/4, $0x000000ff
  1179  DATA masks<>+0x9c(SB)/4, $0x00000000
  1180  
  1181  DATA masks<>+0xa0(SB)/4, $0xffffffff
  1182  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1183  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1184  DATA masks<>+0xac(SB)/4, $0x00000000
  1185  
  1186  DATA masks<>+0xb0(SB)/4, $0xffffffff
  1187  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1188  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1189  DATA masks<>+0xbc(SB)/4, $0x00000000
  1190  
  1191  DATA masks<>+0xc0(SB)/4, $0xffffffff
  1192  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1193  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1194  DATA masks<>+0xcc(SB)/4, $0x00000000
  1195  
  1196  DATA masks<>+0xd0(SB)/4, $0xffffffff
  1197  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1198  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1199  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1200  
  1201  DATA masks<>+0xe0(SB)/4, $0xffffffff
  1202  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1203  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1204  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1205  
  1206  DATA masks<>+0xf0(SB)/4, $0xffffffff
  1207  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1208  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1209  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1210  
  1211  GLOBL masks<>(SB),RODATA,$256	// 16 entries of 16 bytes, read-only
  1212  
  1213  // shifts<> holds 16 arguments to pshufb, 16 bytes each. Entry i (at offset 16*i) moves
  1214  // the top i bytes of the register down to its low i bytes; the remaining control
  1215  // bytes are 0xff, for which pshufb writes zero. The index is how many bytes to move.
  1216  DATA shifts<>+0x00(SB)/4, $0x00000000
  1217  DATA shifts<>+0x04(SB)/4, $0x00000000
  1218  DATA shifts<>+0x08(SB)/4, $0x00000000
  1219  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1220  
  1221  DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1222  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1223  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1224  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1225  
  1226  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1227  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1228  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1229  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1230  
  1231  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1232  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1233  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1234  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1235  
  1236  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1237  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1238  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1239  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1240  
  1241  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1242  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1243  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1244  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1245  
  1246  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1247  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1248  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1249  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1250  
  1251  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1252  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1253  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1254  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1255  
  1256  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1257  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1258  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1259  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1260  
  1261  DATA shifts<>+0x90(SB)/4, $0x0a090807
  1262  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1263  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1264  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1265  
  1266  DATA shifts<>+0xa0(SB)/4, $0x09080706
  1267  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1268  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1269  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1270  
  1271  DATA shifts<>+0xb0(SB)/4, $0x08070605
  1272  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1273  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1274  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1275  
  1276  DATA shifts<>+0xc0(SB)/4, $0x07060504
  1277  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1278  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1279  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1280  
  1281  DATA shifts<>+0xd0(SB)/4, $0x06050403
  1282  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1283  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1284  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1285  
  1286  DATA shifts<>+0xe0(SB)/4, $0x05040302
  1287  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1288  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1289  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1290  
  1291  DATA shifts<>+0xf0(SB)/4, $0x04030201
  1292  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1293  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1294  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1295  
  1296  GLOBL shifts<>(SB),RODATA,$256	// 16 entries of 16 bytes, read-only
  1297  
  1298  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1299  	// Report whether masks<>(SB) and shifts<>(SB) both start on a 16-byte boundary.
  1300  	MOVL	$shifts<>(SB), BX
  1301  	MOVL	$masks<>(SB), AX
  1302  	ORL	AX, BX	// a low bit is clear in the OR only if it is clear in both addresses
  1303  	TESTL	$15, BX
  1304  	SETEQ	ret+0(FP)	// ret = true when the low four bits of both addresses are zero
  1305  	RET
  1306  
  1307  TEXT runtime·return0(SB), NOSPLIT, $0	// leaves 0 in AX and returns; touches nothing else
  1308  	MOVL	$0, AX
  1309  	RET
  1310  
  1311  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1312  // Must obey the gcc calling convention. The result is left in AX.
  1313  TEXT _cgo_topofstack(SB),NOSPLIT,$0	// the visible instructions write only AX and CX
  1314  	get_tls(CX)
  1315  	MOVL	g(CX), AX	// AX = g
  1316  	MOVL	g_m(AX), AX	// AX = g->m
  1317  	MOVL	m_curg(AX), AX	// AX = g->m->curg
  1318  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = g->m->curg.stack.hi
  1319  	RET
  1320  
  1321  // The top-most function running on a goroutine
  1322  // returns to goexit+PCQuantum.
  1323  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1324  	BYTE	$0x90	// NOP; presumably pads the entry so that goexit+PCQuantum lands on the CALL below (confirm PCQuantum for 386)
  1325  	CALL	runtime·goexit1(SB)	// does not return
  1326  	// traceback from goexit1 must hit code range of goexit
  1327  	BYTE	$0x90	// NOP; keeps the CALL's return address inside goexit's code range
  1328  
  1329  // addmoduledata appends a module's moduledata to the linked list of
  1330  // moduledata objects. The linker generates a function that calls it from
  1331  // .init_array, so it keeps to the platform ABI for register preservation:
  1332  // only AX, CX (implicitly) and DX are touched. It departs from that ABI for
  1333  // arguments, though: the pointer to the moduledata is passed in AX.
  1334  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1335  	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1336  	MOVL	AX, moduledata_next(DX)	// tail.next = new module
  1337  	MOVL	AX, runtime·lastmoduledatap(SB)	// the new module becomes the tail
  1338  	RET
  1339  
  1340  TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
  1341  	MOVL	$0, 4(SP)	// high word of the 64-bit temporary is zero
  1342  	MOVL	a+0(FP), AX
  1343  	MOVL	AX, 0(SP)	// low word of the temporary = a
  1344  	FMOVV	0(SP), F0	// load the temporary onto the x87 stack as a 64-bit integer
  1345  	FMOVDP	F0, ret+4(FP)	// store it as a float64 result and pop
  1346  	RET
  1347  
  1348  TEXT runtime·float64touint32(SB),NOSPLIT,$12-12	// frame: 0(SP) holds the saved x87 control word, 4(SP)..11(SP) the 64-bit integer temporary
  1349  	FMOVD	a+0(FP), F0	// push the float64 argument onto the x87 stack
  1350  	FSTCW	0(SP)	// save the current x87 control word
  1351  	FLDCW	runtime·controlWord64trunc(SB)	// switch control word; by its name presumably 64-bit precision with truncation (defined elsewhere, confirm)
  1352  	FMOVVP	F0, 4(SP)	// store as a 64-bit integer and pop
  1353  	FLDCW	0(SP)	// restore the saved control word
  1354  	MOVL	4(SP), AX	// low 32 bits of the integer are the result
  1355  	MOVL	AX, ret+8(FP)
  1356  	RET
  1357  
  1358  // gcWriteBarrier performs a heap pointer write and informs the GC.
  1359  //
  1360  // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1361  // - DI is the destination of the write
  1362  // - AX is the value being written at DI
  1363  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1364  // but may clobber others (e.g., SSE registers).
  1365  TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28	// frame slots: 0 DI, 4 AX, 8 DX, 12 BP, 16 SI, 20 CX, 24 BX
  1366  	// Save the registers clobbered by the fast path. This is slightly
  1367  	// faster than having the caller spill these.
  1368  	MOVL	CX, 20(SP)
  1369  	MOVL	BX, 24(SP)
  1370  	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1371  	// across a sequence of write barriers.
  1372  	get_tls(BX)
  1373  	MOVL	g(BX), BX
  1374  	MOVL	g_m(BX), BX
  1375  	MOVL	m_p(BX), BX	// BX = g.m.p
  1376  	MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// CX = p.wbBuf.next
  1377  	// Increment wbBuf.next position.
  1378  	LEAL	8(CX), CX	// each record is two 4-byte words
  1379  	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
  1380  	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)	// tested by JEQ below; the intervening MOVLs leave FLAGS unchanged
  1381  	// Record the write.
  1382  	MOVL	AX, -8(CX)	// Record value
  1383  	MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
  1384  	MOVL	BX, -4(CX)	// Record *slot
  1385  	// Is the buffer full? (flags set in CMPL above)
  1386  	JEQ	flush
  1387  ret:
  1388  	MOVL	20(SP), CX	// restore the two registers used by the fast path
  1389  	MOVL	24(SP), BX
  1390  	// Do the write.
  1391  	MOVL	AX, (DI)
  1392  	RET
  1393  
  1394  flush:
  1395  	// Save all general purpose registers since these could be
  1396  	// clobbered by wbBufFlush and were not saved by the caller.
  1397  	MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
  1398  	MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
  1399  	// BX already saved
  1400  	// CX already saved
  1401  	MOVL	DX, 8(SP)
  1402  	MOVL	BP, 12(SP)
  1403  	MOVL	SI, 16(SP)
  1404  	// DI already saved
  1405  
  1406  	// This takes arguments DI and AX
  1407  	CALL	runtime·wbBufFlush(SB)
  1408  
  1409  	MOVL	0(SP), DI	// reload everything saved above; CX and BX are restored at ret
  1410  	MOVL	4(SP), AX
  1411  	MOVL	8(SP), DX
  1412  	MOVL	12(SP), BP
  1413  	MOVL	16(SP), SI
  1414  	JMP	ret