github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/runtime/asm_amd64.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_amd64 is common startup code for most amd64 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
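        // At process entry the stack therefore looks like (a sketch of the
        // standard SysV layout; the exact contents above argv are OS-specific):
        //	0(SP)   argc
        //	8(SP)   argv[0]
        //	...     argv[argc-1], then a NULL, then the environment strings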
    14  TEXT _rt0_amd64(SB),NOSPLIT,$-8
    15  	MOVQ	0(SP), DI	// argc
    16  	LEAQ	8(SP), SI	// argv
    17  	JMP	runtime·rt0_go(SB)
    18  
    19  // main is common startup code for most amd64 systems when using
    20  // external linking. The C startup code will call the symbol "main"
    21  // passing argc and argv in the usual C ABI registers DI and SI.
    22  TEXT main(SB),NOSPLIT,$-8
    23  	JMP	runtime·rt0_go(SB)
    24  
    25  // _rt0_amd64_lib is common startup code for most amd64 systems when
    26  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27  // arrange to invoke this function as a global constructor (for
    28  // c-archive) or when the shared library is loaded (for c-shared).
    29  // We expect argc and argv to be passed in the usual C ABI registers
    30  // DI and SI.
    31  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32  	// Align stack per ELF ABI requirements.
    33  	MOVQ	SP, AX
    34  	ANDQ	$~15, SP
    35  	// Save C ABI callee-saved registers, as caller may need them.
    36  	MOVQ	BX, 0x10(SP)
    37  	MOVQ	BP, 0x18(SP)
    38  	MOVQ	R12, 0x20(SP)
    39  	MOVQ	R13, 0x28(SP)
    40  	MOVQ	R14, 0x30(SP)
    41  	MOVQ	R15, 0x38(SP)
    42  	MOVQ	AX, 0x40(SP)
    43  
    44  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46  
    47  	// Synchronous initialization.
    48  	CALL	runtime·libpreinit(SB)
    49  
    50  	// Create a new thread to finish Go runtime initialization.
    51  	MOVQ	_cgo_sys_thread_create(SB), AX
    52  	TESTQ	AX, AX
    53  	JZ	nocgo
    54  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55  	MOVQ	$0, SI
    56  	CALL	AX
    57  	JMP	restore
    58  
    59  nocgo:
    60  	MOVQ	$0x800000, 0(SP)		// stacksize
    61  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62  	MOVQ	AX, 8(SP)			// fn
    63  	CALL	runtime·newosproc0(SB)
    64  
    65  restore:
    66  	MOVQ	0x10(SP), BX
    67  	MOVQ	0x18(SP), BP
    68  	MOVQ	0x20(SP), R12
    69  	MOVQ	0x28(SP), R13
    70  	MOVQ	0x30(SP), R14
    71  	MOVQ	0x38(SP), R15
    72  	MOVQ	0x40(SP), SP
    73  	RET
    74  
    75  // _rt0_amd64_lib_go initializes the Go runtime.
    76  // This is started in a separate thread by _rt0_amd64_lib.
    77  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    78  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    79  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80  	JMP	runtime·rt0_go(SB)
    81  
    82  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    83  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    85  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86  
    87  // Defined as ABIInternal since it does not use the stack-based Go ABI (and
    88  // in addition there are no calls to this entry point from Go code).
    89  TEXT runtime·rt0_go<ABIInternal>(SB),NOSPLIT,$0
    90  	// copy arguments forward on an even stack
    91  	MOVQ	DI, AX		// argc
    92  	MOVQ	SI, BX		// argv
    93  	SUBQ	$(4*8+7), SP		// 2args 2auto
    94  	ANDQ	$~15, SP
    95  	MOVQ	AX, 16(SP)
    96  	MOVQ	BX, 24(SP)
    97  
    98  	// create istack out of the given (operating system) stack.
    99  	// _cgo_init may update stackguard.
   100  	MOVQ	$runtime·g0(SB), DI
   101  	LEAQ	(-64*1024+104)(SP), BX
   102  	MOVQ	BX, g_stackguard0(DI)
   103  	MOVQ	BX, g_stackguard1(DI)
   104  	MOVQ	BX, (g_stack+stack_lo)(DI)
   105  	MOVQ	SP, (g_stack+stack_hi)(DI)
   106  
   107  	// find out information about the processor we're on
   108  	MOVL	$0, AX
   109  	CPUID
   110  	MOVL	AX, SI
   111  	CMPL	AX, $0
   112  	JE	nocpuinfo
   113  
   114  	// Figure out how to serialize RDTSC.
   115  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   116  	// Don't know about the rest, so let's do MFENCE.
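        	// CPUID leaf 0 returns the vendor string split across BX, DX, CX
        	// ("Genu", "ineI", "ntel" for GenuineIntel), which is checked below.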
   117  	CMPL	BX, $0x756E6547  // "Genu"
   118  	JNE	notintel
   119  	CMPL	DX, $0x49656E69  // "ineI"
   120  	JNE	notintel
   121  	CMPL	CX, $0x6C65746E  // "ntel"
   122  	JNE	notintel
   123  	MOVB	$1, runtime·isIntel(SB)
   124  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   125  notintel:
   126  
   127  	// Load EAX=1 cpuid flags
   128  	MOVL	$1, AX
   129  	CPUID
   130  	MOVL	AX, runtime·processorVersionInfo(SB)
   131  
   132  nocpuinfo:
   133  	// if there is an _cgo_init, call it.
   134  	MOVQ	_cgo_init(SB), AX
   135  	TESTQ	AX, AX
   136  	JZ	needtls
   137  	// arg 1: g0, already in DI
   138  	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   139  #ifdef GOOS_android
   140  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   141  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   142  	// Compensate for tls_g (+16).
   143  	MOVQ	-16(TLS), CX
   144  #else
   145  	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   146  	MOVQ	$0, CX
   147  #endif
   148  #ifdef GOOS_windows
   149  	// Adjust for the Win64 calling convention.
   150  	MOVQ	CX, R9 // arg 4
   151  	MOVQ	DX, R8 // arg 3
   152  	MOVQ	SI, DX // arg 2
   153  	MOVQ	DI, CX // arg 1
   154  #endif
   155  	CALL	AX
   156  
   157  	// update stackguard after _cgo_init
   158  	MOVQ	$runtime·g0(SB), CX
   159  	MOVQ	(g_stack+stack_lo)(CX), AX
   160  	ADDQ	$const__StackGuard, AX
   161  	MOVQ	AX, g_stackguard0(CX)
   162  	MOVQ	AX, g_stackguard1(CX)
   163  
   164  #ifndef GOOS_windows
   165  	JMP ok
   166  #endif
   167  needtls:
   168  #ifdef GOOS_plan9
   169  	// skip TLS setup on Plan 9
   170  	JMP ok
   171  #endif
   172  #ifdef GOOS_solaris
   173  	// skip TLS setup on Solaris
   174  	JMP ok
   175  #endif
   176  #ifdef GOOS_illumos
   177  	// skip TLS setup on illumos
   178  	JMP ok
   179  #endif
   180  #ifdef GOOS_darwin
   181  	// skip TLS setup on Darwin
   182  	JMP ok
   183  #endif
   184  #ifdef GOOS_openbsd
   185  	// skip TLS setup on OpenBSD
   186  	JMP ok
   187  #endif
   188  
   189  	LEAQ	runtime·m0+m_tls(SB), DI
   190  	CALL	runtime·settls(SB)
   191  
   192  	// store through it, to make sure it works
   193  	get_tls(BX)
   194  	MOVQ	$0x123, g(BX)
   195  	MOVQ	runtime·m0+m_tls(SB), AX
   196  	CMPQ	AX, $0x123
   197  	JEQ 2(PC)
   198  	CALL	runtime·abort(SB)
   199  ok:
   200  	// set the per-goroutine and per-mach "registers"
   201  	get_tls(BX)
   202  	LEAQ	runtime·g0(SB), CX
   203  	MOVQ	CX, g(BX)
   204  	LEAQ	runtime·m0(SB), AX
   205  
   206  	// save m->g0 = g0
   207  	MOVQ	CX, m_g0(AX)
   208  	// save m0 to g0->m
   209  	MOVQ	AX, g_m(CX)
   210  
   211  	CLD				// convention is D is always left cleared
   212  	CALL	runtime·check(SB)
   213  
   214  	MOVL	16(SP), AX		// copy argc
   215  	MOVL	AX, 0(SP)
   216  	MOVQ	24(SP), AX		// copy argv
   217  	MOVQ	AX, 8(SP)
   218  	CALL	runtime·args(SB)
   219  	CALL	runtime·osinit(SB)
   220  	CALL	runtime·schedinit(SB)
   221  
   222  	// create a new goroutine to start program
   223  	MOVQ	$runtime·mainPC(SB), AX		// entry
   224  	PUSHQ	AX
   225  	PUSHQ	$0			// arg size
   226  	CALL	runtime·newproc(SB)
   227  	POPQ	AX
   228  	POPQ	AX
   229  
   230  	// start this M
   231  	CALL	runtime·mstart(SB)
   232  
   233  	CALL	runtime·abort(SB)	// mstart should never return
   234  	RET
   235  
   236  	// Prevent dead-code elimination of debugCallV1, which is
   237  	// intended to be called by debuggers.
   238  	MOVQ	$runtime·debugCallV1<ABIInternal>(SB), AX
   239  	RET
   240  
   241  // mainPC is a function value for runtime.main, to be passed to newproc.
   242  // The reference to runtime.main is made via ABIInternal, since the
   243  // actual function (not the ABI0 wrapper) is needed by newproc.
   244  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   245  GLOBL	runtime·mainPC(SB),RODATA,$8
   246  
   247  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   248  	BYTE	$0xcc
   249  	RET
   250  
   251  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   252  	// No per-thread init.
   253  	RET
   254  
   255  /*
   256   *  go-routine
   257   */
   258  
   259  // func gosave(buf *gobuf)
   260  // save state in Gobuf; setjmp
   261  TEXT runtime·gosave(SB), NOSPLIT, $0-8
   262  	MOVQ	buf+0(FP), AX		// gobuf
   263  	LEAQ	buf+0(FP), BX		// caller's SP
   264  	MOVQ	BX, gobuf_sp(AX)
   265  	MOVQ	0(SP), BX		// caller's PC
   266  	MOVQ	BX, gobuf_pc(AX)
   267  	MOVQ	$0, gobuf_ret(AX)
   268  	MOVQ	BP, gobuf_bp(AX)
   269  	// Assert ctxt is zero. See func save.
   270  	MOVQ	gobuf_ctxt(AX), BX
   271  	TESTQ	BX, BX
   272  	JZ	2(PC)
   273  	CALL	runtime·badctxt(SB)
   274  	get_tls(CX)
   275  	MOVQ	g(CX), BX
   276  	MOVQ	BX, gobuf_g(AX)
   277  	RET
   278  
   279  // func gogo(buf *gobuf)
   280  // restore state from Gobuf; longjmp
   281  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   282  	MOVQ	buf+0(FP), BX		// gobuf
   283  	MOVQ	gobuf_g(BX), DX
   284  	MOVQ	0(DX), CX		// make sure g != nil
   285  	get_tls(CX)
   286  	MOVQ	DX, g(CX)
   287  	MOVQ	gobuf_sp(BX), SP	// restore SP
   288  	MOVQ	gobuf_ret(BX), AX
   289  	MOVQ	gobuf_ctxt(BX), DX
   290  	MOVQ	gobuf_bp(BX), BP
   291  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   292  	MOVQ	$0, gobuf_ret(BX)
   293  	MOVQ	$0, gobuf_ctxt(BX)
   294  	MOVQ	$0, gobuf_bp(BX)
   295  	MOVQ	gobuf_pc(BX), BX
   296  	JMP	BX
   297  
   298  // func mcall(fn func(*g))
   299  // Switch to m->g0's stack, call fn(g).
   300  // Fn must never return. It should gogo(&g->sched)
   301  // to keep running g.
   302  TEXT runtime·mcall(SB), NOSPLIT, $0-8
   303  	MOVQ	fn+0(FP), DI
   304  
   305  	get_tls(CX)
   306  	MOVQ	g(CX), AX	// save state in g->sched
   307  	MOVQ	0(SP), BX	// caller's PC
   308  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   309  	LEAQ	fn+0(FP), BX	// caller's SP
   310  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   311  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   312  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   313  
   314  	// switch to m->g0 & its stack, call fn
   315  	MOVQ	g(CX), BX
   316  	MOVQ	g_m(BX), BX
   317  	MOVQ	m_g0(BX), SI
   318  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   319  	JNE	3(PC)
   320  	MOVQ	$runtime·badmcall(SB), AX
   321  	JMP	AX
   322  	MOVQ	SI, g(CX)	// g = m->g0
   323  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   324  	PUSHQ	AX
   325  	MOVQ	DI, DX
   326  	MOVQ	0(DI), DI
   327  	CALL	DI
   328  	POPQ	AX
   329  	MOVQ	$runtime·badmcall2(SB), AX
   330  	JMP	AX
   331  	RET
   332  
   333  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   334  // of the G stack. We need to distinguish the routine that
   335  // lives at the bottom of the G stack from the one that lives
   336  // at the top of the system stack because the one at the top of
   337  // the system stack terminates the stack walk (see topofstack()).
   338  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   339  	RET
   340  
   341  // func systemstack(fn func())
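        // systemstack runs fn on the m's g0 stack. If the goroutine is already
        // on a system stack (g0 or gsignal), fn is simply tail-called; otherwise
        // the goroutine's state is saved, execution switches to g0, and control
        // returns to the goroutine's stack when fn returns.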
   342  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   343  	MOVQ	fn+0(FP), DI	// DI = fn
   344  	get_tls(CX)
   345  	MOVQ	g(CX), AX	// AX = g
   346  	MOVQ	g_m(AX), BX	// BX = m
   347  
   348  	CMPQ	AX, m_gsignal(BX)
   349  	JEQ	noswitch
   350  
   351  	MOVQ	m_g0(BX), DX	// DX = g0
   352  	CMPQ	AX, DX
   353  	JEQ	noswitch
   354  
   355  	CMPQ	AX, m_curg(BX)
   356  	JNE	bad
   357  
   358  	// switch stacks
   359  	// save our state in g->sched. Pretend to
   360  	// be systemstack_switch if the G stack is scanned.
   361  	MOVQ	$runtime·systemstack_switch(SB), SI
   362  	MOVQ	SI, (g_sched+gobuf_pc)(AX)
   363  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   364  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   365  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   366  
   367  	// switch to g0
   368  	MOVQ	DX, g(CX)
   369  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   370  	// make it look like mstart called systemstack on g0, to stop traceback
   371  	SUBQ	$8, BX
   372  	MOVQ	$runtime·mstart(SB), DX
   373  	MOVQ	DX, 0(BX)
   374  	MOVQ	BX, SP
   375  
   376  	// call target function
   377  	MOVQ	DI, DX
   378  	MOVQ	0(DI), DI
   379  	CALL	DI
   380  
   381  	// switch back to g
   382  	get_tls(CX)
   383  	MOVQ	g(CX), AX
   384  	MOVQ	g_m(AX), BX
   385  	MOVQ	m_curg(BX), AX
   386  	MOVQ	AX, g(CX)
   387  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   388  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   389  	RET
   390  
   391  noswitch:
   392  	// already on m stack; tail call the function
   393  	// Using a tail call here cleans up tracebacks since we won't stop
   394  	// at an intermediate systemstack.
   395  	MOVQ	DI, DX
   396  	MOVQ	0(DI), DI
   397  	JMP	DI
   398  
   399  bad:
   400  	// Bad: g is not gsignal, not g0, not curg. What is it?
   401  	MOVQ	$runtime·badsystemstack(SB), AX
   402  	CALL	AX
   403  	INT	$3
   404  
   405  
   406  /*
   407   * support for morestack
   408   */
   409  
   410  // Called during function prolog when more stack is needed.
   411  //
   412  // The traceback routines see morestack on a g0 as being
   413  // the top of a stack (for example, morestack calling newstack
   414  // calling the scheduler calling newm calling gc), so we must
   415  // record an argument size. For that purpose, it has no arguments.
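        //
        // In outline, morestack saves the caller's state in g->sched and
        // m->morebuf, switches to m->g0's stack, and calls newstack, which
        // allocates a larger stack and restarts the preempted function.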
   416  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   417  	// Cannot grow scheduler stack (m->g0).
   418  	get_tls(CX)
   419  	MOVQ	g(CX), BX
   420  	MOVQ	g_m(BX), BX
   421  	MOVQ	m_g0(BX), SI
   422  	CMPQ	g(CX), SI
   423  	JNE	3(PC)
   424  	CALL	runtime·badmorestackg0(SB)
   425  	CALL	runtime·abort(SB)
   426  
   427  	// Cannot grow signal stack (m->gsignal).
   428  	MOVQ	m_gsignal(BX), SI
   429  	CMPQ	g(CX), SI
   430  	JNE	3(PC)
   431  	CALL	runtime·badmorestackgsignal(SB)
   432  	CALL	runtime·abort(SB)
   433  
   434  	// Called from f.
   435  	// Set m->morebuf to f's caller.
   436  	NOP	SP	// tell vet SP changed - stop checking offsets
   437  	MOVQ	8(SP), AX	// f's caller's PC
   438  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   439  	LEAQ	16(SP), AX	// f's caller's SP
   440  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   441  	get_tls(CX)
   442  	MOVQ	g(CX), SI
   443  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   444  
   445  	// Set g->sched to context in f.
   446  	MOVQ	0(SP), AX // f's PC
   447  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   448  	MOVQ	SI, (g_sched+gobuf_g)(SI)
   449  	LEAQ	8(SP), AX // f's SP
   450  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   451  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
   452  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   453  
   454  	// Call newstack on m->g0's stack.
   455  	MOVQ	m_g0(BX), BX
   456  	MOVQ	BX, g(CX)
   457  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   458  	CALL	runtime·newstack(SB)
   459  	CALL	runtime·abort(SB)	// crash if newstack returns
   460  	RET
   461  
   462  // morestack but not preserving ctxt.
   463  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   464  	MOVL	$0, DX
   465  	JMP	runtime·morestack(SB)
   466  
   467  // reflectcall: call a function with the given argument list
   468  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   469  // we don't have variable-sized frames, so we use a small number
   470  // of constant-sized-frame functions to encode a few bits of size in the pc.
   471  // Caution: ugly multiline assembly macros in your future!
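        //
        // For example, an argument frame of 24 bytes fails the call16 check but
        // passes the call32 check, so it dispatches to runtime·call32, whose
        // fixed 32-byte frame is large enough to hold the copied arguments.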
   472  
   473  #define DISPATCH(NAME,MAXSIZE)		\
   474  	CMPQ	CX, $MAXSIZE;		\
   475  	JA	3(PC);			\
   476  	MOVQ	$NAME(SB), AX;		\
   477  	JMP	AX
   478  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   479  
   480  TEXT ·reflectcall<ABIInternal>(SB), NOSPLIT, $0-32
   481  	MOVLQZX argsize+24(FP), CX
   482  	DISPATCH(runtime·call16, 16)
   483  	DISPATCH(runtime·call32, 32)
   484  	DISPATCH(runtime·call64, 64)
   485  	DISPATCH(runtime·call128, 128)
   486  	DISPATCH(runtime·call256, 256)
   487  	DISPATCH(runtime·call512, 512)
   488  	DISPATCH(runtime·call1024, 1024)
   489  	DISPATCH(runtime·call2048, 2048)
   490  	DISPATCH(runtime·call4096, 4096)
   491  	DISPATCH(runtime·call8192, 8192)
   492  	DISPATCH(runtime·call16384, 16384)
   493  	DISPATCH(runtime·call32768, 32768)
   494  	DISPATCH(runtime·call65536, 65536)
   495  	DISPATCH(runtime·call131072, 131072)
   496  	DISPATCH(runtime·call262144, 262144)
   497  	DISPATCH(runtime·call524288, 524288)
   498  	DISPATCH(runtime·call1048576, 1048576)
   499  	DISPATCH(runtime·call2097152, 2097152)
   500  	DISPATCH(runtime·call4194304, 4194304)
   501  	DISPATCH(runtime·call8388608, 8388608)
   502  	DISPATCH(runtime·call16777216, 16777216)
   503  	DISPATCH(runtime·call33554432, 33554432)
   504  	DISPATCH(runtime·call67108864, 67108864)
   505  	DISPATCH(runtime·call134217728, 134217728)
   506  	DISPATCH(runtime·call268435456, 268435456)
   507  	DISPATCH(runtime·call536870912, 536870912)
   508  	DISPATCH(runtime·call1073741824, 1073741824)
   509  	MOVQ	$runtime·badreflectcall(SB), AX
   510  	JMP	AX
   511  
   512  #define CALLFN(NAME,MAXSIZE)			\
   513  TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   514  	NO_LOCAL_POINTERS;			\
   515  	/* copy arguments to stack */		\
   516  	MOVQ	argptr+16(FP), SI;		\
   517  	MOVLQZX argsize+24(FP), CX;		\
   518  	MOVQ	SP, DI;				\
   519  	REP;MOVSB;				\
   520  	/* call function */			\
   521  	MOVQ	f+8(FP), DX;			\
   522  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   523  	MOVQ	(DX), AX;			\
   524  	CALL	AX;				\
   525  	/* copy return values back */		\
   526  	MOVQ	argtype+0(FP), DX;		\
   527  	MOVQ	argptr+16(FP), DI;		\
   528  	MOVLQZX	argsize+24(FP), CX;		\
   529  	MOVLQZX	retoffset+28(FP), BX;		\
   530  	MOVQ	SP, SI;				\
   531  	ADDQ	BX, DI;				\
   532  	ADDQ	BX, SI;				\
   533  	SUBQ	BX, CX;				\
   534  	CALL	callRet<>(SB);			\
   535  	RET
   536  
   537  // callRet copies return values back at the end of call*. This is a
   538  // separate function so it can allocate stack space for the arguments
   539  // to reflectcallmove. It does not follow the Go ABI; it expects its
   540  // arguments in registers.
   541  TEXT callRet<>(SB), NOSPLIT, $32-0
   542  	NO_LOCAL_POINTERS
   543  	MOVQ	DX, 0(SP)
   544  	MOVQ	DI, 8(SP)
   545  	MOVQ	SI, 16(SP)
   546  	MOVQ	CX, 24(SP)
   547  	CALL	runtime·reflectcallmove(SB)
   548  	RET
   549  
   550  CALLFN(·call16, 16)
   551  CALLFN(·call32, 32)
   552  CALLFN(·call64, 64)
   553  CALLFN(·call128, 128)
   554  CALLFN(·call256, 256)
   555  CALLFN(·call512, 512)
   556  CALLFN(·call1024, 1024)
   557  CALLFN(·call2048, 2048)
   558  CALLFN(·call4096, 4096)
   559  CALLFN(·call8192, 8192)
   560  CALLFN(·call16384, 16384)
   561  CALLFN(·call32768, 32768)
   562  CALLFN(·call65536, 65536)
   563  CALLFN(·call131072, 131072)
   564  CALLFN(·call262144, 262144)
   565  CALLFN(·call524288, 524288)
   566  CALLFN(·call1048576, 1048576)
   567  CALLFN(·call2097152, 2097152)
   568  CALLFN(·call4194304, 4194304)
   569  CALLFN(·call8388608, 8388608)
   570  CALLFN(·call16777216, 16777216)
   571  CALLFN(·call33554432, 33554432)
   572  CALLFN(·call67108864, 67108864)
   573  CALLFN(·call134217728, 134217728)
   574  CALLFN(·call268435456, 268435456)
   575  CALLFN(·call536870912, 536870912)
   576  CALLFN(·call1073741824, 1073741824)
   577  
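        // func procyield(cycles uint32)
        // procyield spins for the given number of iterations, executing PAUSE
        // each time to hint to the CPU that this is a spin-wait loop.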
   578  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   579  	MOVL	cycles+0(FP), AX
   580  again:
   581  	PAUSE
   582  	SUBL	$1, AX
   583  	JNZ	again
   584  	RET
   585  
   586  
   587  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   588  	// Stores are already ordered on x86, so this is just a
   589  	// compile barrier.
   590  	RET
   591  
   592  // func jmpdefer(fv *funcval, argp uintptr)
   593  // argp is a caller SP.
   594  // called from deferreturn.
   595  // 1. pop the caller
   596  // 2. subtract 5 bytes from the caller's return PC (see note below)
   597  // 3. jmp to the argument
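        // The 5 bytes in step 2 are the length of the CALL instruction that
        // invoked deferreturn, so when the deferred function returns, the caller
        // re-executes that CALL and runs any remaining deferred calls.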
   598  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   599  	MOVQ	fv+0(FP), DX	// fn
   600  	MOVQ	argp+8(FP), BX	// caller sp
   601  	LEAQ	-8(BX), SP	// caller sp after CALL
   602  	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   603  	SUBQ	$5, (SP)	// return to CALL again
   604  	MOVQ	0(DX), BX
   605  	JMP	BX	// but first run the deferred function
   606  
   607  // Save state of caller into g->sched. Smashes R8, R9.
   608  TEXT gosave<>(SB),NOSPLIT,$0
   609  	get_tls(R8)
   610  	MOVQ	g(R8), R8
   611  	MOVQ	0(SP), R9
   612  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   613  	LEAQ	8(SP), R9
   614  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   615  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   616  	MOVQ	BP, (g_sched+gobuf_bp)(R8)
   617  	// Assert ctxt is zero. See func save.
   618  	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
   619  	TESTQ	R9, R9
   620  	JZ	2(PC)
   621  	CALL	runtime·badctxt(SB)
   622  	RET
   623  
   624  // func asmcgocall(fn, arg unsafe.Pointer) int32
   625  // Call fn(arg) on the scheduler stack,
   626  // aligned appropriately for the gcc ABI.
   627  // See cgocall.go for more details.
   628  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   629  	MOVQ	fn+0(FP), AX
   630  	MOVQ	arg+8(FP), BX
   631  
   632  	MOVQ	SP, DX
   633  
   634  	// Figure out if we need to switch to m->g0 stack.
   635  	// We get called to create new OS threads too, and those
   636  	// come in on the m->g0 stack already.
   637  	get_tls(CX)
   638  	MOVQ	g(CX), R8
   639  	CMPQ	R8, $0
   640  	JEQ	nosave
   641  	MOVQ	g_m(R8), R8
   642  	MOVQ	m_g0(R8), SI
   643  	MOVQ	g(CX), DI
   644  	CMPQ	SI, DI
   645  	JEQ	nosave
   646  	MOVQ	m_gsignal(R8), SI
   647  	CMPQ	SI, DI
   648  	JEQ	nosave
   649  
   650  	// Switch to system stack.
   651  	MOVQ	m_g0(R8), SI
   652  	CALL	gosave<>(SB)
   653  	MOVQ	SI, g(CX)
   654  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   655  
   656  	// Now on a scheduling stack (a pthread-created stack).
   657  	// Make sure we have enough room for 4 stack-backed fast-call
   658  	// registers as per windows amd64 calling convention.
   659  	SUBQ	$64, SP
   660  	ANDQ	$~15, SP	// alignment for gcc ABI
   661  	MOVQ	DI, 48(SP)	// save g
   662  	MOVQ	(g_stack+stack_hi)(DI), DI
   663  	SUBQ	DX, DI
   664  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   665  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   666  	MOVQ	BX, CX		// CX = first argument in Win64
   667  	CALL	AX
   668  
   669  	// Restore registers, g, stack pointer.
   670  	get_tls(CX)
   671  	MOVQ	48(SP), DI
   672  	MOVQ	(g_stack+stack_hi)(DI), SI
   673  	SUBQ	40(SP), SI
   674  	MOVQ	DI, g(CX)
   675  	MOVQ	SI, SP
   676  
   677  	MOVL	AX, ret+16(FP)
   678  	RET
   679  
   680  nosave:
   681  	// Running on a system stack, perhaps even without a g.
   682  	// Having no g can happen during thread creation or thread teardown
   683  	// (see needm/dropm on Solaris, for example).
   684  	// This code is like the above sequence but without saving/restoring g
   685  	// and without worrying about the stack moving out from under us
   686  	// (because we're on a system stack, not a goroutine stack).
   687  	// The above code could be used directly if already on a system stack,
   688  	// but then the only path through this code would be a rare case on Solaris.
   689  	// Using this code for all "already on system stack" calls exercises it more,
   690  	// which should help keep it correct.
   691  	SUBQ	$64, SP
   692  	ANDQ	$~15, SP
   693  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   694  	MOVQ	DX, 40(SP)	// save original stack pointer
   695  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   696  	MOVQ	BX, CX		// CX = first argument in Win64
   697  	CALL	AX
   698  	MOVQ	40(SP), SI	// restore original stack pointer
   699  	MOVQ	SI, SP
   700  	MOVL	AX, ret+16(FP)
   701  	RET
   702  
   703  // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   704  // See cgocall.go for more details.
   705  TEXT ·cgocallback(SB),NOSPLIT,$24-24
   706  	NO_LOCAL_POINTERS
   707  
   708  	// If g is nil, Go did not create the current thread.
   709  	// Call needm to obtain one m for temporary use.
   710  	// In this case, we're running on the thread stack, so there's
   711  	// lots of space, but the linker doesn't know. Hide the call from
   712  	// the linker analysis by using an indirect call through AX.
   713  	get_tls(CX)
   714  #ifdef GOOS_windows
   715  	MOVL	$0, BX
   716  	CMPQ	CX, $0
   717  	JEQ	2(PC)
   718  #endif
   719  	MOVQ	g(CX), BX
   720  	CMPQ	BX, $0
   721  	JEQ	needm
   722  	MOVQ	g_m(BX), BX
   723  	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
   724  	JMP	havem
   725  needm:
   726  	MOVQ    $runtime·needm(SB), AX
   727  	CALL	AX
   728  	MOVQ	$0, savedm-8(SP) // dropm on return
   729  	get_tls(CX)
   730  	MOVQ	g(CX), BX
   731  	MOVQ	g_m(BX), BX
   732  
   733  	// Set m->sched.sp = SP, so that if a panic happens
   734  	// during the function we are about to execute, it will
   735  	// have a valid SP to run on the g0 stack.
   736  	// The next few lines (after the havem label)
   737  	// will save this SP onto the stack and then write
   738  	// the same SP back to m->sched.sp. That seems redundant,
   739  	// but if an unrecovered panic happens, unwindm will
   740  	// restore the g->sched.sp from the stack location
   741  	// and then systemstack will try to use it. If we don't set it here,
   742  	// that restored SP will be uninitialized (typically 0) and
   743  	// will not be usable.
   744  	MOVQ	m_g0(BX), SI
   745  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   746  
   747  havem:
   748  	// Now there's a valid m, and we're running on its m->g0.
   749  	// Save current m->g0->sched.sp on stack and then set it to SP.
   750  	// Save current sp in m->g0->sched.sp in preparation for
   751  	// switch back to m->curg stack.
   752  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   753  	MOVQ	m_g0(BX), SI
   754  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   755  	MOVQ	AX, 0(SP)
   756  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   757  
   758  	// Switch to m->curg stack and call runtime.cgocallbackg.
   759  	// Because we are taking over the execution of m->curg
   760  	// but *not* resuming what had been running, we need to
   761  	// save that information (m->curg->sched) so we can restore it.
   762  	// We can restore m->curg->sched.sp easily, because calling
   763  	// runtime.cgocallbackg leaves SP unchanged upon return.
   764  	// To save m->curg->sched.pc, we push it onto the curg stack and
   765  	// open a frame the same size as cgocallback's g0 frame.
   766  	// Once we switch to the curg stack, the pushed PC will appear
   767  	// to be the return PC of cgocallback, so that the traceback
   768  	// will seamlessly trace back into the earlier calls.
   769  	MOVQ	m_curg(BX), SI
   770  	MOVQ	SI, g(CX)
   771  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   772  	MOVQ	(g_sched+gobuf_pc)(SI), BX
   773  	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
   774  	// Gather our arguments into registers.
   775  	MOVQ	fn+0(FP), BX
   776  	MOVQ	frame+8(FP), CX
   777  	MOVQ	ctxt+16(FP), DX
   778  	// Compute the size of the frame, including return PC and, if
   779  	// GOEXPERIMENT=framepointer, the saved base pointer
   780  	LEAQ	fn+0(FP), AX
   781  	SUBQ	SP, AX   // AX is our actual frame size
   782  	SUBQ	AX, DI   // Allocate the same frame size on the g stack
   783  	MOVQ	DI, SP
   784  
   785  	MOVQ	BX, 0(SP)
   786  	MOVQ	CX, 8(SP)
   787  	MOVQ	DX, 16(SP)
   788  	CALL	runtime·cgocallbackg(SB)
   789  
   790  	// Compute the size of the frame again. FP and SP have
   791  	// completely different values here than they did above,
   792  	// but only their difference matters.
   793  	LEAQ	fn+0(FP), AX
   794  	SUBQ	SP, AX
   795  
   796  	// Restore g->sched (== m->curg->sched) from saved values.
   797  	get_tls(CX)
   798  	MOVQ	g(CX), SI
   799  	MOVQ	SP, DI
   800  	ADDQ	AX, DI
   801  	MOVQ	-8(DI), BX
   802  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   803  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   804  
   805  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   806  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   807  	// so we do not have to restore it.)
   808  	MOVQ	g(CX), BX
   809  	MOVQ	g_m(BX), BX
   810  	MOVQ	m_g0(BX), SI
   811  	MOVQ	SI, g(CX)
   812  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   813  	MOVQ	0(SP), AX
   814  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   815  
   816  	// If the m on entry was nil, we called needm above to borrow an m
   817  	// for the duration of the call. Since the call is over, return it with dropm.
   818  	MOVQ	savedm-8(SP), BX
   819  	CMPQ	BX, $0
   820  	JNE 3(PC)
   821  	MOVQ	$runtime·dropm(SB), AX
   822  	CALL	AX
   823  
   824  	// Done!
   825  	RET
   826  
   827  // func setg(gg *g)
   828  // set g. for use by needm.
   829  TEXT runtime·setg(SB), NOSPLIT, $0-8
   830  	MOVQ	gg+0(FP), BX
   831  #ifdef GOOS_windows
   832  	CMPQ	BX, $0
   833  	JNE	settls
   834  	MOVQ	$0, 0x28(GS)
   835  	RET
   836  settls:
   837  	MOVQ	g_m(BX), AX
   838  	LEAQ	m_tls(AX), AX
   839  	MOVQ	AX, 0x28(GS)
   840  #endif
   841  	get_tls(CX)
   842  	MOVQ	BX, g(CX)
   843  	RET
   844  
   845  // void setg_gcc(G*); set g called from gcc.
   846  TEXT setg_gcc<>(SB),NOSPLIT,$0
   847  	get_tls(AX)
   848  	MOVQ	DI, g(AX)
   849  	RET
   850  
   851  TEXT runtime·abort(SB),NOSPLIT,$0-0
   852  	INT	$3
   853  loop:
   854  	JMP	loop
   855  
   856  // check that SP is in range [g->stack.lo, g->stack.hi)
   857  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   858  	get_tls(CX)
   859  	MOVQ	g(CX), AX
   860  	CMPQ	(g_stack+stack_hi)(AX), SP
   861  	JHI	2(PC)
   862  	CALL	runtime·abort(SB)
   863  	CMPQ	SP, (g_stack+stack_lo)(AX)
   864  	JHI	2(PC)
   865  	CALL	runtime·abort(SB)
   866  	RET
   867  
   868  // func cputicks() int64
   869  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   870  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   871  	JNE	mfence
   872  	LFENCE
   873  	JMP	done
   874  mfence:
   875  	MFENCE
   876  done:
   877  	RDTSC
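        	// RDTSC leaves the low 32 bits of the counter in AX and the high
        	// 32 bits in DX; combine them into a single 64-bit value.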
   878  	SHLQ	$32, DX
   879  	ADDQ	DX, AX
   880  	MOVQ	AX, ret+0(FP)
   881  	RET
   882  
   883  // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
   884  // hash function using AES hardware instructions
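        // Falls back to runtime·memhashFallback when the CPU lacks AES-NI
        // support (runtime·useAeshash is 0).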
   885  TEXT runtime·memhash(SB),NOSPLIT,$0-32
   886  	CMPB	runtime·useAeshash(SB), $0
   887  	JEQ	noaes
   888  	MOVQ	p+0(FP), AX	// ptr to data
   889  	MOVQ	s+16(FP), CX	// size
   890  	LEAQ	ret+24(FP), DX
   891  	JMP	aeshashbody<>(SB)
   892  noaes:
   893  	JMP	runtime·memhashFallback(SB)
   894  
   895  // func strhash(p unsafe.Pointer, h uintptr) uintptr
   896  TEXT runtime·strhash(SB),NOSPLIT,$0-24
   897  	CMPB	runtime·useAeshash(SB), $0
   898  	JEQ	noaes
   899  	MOVQ	p+0(FP), AX	// ptr to string struct
   900  	MOVQ	8(AX), CX	// length of string
   901  	MOVQ	(AX), AX	// string data
   902  	LEAQ	ret+16(FP), DX
   903  	JMP	aeshashbody<>(SB)
   904  noaes:
   905  	JMP	runtime·strhashFallback(SB)
   906  
   907  // AX: data
   908  // CX: length
   909  // DX: address to put return value
   910  TEXT aeshashbody<>(SB),NOSPLIT,$0-0
   911  	// Fill an SSE register with our seeds.
   912  	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
   913  	PINSRW	$4, CX, X0			// 16 bits of length
   914  	PSHUFHW $0, X0, X0			// repeat length 4 times total
   915  	MOVO	X0, X1				// save unscrambled seed
   916  	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
   917  	AESENC	X0, X0				// scramble seed
   918  
   919  	CMPQ	CX, $16
   920  	JB	aes0to15
   921  	JE	aes16
   922  	CMPQ	CX, $32
   923  	JBE	aes17to32
   924  	CMPQ	CX, $64
   925  	JBE	aes33to64
   926  	CMPQ	CX, $128
   927  	JBE	aes65to128
   928  	JMP	aes129plus
   929  
   930  aes0to15:
   931  	TESTQ	CX, CX
   932  	JE	aes0
   933  
   934  	ADDQ	$16, AX
   935  	TESTW	$0xff0, AX
   936  	JE	endofpage
   937  
   938  	// 16 bytes loaded at this address won't cross
   939  	// a page boundary, so we can load it directly.
   940  	MOVOU	-16(AX), X1
   941  	ADDQ	CX, CX
   942  	MOVQ	$masks<>(SB), AX
   943  	PAND	(AX)(CX*8), X1
   944  final1:
   945  	PXOR	X0, X1	// xor data with seed
   946  	AESENC	X1, X1	// scramble combo 3 times
   947  	AESENC	X1, X1
   948  	AESENC	X1, X1
   949  	MOVQ	X1, (DX)
   950  	RET
   951  
   952  endofpage:
   953  	// address ends in 1111xxxx. Might be up against
   954  	// a page boundary, so load ending at last byte.
   955  	// Then shift bytes down using pshufb.
   956  	MOVOU	-32(AX)(CX*1), X1
   957  	ADDQ	CX, CX
   958  	MOVQ	$shifts<>(SB), AX
   959  	PSHUFB	(AX)(CX*8), X1
   960  	JMP	final1
   961  
   962  aes0:
   963  	// Return scrambled input seed
   964  	AESENC	X0, X0
   965  	MOVQ	X0, (DX)
   966  	RET
   967  
   968  aes16:
   969  	MOVOU	(AX), X1
   970  	JMP	final1
   971  
   972  aes17to32:
   973  	// make second starting seed
   974  	PXOR	runtime·aeskeysched+16(SB), X1
   975  	AESENC	X1, X1
   976  
   977  	// load data to be hashed
   978  	MOVOU	(AX), X2
   979  	MOVOU	-16(AX)(CX*1), X3
   980  
   981  	// xor with seed
   982  	PXOR	X0, X2
   983  	PXOR	X1, X3
   984  
   985  	// scramble 3 times
   986  	AESENC	X2, X2
   987  	AESENC	X3, X3
   988  	AESENC	X2, X2
   989  	AESENC	X3, X3
   990  	AESENC	X2, X2
   991  	AESENC	X3, X3
   992  
   993  	// combine results
   994  	PXOR	X3, X2
   995  	MOVQ	X2, (DX)
   996  	RET
   997  
   998  aes33to64:
   999  	// make 3 more starting seeds
  1000  	MOVO	X1, X2
  1001  	MOVO	X1, X3
  1002  	PXOR	runtime·aeskeysched+16(SB), X1
  1003  	PXOR	runtime·aeskeysched+32(SB), X2
  1004  	PXOR	runtime·aeskeysched+48(SB), X3
  1005  	AESENC	X1, X1
  1006  	AESENC	X2, X2
  1007  	AESENC	X3, X3
  1008  
  1009  	MOVOU	(AX), X4
  1010  	MOVOU	16(AX), X5
  1011  	MOVOU	-32(AX)(CX*1), X6
  1012  	MOVOU	-16(AX)(CX*1), X7
  1013  
  1014  	PXOR	X0, X4
  1015  	PXOR	X1, X5
  1016  	PXOR	X2, X6
  1017  	PXOR	X3, X7
  1018  
  1019  	AESENC	X4, X4
  1020  	AESENC	X5, X5
  1021  	AESENC	X6, X6
  1022  	AESENC	X7, X7
  1023  
  1024  	AESENC	X4, X4
  1025  	AESENC	X5, X5
  1026  	AESENC	X6, X6
  1027  	AESENC	X7, X7
  1028  
  1029  	AESENC	X4, X4
  1030  	AESENC	X5, X5
  1031  	AESENC	X6, X6
  1032  	AESENC	X7, X7
  1033  
  1034  	PXOR	X6, X4
  1035  	PXOR	X7, X5
  1036  	PXOR	X5, X4
  1037  	MOVQ	X4, (DX)
  1038  	RET
  1039  
  1040  aes65to128:
  1041  	// make 7 more starting seeds
  1042  	MOVO	X1, X2
  1043  	MOVO	X1, X3
  1044  	MOVO	X1, X4
  1045  	MOVO	X1, X5
  1046  	MOVO	X1, X6
  1047  	MOVO	X1, X7
  1048  	PXOR	runtime·aeskeysched+16(SB), X1
  1049  	PXOR	runtime·aeskeysched+32(SB), X2
  1050  	PXOR	runtime·aeskeysched+48(SB), X3
  1051  	PXOR	runtime·aeskeysched+64(SB), X4
  1052  	PXOR	runtime·aeskeysched+80(SB), X5
  1053  	PXOR	runtime·aeskeysched+96(SB), X6
  1054  	PXOR	runtime·aeskeysched+112(SB), X7
  1055  	AESENC	X1, X1
  1056  	AESENC	X2, X2
  1057  	AESENC	X3, X3
  1058  	AESENC	X4, X4
  1059  	AESENC	X5, X5
  1060  	AESENC	X6, X6
  1061  	AESENC	X7, X7
  1062  
  1063  	// load data
  1064  	MOVOU	(AX), X8
  1065  	MOVOU	16(AX), X9
  1066  	MOVOU	32(AX), X10
  1067  	MOVOU	48(AX), X11
  1068  	MOVOU	-64(AX)(CX*1), X12
  1069  	MOVOU	-48(AX)(CX*1), X13
  1070  	MOVOU	-32(AX)(CX*1), X14
  1071  	MOVOU	-16(AX)(CX*1), X15
  1072  
  1073  	// xor with seed
  1074  	PXOR	X0, X8
  1075  	PXOR	X1, X9
  1076  	PXOR	X2, X10
  1077  	PXOR	X3, X11
  1078  	PXOR	X4, X12
  1079  	PXOR	X5, X13
  1080  	PXOR	X6, X14
  1081  	PXOR	X7, X15
  1082  
  1083  	// scramble 3 times
  1084  	AESENC	X8, X8
  1085  	AESENC	X9, X9
  1086  	AESENC	X10, X10
  1087  	AESENC	X11, X11
  1088  	AESENC	X12, X12
  1089  	AESENC	X13, X13
  1090  	AESENC	X14, X14
  1091  	AESENC	X15, X15
  1092  
  1093  	AESENC	X8, X8
  1094  	AESENC	X9, X9
  1095  	AESENC	X10, X10
  1096  	AESENC	X11, X11
  1097  	AESENC	X12, X12
  1098  	AESENC	X13, X13
  1099  	AESENC	X14, X14
  1100  	AESENC	X15, X15
  1101  
  1102  	AESENC	X8, X8
  1103  	AESENC	X9, X9
  1104  	AESENC	X10, X10
  1105  	AESENC	X11, X11
  1106  	AESENC	X12, X12
  1107  	AESENC	X13, X13
  1108  	AESENC	X14, X14
  1109  	AESENC	X15, X15
  1110  
  1111  	// combine results
  1112  	PXOR	X12, X8
  1113  	PXOR	X13, X9
  1114  	PXOR	X14, X10
  1115  	PXOR	X15, X11
  1116  	PXOR	X10, X8
  1117  	PXOR	X11, X9
  1118  	PXOR	X9, X8
  1119  	MOVQ	X8, (DX)
  1120  	RET
  1121  
  1122  aes129plus:
  1123  	// make 7 more starting seeds
  1124  	MOVO	X1, X2
  1125  	MOVO	X1, X3
  1126  	MOVO	X1, X4
  1127  	MOVO	X1, X5
  1128  	MOVO	X1, X6
  1129  	MOVO	X1, X7
  1130  	PXOR	runtime·aeskeysched+16(SB), X1
  1131  	PXOR	runtime·aeskeysched+32(SB), X2
  1132  	PXOR	runtime·aeskeysched+48(SB), X3
  1133  	PXOR	runtime·aeskeysched+64(SB), X4
  1134  	PXOR	runtime·aeskeysched+80(SB), X5
  1135  	PXOR	runtime·aeskeysched+96(SB), X6
  1136  	PXOR	runtime·aeskeysched+112(SB), X7
  1137  	AESENC	X1, X1
  1138  	AESENC	X2, X2
  1139  	AESENC	X3, X3
  1140  	AESENC	X4, X4
  1141  	AESENC	X5, X5
  1142  	AESENC	X6, X6
  1143  	AESENC	X7, X7
  1144  
  1145  	// start with last (possibly overlapping) block
  1146  	MOVOU	-128(AX)(CX*1), X8
  1147  	MOVOU	-112(AX)(CX*1), X9
  1148  	MOVOU	-96(AX)(CX*1), X10
  1149  	MOVOU	-80(AX)(CX*1), X11
  1150  	MOVOU	-64(AX)(CX*1), X12
  1151  	MOVOU	-48(AX)(CX*1), X13
  1152  	MOVOU	-32(AX)(CX*1), X14
  1153  	MOVOU	-16(AX)(CX*1), X15
  1154  
  1155  	// xor in seed
  1156  	PXOR	X0, X8
  1157  	PXOR	X1, X9
  1158  	PXOR	X2, X10
  1159  	PXOR	X3, X11
  1160  	PXOR	X4, X12
  1161  	PXOR	X5, X13
  1162  	PXOR	X6, X14
  1163  	PXOR	X7, X15
  1164  
  1165  	// compute number of remaining 128-byte blocks
  1166  	DECQ	CX
  1167  	SHRQ	$7, CX
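        	// e.g. for a 200-byte input: (200-1)>>7 = 1, so the loop below makes
        	// one pass over bytes [0,128) after the overlapping tail block above.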
  1168  
  1169  aesloop:
  1170  	// scramble state
  1171  	AESENC	X8, X8
  1172  	AESENC	X9, X9
  1173  	AESENC	X10, X10
  1174  	AESENC	X11, X11
  1175  	AESENC	X12, X12
  1176  	AESENC	X13, X13
  1177  	AESENC	X14, X14
  1178  	AESENC	X15, X15
  1179  
  1180  	// scramble state, xor in a block
  1181  	MOVOU	(AX), X0
  1182  	MOVOU	16(AX), X1
  1183  	MOVOU	32(AX), X2
  1184  	MOVOU	48(AX), X3
  1185  	AESENC	X0, X8
  1186  	AESENC	X1, X9
  1187  	AESENC	X2, X10
  1188  	AESENC	X3, X11
  1189  	MOVOU	64(AX), X4
  1190  	MOVOU	80(AX), X5
  1191  	MOVOU	96(AX), X6
  1192  	MOVOU	112(AX), X7
  1193  	AESENC	X4, X12
  1194  	AESENC	X5, X13
  1195  	AESENC	X6, X14
  1196  	AESENC	X7, X15
  1197  
  1198  	ADDQ	$128, AX
  1199  	DECQ	CX
  1200  	JNE	aesloop
  1201  
  1202  	// 3 more scrambles to finish
  1203  	AESENC	X8, X8
  1204  	AESENC	X9, X9
  1205  	AESENC	X10, X10
  1206  	AESENC	X11, X11
  1207  	AESENC	X12, X12
  1208  	AESENC	X13, X13
  1209  	AESENC	X14, X14
  1210  	AESENC	X15, X15
  1211  	AESENC	X8, X8
  1212  	AESENC	X9, X9
  1213  	AESENC	X10, X10
  1214  	AESENC	X11, X11
  1215  	AESENC	X12, X12
  1216  	AESENC	X13, X13
  1217  	AESENC	X14, X14
  1218  	AESENC	X15, X15
  1219  	AESENC	X8, X8
  1220  	AESENC	X9, X9
  1221  	AESENC	X10, X10
  1222  	AESENC	X11, X11
  1223  	AESENC	X12, X12
  1224  	AESENC	X13, X13
  1225  	AESENC	X14, X14
  1226  	AESENC	X15, X15
  1227  
  1228  	PXOR	X12, X8
  1229  	PXOR	X13, X9
  1230  	PXOR	X14, X10
  1231  	PXOR	X15, X11
  1232  	PXOR	X10, X8
  1233  	PXOR	X11, X9
  1234  	PXOR	X9, X8
  1235  	MOVQ	X8, (DX)
  1236  	RET
  1237  
  1238  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1239  TEXT runtime·memhash32(SB),NOSPLIT,$0-24
  1240  	CMPB	runtime·useAeshash(SB), $0
  1241  	JEQ	noaes
  1242  	MOVQ	p+0(FP), AX	// ptr to data
  1243  	MOVQ	h+8(FP), X0	// seed
  1244  	PINSRD	$2, (AX), X0	// data
  1245  	AESENC	runtime·aeskeysched+0(SB), X0
  1246  	AESENC	runtime·aeskeysched+16(SB), X0
  1247  	AESENC	runtime·aeskeysched+32(SB), X0
  1248  	MOVQ	X0, ret+16(FP)
  1249  	RET
  1250  noaes:
  1251  	JMP	runtime·memhash32Fallback(SB)
  1252  
  1253  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1254  TEXT runtime·memhash64(SB),NOSPLIT,$0-24
  1255  	CMPB	runtime·useAeshash(SB), $0
  1256  	JEQ	noaes
  1257  	MOVQ	p+0(FP), AX	// ptr to data
  1258  	MOVQ	h+8(FP), X0	// seed
  1259  	PINSRQ	$1, (AX), X0	// data
  1260  	AESENC	runtime·aeskeysched+0(SB), X0
  1261  	AESENC	runtime·aeskeysched+16(SB), X0
  1262  	AESENC	runtime·aeskeysched+32(SB), X0
  1263  	MOVQ	X0, ret+16(FP)
  1264  	RET
  1265  noaes:
  1266  	JMP	runtime·memhash64Fallback(SB)
  1267  
  1268  // simple mask to get rid of data in the high part of the register.
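        // Each 16-byte entry n (at offset 16*n) keeps the low n bytes and zeroes
        // the rest; the code above doubles CX and scales by 8 to index entries.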
  1269  DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1270  DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1271  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1272  DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1273  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1274  DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1275  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1276  DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1277  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1278  DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1279  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1280  DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1281  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1282  DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1283  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1284  DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1285  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1286  DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1287  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1288  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1289  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1290  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1291  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1292  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1293  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1294  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1295  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1296  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1297  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1298  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1299  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1300  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1301  GLOBL masks<>(SB),RODATA,$256
  1302  
  1303  // func checkASM() bool
  1304  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1305  	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1306  	MOVQ	$masks<>(SB), AX
  1307  	MOVQ	$shifts<>(SB), BX
  1308  	ORQ	BX, AX
  1309  	TESTQ	$15, AX
  1310  	SETEQ	ret+0(FP)
  1311  	RET
  1312  
  1313  // these are arguments to pshufb. They move data down from
  1314  // the high bytes of the register to the low bytes of the register.
  1315  // index is how many bytes to move.
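        // For example, entry 1 maps result byte 0 to source byte 15 and sets the
        // high bit in every other control byte, so PSHUFB zeroes those lanes.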
  1316  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1317  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1318  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1319  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1320  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1321  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1322  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1323  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1324  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1325  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1326  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1327  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1328  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1329  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1330  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1331  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1332  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1333  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1334  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1335  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1336  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1337  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1338  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1339  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1340  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1341  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1342  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1343  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1344  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1345  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1346  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1347  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1348  GLOBL shifts<>(SB),RODATA,$256
  1349  
  1350  TEXT runtime·return0(SB), NOSPLIT, $0
  1351  	MOVL	$0, AX
  1352  	RET
  1353  
  1354  
  1355  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1356  // Must obey the gcc calling convention.
  1357  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1358  	get_tls(CX)
  1359  	MOVQ	g(CX), AX
  1360  	MOVQ	g_m(AX), AX
  1361  	MOVQ	m_curg(AX), AX
  1362  	MOVQ	(g_stack+stack_hi)(AX), AX
  1363  	RET
  1364  
  1365  // The top-most function running on a goroutine
  1366  // returns to goexit+PCQuantum. Defined as ABIInternal
  1367  // so as to make it identifiable to traceback (this
  1368  // function is used as a sentinel; traceback wants to
  1369  // see the func PC, not a wrapper PC).
  1370  TEXT runtime·goexit<ABIInternal>(SB),NOSPLIT,$0-0
  1371  	BYTE	$0x90	// NOP
  1372  	CALL	runtime·goexit1(SB)	// does not return
  1373  	// traceback from goexit1 must hit code range of goexit
  1374  	BYTE	$0x90	// NOP
  1375  
  1376  // This is called from .init_array and follows the platform, not Go, ABI.
  1377  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1378  	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1379  	MOVQ	runtime·lastmoduledatap(SB), AX
  1380  	MOVQ	DI, moduledata_next(AX)
  1381  	MOVQ	DI, runtime·lastmoduledatap(SB)
  1382  	POPQ	R15
  1383  	RET
  1384  
  1385  // gcWriteBarrier performs a heap pointer write and informs the GC.
  1386  //
  1387  // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1388  // - DI is the destination of the write
  1389  // - AX is the value being written at DI
  1390  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1391  // but may clobber others (e.g., SSE registers).
  1392  // Defined as ABIInternal since it does not use the stack-based Go ABI.
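        //
        // Roughly, the fast path below behaves like this Go-level sketch, where
        // DI and AX stand for the incoming arguments (illustrative only, not
        // generated code; field names follow the runtime's wbBuf):
        //
        //	buf := &getg().m.p.ptr().wbBuf
        //	p := buf.next
        //	buf.next = p + 16
        //	*(*uintptr)(unsafe.Pointer(p)) = AX      // the value being written
        //	*(*uintptr)(unsafe.Pointer(p + 8)) = *DI // the slot's old value
        //	if buf.next == buf.end {
        //		wbBufFlush(DI, AX) // flush the buffer to the GC work queues
        //	}
        //	*DI = AX // perform the deferred write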
  1393  TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$120
  1394  	// Save the registers clobbered by the fast path. This is slightly
  1395  	// faster than having the caller spill these.
  1396  	MOVQ	R14, 104(SP)
  1397  	MOVQ	R13, 112(SP)
  1398  	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1399  	// across a sequence of write barriers.
  1400  	get_tls(R13)
  1401  	MOVQ	g(R13), R13
  1402  	MOVQ	g_m(R13), R13
  1403  	MOVQ	m_p(R13), R13
  1404  	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
  1405  	// Increment wbBuf.next position.
  1406  	LEAQ	16(R14), R14
  1407  	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
  1408  	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
  1409  	// Record the write.
  1410  	MOVQ	AX, -16(R14)	// Record value
  1411  	// Note: This turns bad pointer writes into bad
  1412  	// pointer reads, which could be confusing. We could avoid
  1413  	// reading from obviously bad pointers, which would
  1414  	// take care of the vast majority of these. We could
  1415  	// patch this up in the signal handler, or use XCHG to
  1416  	// combine the read and the write.
  1417  	MOVQ	(DI), R13
  1418  	MOVQ	R13, -8(R14)	// Record *slot
  1419  	// Is the buffer full? (flags set in CMPQ above)
  1420  	JEQ	flush
  1421  ret:
  1422  	MOVQ	104(SP), R14
  1423  	MOVQ	112(SP), R13
  1424  	// Do the write.
  1425  	MOVQ	AX, (DI)
  1426  	RET
  1427  
  1428  flush:
  1429  	// Save all general purpose registers since these could be
  1430  	// clobbered by wbBufFlush and were not saved by the caller.
  1431  	// It is possible for wbBufFlush to clobber other registers
  1432  	// (e.g., SSE registers), but the compiler takes care of saving
  1433  	// those in the caller if necessary. This strikes a balance
  1434  	// with registers that are likely to be used.
  1435  	//
  1436  	// We don't have type information for these, but all code under
  1437  	// here is NOSPLIT, so nothing will observe these.
  1438  	//
  1439  	// TODO: We could strike a different balance; e.g., saving X0
  1440  	// and not saving GP registers that are less likely to be used.
  1441  	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
  1442  	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
  1443  	MOVQ	BX, 16(SP)
  1444  	MOVQ	CX, 24(SP)
  1445  	MOVQ	DX, 32(SP)
  1446  	// DI already saved
  1447  	MOVQ	SI, 40(SP)
  1448  	MOVQ	BP, 48(SP)
  1449  	MOVQ	R8, 56(SP)
  1450  	MOVQ	R9, 64(SP)
  1451  	MOVQ	R10, 72(SP)
  1452  	MOVQ	R11, 80(SP)
  1453  	MOVQ	R12, 88(SP)
  1454  	// R13 already saved
  1455  	// R14 already saved
  1456  	MOVQ	R15, 96(SP)
  1457  
  1458  	// This takes arguments DI and AX
  1459  	CALL	runtime·wbBufFlush(SB)
  1460  
  1461  	MOVQ	0(SP), DI
  1462  	MOVQ	8(SP), AX
  1463  	MOVQ	16(SP), BX
  1464  	MOVQ	24(SP), CX
  1465  	MOVQ	32(SP), DX
  1466  	MOVQ	40(SP), SI
  1467  	MOVQ	48(SP), BP
  1468  	MOVQ	56(SP), R8
  1469  	MOVQ	64(SP), R9
  1470  	MOVQ	72(SP), R10
  1471  	MOVQ	80(SP), R11
  1472  	MOVQ	88(SP), R12
  1473  	MOVQ	96(SP), R15
  1474  	JMP	ret
  1475  
  1476  // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
  1477  // Defined as ABIInternal since it does not use the stable Go ABI.
  1478  TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
  1479  	XCHGQ CX, AX
  1480  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1481  	XCHGQ CX, AX
  1482  	RET
  1483  
  1484  // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
  1485  // Defined as ABIInternal since it does not use the stable Go ABI.
  1486  TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
  1487  	XCHGQ DX, AX
  1488  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1489  	XCHGQ DX, AX
  1490  	RET
  1491  
  1492  // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
  1493  // Defined as ABIInternal since it does not use the stable Go ABI.
  1494  TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
  1495  	XCHGQ BX, AX
  1496  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1497  	XCHGQ BX, AX
  1498  	RET
  1499  
  1500  // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
  1501  // Defined as ABIInternal since it does not use the stable Go ABI.
  1502  TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
  1503  	XCHGQ BP, AX
  1504  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1505  	XCHGQ BP, AX
  1506  	RET
  1507  
  1508  // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
  1509  // Defined as ABIInternal since it does not use the stable Go ABI.
  1510  TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
  1511  	XCHGQ SI, AX
  1512  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1513  	XCHGQ SI, AX
  1514  	RET
  1515  
  1516  // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
  1517  // Defined as ABIInternal since it does not use the stable Go ABI.
  1518  TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
  1519  	XCHGQ R8, AX
  1520  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1521  	XCHGQ R8, AX
  1522  	RET
  1523  
  1524  // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
  1525  // Defined as ABIInternal since it does not use the stable Go ABI.
  1526  TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
  1527  	XCHGQ R9, AX
  1528  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1529  	XCHGQ R9, AX
  1530  	RET
  1531  
  1532  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1533  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1534  
  1535  // debugCallV1 is the entry point for debugger-injected function
  1536  // calls on running goroutines. It informs the runtime that a
  1537  // debug call has been injected and creates a call frame for the
  1538  // debugger to fill in.
  1539  //
  1540  // To inject a function call, a debugger should:
  1541  // 1. Check that the goroutine is in state _Grunning and that
  1542  //    there are at least 256 bytes free on the stack.
  1543  // 2. Push the current PC on the stack (updating SP).
  1544  // 3. Write the desired argument frame size at SP-16 (using the SP
  1545  //    after step 2).
  1546  // 4. Save all machine registers (including flags and XMM registers)
  1547  //    so they can be restored later by the debugger.
  1548  // 5. Set the PC to debugCallV1 and resume execution.
  1549  //
  1550  // If the goroutine is in state _Grunnable, then it's not generally
  1551  // safe to inject a call because it may return out via other runtime
  1552  // operations. Instead, the debugger should unwind the stack to find
  1553  // the return to non-runtime code, add a temporary breakpoint there,
  1554  // and inject the call once that breakpoint is hit.
  1555  //
  1556  // If the goroutine is in any other state, it's not safe to inject a call.
  1557  //
  1558  // This function communicates back to the debugger by setting RAX and
  1559  // invoking INT3 to raise a breakpoint signal. See the comments in the
  1560  // implementation for the protocol the debugger is expected to
  1561  // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1562  //
  1563  // The debugger must ensure that any pointers passed to the function
  1564  // obey escape analysis requirements. Specifically, it must not pass
  1565  // a stack pointer to an escaping argument. debugCallV1 cannot check
  1566  // this invariant.
  1567  //
  1568  // This is ABIInternal because Go code injects its PC directly into new
  1569  // goroutine stacks.
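        //
        // Protocol summary (the AX value at each INT3):
        //	8  - the call cannot be injected; a (string pointer, length) pair
        //	     describing the reason is at 0(SP) and 8(SP)
        //	0  - the call frame is allocated; the debugger writes the argument
        //	     frame at SP and starts the call
        //	1  - the call returned; results are in the frame at SP
        //	2  - the call panicked; the panic value (an interface) is at 0(SP)
        //	16 - restore all registers except RIP and RSP and resume execution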
  1570  TEXT runtime·debugCallV1<ABIInternal>(SB),NOSPLIT,$152-0
  1571  	// Save all registers that may contain pointers so they can be
  1572  	// conservatively scanned.
  1573  	//
  1574  	// We can't do anything that might clobber any of these
  1575  	// registers before this.
  1576  	MOVQ	R15, r15-(14*8+8)(SP)
  1577  	MOVQ	R14, r14-(13*8+8)(SP)
  1578  	MOVQ	R13, r13-(12*8+8)(SP)
  1579  	MOVQ	R12, r12-(11*8+8)(SP)
  1580  	MOVQ	R11, r11-(10*8+8)(SP)
  1581  	MOVQ	R10, r10-(9*8+8)(SP)
  1582  	MOVQ	R9, r9-(8*8+8)(SP)
  1583  	MOVQ	R8, r8-(7*8+8)(SP)
  1584  	MOVQ	DI, di-(6*8+8)(SP)
  1585  	MOVQ	SI, si-(5*8+8)(SP)
  1586  	MOVQ	BP, bp-(4*8+8)(SP)
  1587  	MOVQ	BX, bx-(3*8+8)(SP)
  1588  	MOVQ	DX, dx-(2*8+8)(SP)
  1589  	// Save the frame size before we clobber it. Either of the last
  1590  	// saves could clobber this depending on whether there's a saved BP.
  1591  	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1592  	MOVQ	CX, cx-(1*8+8)(SP)
  1593  	MOVQ	AX, ax-(0*8+8)(SP)
  1594  
  1595  	// Save the argument frame size.
  1596  	MOVQ	DX, frameSize-128(SP)
  1597  
  1598  	// Perform a safe-point check.
  1599  	MOVQ	retpc-8(FP), AX	// Caller's PC
  1600  	MOVQ	AX, 0(SP)
  1601  	CALL	runtime·debugCallCheck(SB)
  1602  	MOVQ	8(SP), AX
  1603  	TESTQ	AX, AX
  1604  	JZ	good
  1605  	// The safety check failed. Put the reason string at the top
  1606  	// of the stack.
  1607  	MOVQ	AX, 0(SP)
  1608  	MOVQ	16(SP), AX
  1609  	MOVQ	AX, 8(SP)
  1610  	// Set AX to 8 and invoke INT3. The debugger should get the
  1611  	// reason a call can't be injected from the top of the stack
  1612  	// and resume execution.
  1613  	MOVQ	$8, AX
  1614  	BYTE	$0xcc
  1615  	JMP	restore
  1616  
  1617  good:
  1618  	// Registers are saved and it's safe to make a call.
  1619  	// Open up a call frame, moving the stack if necessary.
  1620  	//
  1621  	// Once the frame is allocated, this will set AX to 0 and
  1622  	// invoke INT3. The debugger should write the argument
  1623  	// frame for the call at SP, push the trapping PC on the
  1624  	// stack, set the PC to the function to call, set RCX to point
  1625  	// to the closure (if a closure call), and resume execution.
  1626  	//
  1627  	// If the function returns, this will set AX to 1 and invoke
  1628  	// INT3. The debugger can then inspect any return value saved
  1629  	// on the stack at SP and resume execution again.
  1630  	//
  1631  	// If the function panics, this will set AX to 2 and invoke INT3.
  1632  	// The interface{} value of the panic will be at SP. The debugger
  1633  	// can inspect the panic value and resume execution again.
  1634  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1635  	CMPQ	AX, $MAXSIZE;			\
  1636  	JA	5(PC);				\
  1637  	MOVQ	$NAME(SB), AX;			\
  1638  	MOVQ	AX, 0(SP);			\
  1639  	CALL	runtime·debugCallWrap(SB);	\
  1640  	JMP	restore
  1641  
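        	// Each DEBUG_CALL_DISPATCH use below expands to six instructions; for
        	// example, DEBUG_CALL_DISPATCH(debugCall32<>, 32) becomes
        	//
        	//	CMPQ	AX, $32
        	//	JA	5(PC)	// frame too big for this stub: skip to the next CMPQ
        	//	MOVQ	$debugCall32<>(SB), AX
        	//	MOVQ	AX, 0(SP)
        	//	CALL	runtime·debugCallWrap(SB)
        	//	JMP	restore
        	//
        	// so the chain falls through until it reaches the smallest debugCall
        	// stub whose frame can hold frameSize bytes.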
  1642  	MOVQ	frameSize-128(SP), AX
  1643  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1644  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1645  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1646  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1647  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1648  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1649  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1650  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1651  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1652  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1653  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1654  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1655  	// The frame size is too large. Report the error.
  1656  	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1657  	MOVQ	AX, 0(SP)
  1658  	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  1659  	MOVQ	$8, AX
  1660  	BYTE	$0xcc
  1661  	JMP	restore
  1662  
  1663  restore:
  1664  	// Calls and failures resume here.
  1665  	//
  1666  	// Set AX to 16 and invoke INT3. The debugger should restore
  1667  	// all registers except RIP and RSP and resume execution.
  1668  	MOVQ	$16, AX
  1669  	BYTE	$0xcc
  1670  	// We must not modify flags after this point.
  1671  
  1672  	// Restore pointer-containing registers, which may have been
  1673  	// modified from the debugger's copy by stack copying.
  1674  	MOVQ	ax-(0*8+8)(SP), AX
  1675  	MOVQ	cx-(1*8+8)(SP), CX
  1676  	MOVQ	dx-(2*8+8)(SP), DX
  1677  	MOVQ	bx-(3*8+8)(SP), BX
  1678  	MOVQ	bp-(4*8+8)(SP), BP
  1679  	MOVQ	si-(5*8+8)(SP), SI
  1680  	MOVQ	di-(6*8+8)(SP), DI
  1681  	MOVQ	r8-(7*8+8)(SP), R8
  1682  	MOVQ	r9-(8*8+8)(SP), R9
  1683  	MOVQ	r10-(9*8+8)(SP), R10
  1684  	MOVQ	r11-(10*8+8)(SP), R11
  1685  	MOVQ	r12-(11*8+8)(SP), R12
  1686  	MOVQ	r13-(12*8+8)(SP), R13
  1687  	MOVQ	r14-(13*8+8)(SP), R14
  1688  	MOVQ	r15-(14*8+8)(SP), R15
  1689  
  1690  	RET
  1691  
  1692  // runtime.debugCallCheck assumes that functions defined with the
  1693  // DEBUG_CALL_FN macro are safe points to inject calls.
  1694  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1695  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1696  	NO_LOCAL_POINTERS;			\
  1697  	MOVQ	$0, AX;				\
  1698  	BYTE	$0xcc;				\
  1699  	MOVQ	$1, AX;				\
  1700  	BYTE	$0xcc;				\
  1701  	RET
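        // Each expansion below performs just the two INT3 handshakes of the protocol
        // above, in a frame of MAXSIZE bytes: with AX=0 the debugger writes the
        // argument frame at SP and redirects the PC to the target function; when the
        // injected call returns, AX is set to 1 and INT3 is raised again to tell the
        // debugger that any results are available at SP.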
  1702  DEBUG_CALL_FN(debugCall32<>, 32)
  1703  DEBUG_CALL_FN(debugCall64<>, 64)
  1704  DEBUG_CALL_FN(debugCall128<>, 128)
  1705  DEBUG_CALL_FN(debugCall256<>, 256)
  1706  DEBUG_CALL_FN(debugCall512<>, 512)
  1707  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1708  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1709  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1710  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1711  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1712  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1713  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1714  
  1715  // func debugCallPanicked(val interface{})
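        // debugCallPanicked reports a panic from an injected call to the debugger
        // using the AX=2 / INT3 step of the protocol above: the interface's type and
        // data words are copied to 0(SP) and 8(SP) before the breakpoint is raised.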
  1716  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1717  	// Copy the panic value to the top of stack.
  1718  	MOVQ	val_type+0(FP), AX
  1719  	MOVQ	AX, 0(SP)
  1720  	MOVQ	val_data+8(FP), AX
  1721  	MOVQ	AX, 8(SP)
  1722  	MOVQ	$2, AX
  1723  	BYTE	$0xcc
  1724  	RET
  1725  
  1726  // Note: these functions use a special calling convention to save generated code space.
  1727  // Arguments are passed in registers, but the space for those arguments is allocated
  1728  // in the caller's stack frame. These stubs write the args into that stack space and
  1729  // then tail call to the corresponding runtime handler.
  1730  // The tail call makes these stubs disappear in backtraces.
  1731  // Defined as ABIInternal since they do not use the stack-based Go ABI.
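        //
        // For example (illustrative only): when a bounds check on s[i] fails and the
        // compiler has left the index in AX and len(s) in CX, the generated code is
        // simply
        //
        //	CALL	runtime·panicIndex(SB)
        //
        // with 16 bytes of outgoing-argument space already reserved in the calling
        // frame. panicIndex spills AX and CX into that space (x+0(FP) and y+8(FP))
        // and tail-calls runtime·goPanicIndex, so the stub never appears in
        // tracebacks.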
  1732  TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
  1733  	MOVQ	AX, x+0(FP)
  1734  	MOVQ	CX, y+8(FP)
  1735  	JMP	runtime·goPanicIndex(SB)
  1736  TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
  1737  	MOVQ	AX, x+0(FP)
  1738  	MOVQ	CX, y+8(FP)
  1739  	JMP	runtime·goPanicIndexU(SB)
  1740  TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
  1741  	MOVQ	CX, x+0(FP)
  1742  	MOVQ	DX, y+8(FP)
  1743  	JMP	runtime·goPanicSliceAlen(SB)
  1744  TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1745  	MOVQ	CX, x+0(FP)
  1746  	MOVQ	DX, y+8(FP)
  1747  	JMP	runtime·goPanicSliceAlenU(SB)
  1748  TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
  1749  	MOVQ	CX, x+0(FP)
  1750  	MOVQ	DX, y+8(FP)
  1751  	JMP	runtime·goPanicSliceAcap(SB)
  1752  TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1753  	MOVQ	CX, x+0(FP)
  1754  	MOVQ	DX, y+8(FP)
  1755  	JMP	runtime·goPanicSliceAcapU(SB)
  1756  TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
  1757  	MOVQ	AX, x+0(FP)
  1758  	MOVQ	CX, y+8(FP)
  1759  	JMP	runtime·goPanicSliceB(SB)
  1760  TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
  1761  	MOVQ	AX, x+0(FP)
  1762  	MOVQ	CX, y+8(FP)
  1763  	JMP	runtime·goPanicSliceBU(SB)
  1764  TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
  1765  	MOVQ	DX, x+0(FP)
  1766  	MOVQ	BX, y+8(FP)
  1767  	JMP	runtime·goPanicSlice3Alen(SB)
  1768  TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1769  	MOVQ	DX, x+0(FP)
  1770  	MOVQ	BX, y+8(FP)
  1771  	JMP	runtime·goPanicSlice3AlenU(SB)
  1772  TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
  1773  	MOVQ	DX, x+0(FP)
  1774  	MOVQ	BX, y+8(FP)
  1775  	JMP	runtime·goPanicSlice3Acap(SB)
  1776  TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1777  	MOVQ	DX, x+0(FP)
  1778  	MOVQ	BX, y+8(FP)
  1779  	JMP	runtime·goPanicSlice3AcapU(SB)
  1780  TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
  1781  	MOVQ	CX, x+0(FP)
  1782  	MOVQ	DX, y+8(FP)
  1783  	JMP	runtime·goPanicSlice3B(SB)
  1784  TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
  1785  	MOVQ	CX, x+0(FP)
  1786  	MOVQ	DX, y+8(FP)
  1787  	JMP	runtime·goPanicSlice3BU(SB)
  1788  TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
  1789  	MOVQ	AX, x+0(FP)
  1790  	MOVQ	CX, y+8(FP)
  1791  	JMP	runtime·goPanicSlice3C(SB)
  1792  TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
  1793  	MOVQ	AX, x+0(FP)
  1794  	MOVQ	CX, y+8(FP)
  1795  	JMP	runtime·goPanicSlice3CU(SB)
  1796  
  1797  #ifdef GOOS_android
  1798  // Use the free TLS_SLOT_APP slot #2 on Android Q.
  1799  // Earlier Android versions are set up in gcc_android.c.
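        // Slot #2 at 8 bytes per pointer slot gives the byte offset 16 stored in
        // runtime·tls_g below.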
  1800  DATA runtime·tls_g+0(SB)/8, $16
  1801  GLOBL runtime·tls_g+0(SB), NOPTR, $8
  1802  #endif
  1803  
  1804  // The compiler and assembler's -spectre=ret mode rewrites
  1805  // all indirect CALL AX / JMP AX instructions to be
  1806  // CALL retpolineAX / JMP retpolineAX.
  1807  // See https://support.google.com/faqs/answer/7625886.
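        //
        // Decoded, RETPOLINE(reg) is the following sequence (shown here for
        // RETPOLINE(0), i.e. retpolineAX):
        //
        //	CALL	setup
        //	nospec:
        //	PAUSE
        //	JMP	nospec
        //	setup:
        //	MOVQ	AX, 0(SP)	// overwrite the return address pushed by CALL setup
        //	RET			// "return" to that address, i.e. jump through AX
        //
        // The architectural effect is an indirect jump through the register, while a
        // mispredicted speculative return is trapped in the PAUSE/JMP loop at nospec.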
  1808  #define RETPOLINE(reg) \
  1809  	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
  1810  	/* nospec: */									\
  1811  	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
  1812  	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
  1813  	/* setup: */									\
  1814  	/*   MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
  1815  	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
  1816  	/*   RET */             BYTE $0xC3
  1817  
  1818  TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
  1819  TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
  1820  TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
  1821  TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
  1822  /* register 4 is SP: an indirect CALL/JMP through SP can't happen, and 4 has magic encodings */
  1823  TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
  1824  TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
  1825  TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
  1826  TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
  1827  TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
  1828  TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
  1829  TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
  1830  TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
  1831  TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
  1832  TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
  1833  TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)