github.com/gidoBOSSftw5731/go/src@v0.0.0-20210226122457-d24b0edbf019/runtime/asm_amd64.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_amd64 is common startup code for most amd64 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
    14  TEXT _rt0_amd64(SB),NOSPLIT,$-8
    15  	MOVQ	0(SP), DI	// argc
    16  	LEAQ	8(SP), SI	// argv
    17  	JMP	runtime·rt0_go(SB)
    18  
    19  // main is common startup code for most amd64 systems when using
    20  // external linking. The C startup code will call the symbol "main"
    21  // passing argc and argv in the usual C ABI registers DI and SI.
    22  TEXT main(SB),NOSPLIT,$-8
    23  	JMP	runtime·rt0_go(SB)
    24  
    25  // _rt0_amd64_lib is common startup code for most amd64 systems when
    26  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27  // arrange to invoke this function as a global constructor (for
    28  // c-archive) or when the shared library is loaded (for c-shared).
    29  // We expect argc and argv to be passed in the usual C ABI registers
    30  // DI and SI.
    31  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32  	// Align stack per ELF ABI requirements.
    33  	MOVQ	SP, AX
    34  	ANDQ	$~15, SP
    35  	// Save C ABI callee-saved registers, as caller may need them.
    36  	MOVQ	BX, 0x10(SP)
    37  	MOVQ	BP, 0x18(SP)
    38  	MOVQ	R12, 0x20(SP)
    39  	MOVQ	R13, 0x28(SP)
    40  	MOVQ	R14, 0x30(SP)
    41  	MOVQ	R15, 0x38(SP)
    42  	MOVQ	AX, 0x40(SP)
    43  
    44  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46  
    47  	// Synchronous initialization.
    48  	CALL	runtime·libpreinit(SB)
    49  
    50  	// Create a new thread to finish Go runtime initialization.
    51  	MOVQ	_cgo_sys_thread_create(SB), AX
    52  	TESTQ	AX, AX
    53  	JZ	nocgo
    54  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55  	MOVQ	$0, SI
    56  	CALL	AX
    57  	JMP	restore
    58  
    59  nocgo:
    60  	MOVQ	$0x800000, 0(SP)		// stacksize
    61  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62  	MOVQ	AX, 8(SP)			// fn
    63  	CALL	runtime·newosproc0(SB)
    64  
    65  restore:
    66  	MOVQ	0x10(SP), BX
    67  	MOVQ	0x18(SP), BP
    68  	MOVQ	0x20(SP), R12
    69  	MOVQ	0x28(SP), R13
    70  	MOVQ	0x30(SP), R14
    71  	MOVQ	0x38(SP), R15
    72  	MOVQ	0x40(SP), SP
    73  	RET
    74  
    75  // _rt0_amd64_lib_go initializes the Go runtime.
    76  // This is started in a separate thread by _rt0_amd64_lib.
    77  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    78  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    79  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80  	JMP	runtime·rt0_go(SB)
    81  
    82  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    83  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    85  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86  
    87  TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
    88  	// copy arguments forward on an even stack
    89  	MOVQ	DI, AX		// argc
    90  	MOVQ	SI, BX		// argv
    91  	SUBQ	$(4*8+7), SP		// 2args 2auto
    92  	ANDQ	$~15, SP
    93  	MOVQ	AX, 16(SP)
    94  	MOVQ	BX, 24(SP)
    95  
    96  	// create istack out of the given (operating system) stack.
    97  	// _cgo_init may update stackguard.
    98  	MOVQ	$runtime·g0(SB), DI
    99  	LEAQ	(-64*1024+104)(SP), BX
   100  	MOVQ	BX, g_stackguard0(DI)
   101  	MOVQ	BX, g_stackguard1(DI)
   102  	MOVQ	BX, (g_stack+stack_lo)(DI)
   103  	MOVQ	SP, (g_stack+stack_hi)(DI)
   104  
   105  	// find out information about the processor we're on
   106  	MOVL	$0, AX
   107  	CPUID
   108  	MOVL	AX, SI
   109  	CMPL	AX, $0
   110  	JE	nocpuinfo
   111  
   112  	// Figure out how to serialize RDTSC.
   113  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   114  	// Don't know about the rest, so let's do MFENCE.
   115  	CMPL	BX, $0x756E6547  // "Genu"
   116  	JNE	notintel
   117  	CMPL	DX, $0x49656E69  // "ineI"
   118  	JNE	notintel
   119  	CMPL	CX, $0x6C65746E  // "ntel"
   120  	JNE	notintel
   121  	MOVB	$1, runtime·isIntel(SB)
   122  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   123  notintel:
   124  
   125  	// Load EAX=1 cpuid flags
   126  	MOVL	$1, AX
   127  	CPUID
   128  	MOVL	AX, runtime·processorVersionInfo(SB)
   129  
   130  nocpuinfo:
   131  	// if there is an _cgo_init, call it.
   132  	MOVQ	_cgo_init(SB), AX
   133  	TESTQ	AX, AX
   134  	JZ	needtls
   135  	// arg 1: g0, already in DI
   136  	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   137  #ifdef GOOS_android
   138  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   139  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   140  	// Compensate for tls_g (+16).
   141  	MOVQ	-16(TLS), CX
   142  #else
   143  	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   144  	MOVQ	$0, CX
   145  #endif
   146  #ifdef GOOS_windows
   147  	// Adjust for the Win64 calling convention.
   148  	MOVQ	CX, R9 // arg 4
   149  	MOVQ	DX, R8 // arg 3
   150  	MOVQ	SI, DX // arg 2
   151  	MOVQ	DI, CX // arg 1
   152  #endif
   153  	CALL	AX
   154  
   155  	// update stackguard after _cgo_init
   156  	MOVQ	$runtime·g0(SB), CX
   157  	MOVQ	(g_stack+stack_lo)(CX), AX
   158  	ADDQ	$const__StackGuard, AX
   159  	MOVQ	AX, g_stackguard0(CX)
   160  	MOVQ	AX, g_stackguard1(CX)
   161  
   162  #ifndef GOOS_windows
   163  	JMP ok
   164  #endif
   165  needtls:
   166  #ifdef GOOS_plan9
   167  	// skip TLS setup on Plan 9
   168  	JMP ok
   169  #endif
   170  #ifdef GOOS_solaris
   171  	// skip TLS setup on Solaris
   172  	JMP ok
   173  #endif
   174  #ifdef GOOS_illumos
   175  	// skip TLS setup on illumos
   176  	JMP ok
   177  #endif
   178  #ifdef GOOS_darwin
   179  	// skip TLS setup on Darwin
   180  	JMP ok
   181  #endif
   182  #ifdef GOOS_openbsd
   183  	// skip TLS setup on OpenBSD
   184  	JMP ok
   185  #endif
   186  
   187  	LEAQ	runtime·m0+m_tls(SB), DI
   188  	CALL	runtime·settls(SB)
   189  
   190  	// store through it, to make sure it works
   191  	get_tls(BX)
   192  	MOVQ	$0x123, g(BX)
   193  	MOVQ	runtime·m0+m_tls(SB), AX
   194  	CMPQ	AX, $0x123
   195  	JEQ 2(PC)
   196  	CALL	runtime·abort(SB)
   197  ok:
   198  	// set the per-goroutine and per-mach "registers"
   199  	get_tls(BX)
   200  	LEAQ	runtime·g0(SB), CX
   201  	MOVQ	CX, g(BX)
   202  	LEAQ	runtime·m0(SB), AX
   203  
   204  	// save m->g0 = g0
   205  	MOVQ	CX, m_g0(AX)
   206  	// save m0 to g0->m
   207  	MOVQ	AX, g_m(CX)
   208  
   209  	CLD				// convention is D is always left cleared
   210  	CALL	runtime·check(SB)
   211  
   212  	MOVL	16(SP), AX		// copy argc
   213  	MOVL	AX, 0(SP)
   214  	MOVQ	24(SP), AX		// copy argv
   215  	MOVQ	AX, 8(SP)
   216  	CALL	runtime·args(SB)
   217  	CALL	runtime·osinit(SB)
   218  	CALL	runtime·schedinit(SB)
   219  
   220  	// create a new goroutine to start program
   221  	MOVQ	$runtime·mainPC(SB), AX		// entry
   222  	PUSHQ	AX
   223  	PUSHQ	$0			// arg size
   224  	CALL	runtime·newproc(SB)
   225  	POPQ	AX
   226  	POPQ	AX
   227  
   228  	// start this M
   229  	CALL	runtime·mstart(SB)
   230  
   231  	CALL	runtime·abort(SB)	// mstart should never return
   232  	RET
   233  
   234  	// Prevent dead-code elimination of debugCallV1, which is
   235  	// intended to be called by debuggers.
   236  	MOVQ	$runtime·debugCallV1<ABIInternal>(SB), AX
   237  	RET
   238  
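        // In rough Go terms the boot sequence above is (a readers' sketch,
        // not code that exists in the runtime):
        //
        //	func rt0_go(argc int32, argv **byte) {
        //		// ... set up g0, m0 and TLS as above, then:
        //		args(argc, argv)
        //		osinit()
        //		schedinit()
        //		newproc(0, mainPC) // queue runtime.main as the first goroutine
        //		mstart()           // start scheduling on this thread; never returns
        //	}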
   239  // mainPC is a function value for runtime.main, to be passed to newproc.
   240  // The reference to runtime.main is made via ABIInternal, since the
   241  // actual function (not the ABI0 wrapper) is needed by newproc.
   242  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   243  GLOBL	runtime·mainPC(SB),RODATA,$8
   244  
   245  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   246  	BYTE	$0xcc
   247  	RET
   248  
   249  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   250  	// No per-thread init.
   251  	RET
   252  
   253  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
   254  	CALL	runtime·mstart0(SB)
   255  	RET // not reached
   256  
   257  /*
   258   *  go-routine
   259   */
   260  
   261  // func gogo(buf *gobuf)
   262  // restore state from Gobuf; longjmp
   263  TEXT runtime·gogo(SB), NOSPLIT, $0-8
   264  	MOVQ	buf+0(FP), BX		// gobuf
   265  	MOVQ	gobuf_g(BX), DX
   266  	MOVQ	0(DX), CX		// make sure g != nil
   267  	JMP	gogo<>(SB)
   268  
   269  TEXT gogo<>(SB), NOSPLIT, $0
   270  	get_tls(CX)
   271  	MOVQ	DX, g(CX)
   272  	MOVQ	DX, R14		// set the g register
   273  	MOVQ	gobuf_sp(BX), SP	// restore SP
   274  	MOVQ	gobuf_ret(BX), AX
   275  	MOVQ	gobuf_ctxt(BX), DX
   276  	MOVQ	gobuf_bp(BX), BP
   277  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   278  	MOVQ	$0, gobuf_ret(BX)
   279  	MOVQ	$0, gobuf_ctxt(BX)
   280  	MOVQ	$0, gobuf_bp(BX)
   281  	MOVQ	gobuf_pc(BX), BX
   282  	JMP	BX
   283  
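        // gogo is, in Go-flavored pseudocode (a sketch only; restoring SP/BP
        // and jumping to a saved PC cannot be expressed in real Go):
        //
        //	func gogo(buf *gobuf) {
        //		setg(buf.g)
        //		sp, bp, ctxt, ret = buf.sp, buf.bp, buf.ctxt, buf.ret
        //		buf.sp, buf.ret, buf.ctxt, buf.bp = 0, 0, 0, 0 // help the GC
        //		jump(buf.pc)
        //	}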
   284  // func mcall(fn func(*g))
   285  // Switch to m->g0's stack, call fn(g).
   286  // Fn must never return. It should gogo(&g->sched)
   287  // to keep running g.
   288  TEXT runtime·mcall(SB), NOSPLIT, $0-8
   289  	MOVQ	fn+0(FP), DI
   290  
   291  	get_tls(CX)
   292  	MOVQ	g(CX), AX	// save state in g->sched
   293  	MOVQ	0(SP), BX	// caller's PC
   294  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   295  	LEAQ	fn+0(FP), BX	// caller's SP
   296  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   297  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   298  
   299  	// switch to m->g0 & its stack, call fn
   300  	MOVQ	g(CX), BX
   301  	MOVQ	g_m(BX), BX
   302  	MOVQ	m_g0(BX), SI
   303  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   304  	JNE	3(PC)
   305  	MOVQ	$runtime·badmcall(SB), AX
   306  	JMP	AX
   307  	MOVQ	SI, g(CX)	// g = m->g0
   308  	MOVQ	SI, R14	// set the g register
   309  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   310  	PUSHQ	AX
   311  	MOVQ	DI, DX
   312  	MOVQ	0(DI), DI
   313  	CALL	DI
   314  	POPQ	AX
   315  	MOVQ	$runtime·badmcall2(SB), AX
   316  	JMP	AX
   317  	RET
   318  
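        // A representative mcall call site is Gosched's gosched_m/goschedImpl
        // in proc.go, roughly (sketch, locking elided):
        //
        //	mcall(func(gp *g) {
        //		// now running on g0's stack; gp is the goroutine that called mcall
        //		casgstatus(gp, _Grunning, _Grunnable)
        //		dropg()
        //		globrunqput(gp)
        //		schedule() // never returns
        //	})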
   319  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   320  // of the G stack. We need to distinguish the routine that
   321  // lives at the bottom of the G stack from the one that lives
   322  // at the top of the system stack because the one at the top of
   323  // the system stack terminates the stack walk (see topofstack()).
   324  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   325  	RET
   326  
   327  // func systemstack(fn func())
   328  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   329  	MOVQ	fn+0(FP), DI	// DI = fn
   330  	get_tls(CX)
   331  	MOVQ	g(CX), AX	// AX = g
   332  	MOVQ	g_m(AX), BX	// BX = m
   333  
   334  	CMPQ	AX, m_gsignal(BX)
   335  	JEQ	noswitch
   336  
   337  	MOVQ	m_g0(BX), DX	// DX = g0
   338  	CMPQ	AX, DX
   339  	JEQ	noswitch
   340  
   341  	CMPQ	AX, m_curg(BX)
   342  	JNE	bad
   343  
   344  	// switch stacks
   345  	// save our state in g->sched. Pretend to
   346  	// be systemstack_switch if the G stack is scanned.
   347  	CALL	gosave_systemstack_switch<>(SB)
   348  
   349  	// switch to g0
   350  	MOVQ	DX, g(CX)
   351  	MOVQ	DX, R14 // set the g register
   352  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   353  	MOVQ	BX, SP
   354  
   355  	// call target function
   356  	MOVQ	DI, DX
   357  	MOVQ	0(DI), DI
   358  	CALL	DI
   359  
   360  	// switch back to g
   361  	get_tls(CX)
   362  	MOVQ	g(CX), AX
   363  	MOVQ	g_m(AX), BX
   364  	MOVQ	m_curg(BX), AX
   365  	MOVQ	AX, g(CX)
   366  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   367  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   368  	RET
   369  
   370  noswitch:
   371  	// already on m stack; tail call the function
   372  	// Using a tail call here cleans up tracebacks since we won't stop
   373  	// at an intermediate systemstack.
   374  	MOVQ	DI, DX
   375  	MOVQ	0(DI), DI
   376  	JMP	DI
   377  
   378  bad:
   379  	// Bad: g is not gsignal, not g0, not curg. What is it?
   380  	MOVQ	$runtime·badsystemstack(SB), AX
   381  	CALL	AX
   382  	INT	$3
   383  
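        // From Go code, systemstack is used like this (sketch;
        // doSomethingStackHungry is illustrative only):
        //
        //	systemstack(func() {
        //		// body runs on the M's g0 stack: plenty of room, no stack
        //		// growth, and no preemption while it runs
        //		doSomethingStackHungry()
        //	})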
   384  
   385  /*
   386   * support for morestack
   387   */
   388  
   389  // Called during function prolog when more stack is needed.
   390  //
   391  // The traceback routines see morestack on a g0 as being
   392  // the top of a stack (for example, morestack calling newstack
   393  // calling the scheduler calling newm calling gc), so we must
   394  // record an argument size. For that purpose, it has no arguments.
   395  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   396  	// Cannot grow scheduler stack (m->g0).
   397  	get_tls(CX)
   398  	MOVQ	g(CX), BX
   399  	MOVQ	g_m(BX), BX
   400  	MOVQ	m_g0(BX), SI
   401  	CMPQ	g(CX), SI
   402  	JNE	3(PC)
   403  	CALL	runtime·badmorestackg0(SB)
   404  	CALL	runtime·abort(SB)
   405  
   406  	// Cannot grow signal stack (m->gsignal).
   407  	MOVQ	m_gsignal(BX), SI
   408  	CMPQ	g(CX), SI
   409  	JNE	3(PC)
   410  	CALL	runtime·badmorestackgsignal(SB)
   411  	CALL	runtime·abort(SB)
   412  
   413  	// Called from f.
   414  	// Set m->morebuf to f's caller.
   415  	NOP	SP	// tell vet SP changed - stop checking offsets
   416  	MOVQ	8(SP), AX	// f's caller's PC
   417  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   418  	LEAQ	16(SP), AX	// f's caller's SP
   419  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   420  	get_tls(CX)
   421  	MOVQ	g(CX), SI
   422  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   423  
   424  	// Set g->sched to context in f.
   425  	MOVQ	0(SP), AX // f's PC
   426  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   427  	LEAQ	8(SP), AX // f's SP
   428  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   429  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
   430  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   431  
   432  	// Call newstack on m->g0's stack.
   433  	MOVQ	m_g0(BX), BX
   434  	MOVQ	BX, g(CX)
   435  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   436  	CALL	runtime·newstack(SB)
   437  	CALL	runtime·abort(SB)	// crash if newstack returns
   438  	RET
   439  
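        // For context: the compiler-generated prologue of every splittable
        // function compares SP against g.stackguard0 and branches into
        // morestack (morestack_noctxt below when there is no closure context)
        // if the new frame would not fit. Go-flavored sketch of that check:
        //
        //	if sp-frameSize < g.stackguard0 { // tiny frames compare sp directly
        //		morestack()        // newstack grows or moves the stack
        //		goto functionEntry // restart the function on the new stack
        //	}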
   440  // morestack but not preserving ctxt.
   441  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   442  	MOVL	$0, DX
   443  	JMP	runtime·morestack(SB)
   444  
   445  // REFLECTCALL_USE_REGABI is not defined. It must be defined in conjunction with the
   446  // register constants in the internal/abi package.
   447  
   448  #ifdef REFLECTCALL_USE_REGABI
   449  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
   450  TEXT spillArgs<>(SB),NOSPLIT,$0-0
   451  	MOVQ AX, 0(R12)
   452  	MOVQ BX, 8(R12)
   453  	MOVQ CX, 16(R12)
   454  	MOVQ DI, 24(R12)
   455  	MOVQ SI, 32(R12)
   456  	MOVQ R8, 40(R12)
   457  	MOVQ R9, 48(R12)
   458  	MOVQ R10, 56(R12)
   459  	MOVQ R11, 64(R12)
   460  	MOVQ X0, 72(R12)
   461  	MOVQ X1, 80(R12)
   462  	MOVQ X2, 88(R12)
   463  	MOVQ X3, 96(R12)
   464  	MOVQ X4, 104(R12)
   465  	MOVQ X5, 112(R12)
   466  	MOVQ X6, 120(R12)
   467  	MOVQ X7, 128(R12)
   468  	MOVQ X8, 136(R12)
   469  	MOVQ X9, 144(R12)
   470  	MOVQ X10, 152(R12)
   471  	MOVQ X11, 160(R12)
   472  	MOVQ X12, 168(R12)
   473  	MOVQ X13, 176(R12)
   474  	MOVQ X14, 184(R12)
   475  	RET
   476  
   477  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
   478  TEXT unspillArgs<>(SB),NOSPLIT,$0-0
   479  	MOVQ 0(R12), AX
   480  	MOVQ 8(R12), BX
   481  	MOVQ 16(R12), CX
   482  	MOVQ 24(R12), DI
   483  	MOVQ 32(R12), SI
   484  	MOVQ 40(R12), R8
   485  	MOVQ 48(R12), R9
   486  	MOVQ 56(R12), R10
   487  	MOVQ 64(R12), R11
   488  	MOVQ 72(R12), X0
   489  	MOVQ 80(R12), X1
   490  	MOVQ 88(R12), X2
   491  	MOVQ 96(R12), X3
   492  	MOVQ 104(R12), X4
   493  	MOVQ 112(R12), X5
   494  	MOVQ 120(R12), X6
   495  	MOVQ 128(R12), X7
   496  	MOVQ 136(R12), X8
   497  	MOVQ 144(R12), X9
   498  	MOVQ 152(R12), X10
   499  	MOVQ 160(R12), X11
   500  	MOVQ 168(R12), X12
   501  	MOVQ 176(R12), X13
   502  	MOVQ 184(R12), X14
   503  	RET
   504  #else
   505  // spillArgs stores return values from registers to a pointer in R12.
   506  TEXT spillArgs<>(SB),NOSPLIT,$0-0
   507  	RET
   508  
   509  // unspillArgs loads args into registers from a pointer in R12.
   510  TEXT unspillArgs<>(SB),NOSPLIT,$0-0
   511  	RET
   512  #endif
   513  
   514  // reflectcall: call a function with the given argument list
   515  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   516  // we don't have variable-sized frames, so we use a small number
   517  // of constant-sized-frame functions to encode a few bits of size in the pc.
   518  // Caution: ugly multiline assembly macros in your future!
   519  
   520  #define DISPATCH(NAME,MAXSIZE)		\
   521  	CMPQ	CX, $MAXSIZE;		\
   522  	JA	3(PC);			\
   523  	MOVQ	$NAME(SB), AX;		\
   524  	JMP	AX
   525  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   526  
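        // In effect reflectcall picks the smallest fixed-size wrapper that can
        // hold frameSize, e.g. a 100-byte frame dispatches to runtime·call128.
        // Go-flavored sketch of the selection (callFn is illustrative only):
        //
        //	for n := uint32(16); n <= 1<<30; n *= 2 {
        //		if frameSize <= n {
        //			tailcall(callFn[n]) // wrapper declared with a $n-48 frame
        //		}
        //	}
        //	badreflectcall()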
   527  TEXT ·reflectcall<ABIInternal>(SB), NOSPLIT, $0-48
   528  	MOVLQZX frameSize+32(FP), CX
   529  	DISPATCH(runtime·call16, 16)
   530  	DISPATCH(runtime·call32, 32)
   531  	DISPATCH(runtime·call64, 64)
   532  	DISPATCH(runtime·call128, 128)
   533  	DISPATCH(runtime·call256, 256)
   534  	DISPATCH(runtime·call512, 512)
   535  	DISPATCH(runtime·call1024, 1024)
   536  	DISPATCH(runtime·call2048, 2048)
   537  	DISPATCH(runtime·call4096, 4096)
   538  	DISPATCH(runtime·call8192, 8192)
   539  	DISPATCH(runtime·call16384, 16384)
   540  	DISPATCH(runtime·call32768, 32768)
   541  	DISPATCH(runtime·call65536, 65536)
   542  	DISPATCH(runtime·call131072, 131072)
   543  	DISPATCH(runtime·call262144, 262144)
   544  	DISPATCH(runtime·call524288, 524288)
   545  	DISPATCH(runtime·call1048576, 1048576)
   546  	DISPATCH(runtime·call2097152, 2097152)
   547  	DISPATCH(runtime·call4194304, 4194304)
   548  	DISPATCH(runtime·call8388608, 8388608)
   549  	DISPATCH(runtime·call16777216, 16777216)
   550  	DISPATCH(runtime·call33554432, 33554432)
   551  	DISPATCH(runtime·call67108864, 67108864)
   552  	DISPATCH(runtime·call134217728, 134217728)
   553  	DISPATCH(runtime·call268435456, 268435456)
   554  	DISPATCH(runtime·call536870912, 536870912)
   555  	DISPATCH(runtime·call1073741824, 1073741824)
   556  	MOVQ	$runtime·badreflectcall(SB), AX
   557  	JMP	AX
   558  
   559  #define CALLFN(NAME,MAXSIZE)			\
   560  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   561  	NO_LOCAL_POINTERS;			\
   562  	/* copy arguments to stack */		\
   563  	MOVQ	stackArgs+16(FP), SI;		\
   564  	MOVLQZX stackArgsSize+24(FP), CX;		\
   565  	MOVQ	SP, DI;				\
   566  	REP;MOVSB;				\
   567  	/* set up argument registers */		\
   568  	MOVQ    regArgs+40(FP), R12;		\
   569  	CALL    unspillArgs<>(SB);		\
   570  	/* call function */			\
   571  	MOVQ	f+8(FP), DX;			\
   572  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   573  	MOVQ	(DX), R12;			\
   574  	CALL	R12;				\
   575  	/* copy register return values back */		\
   576  	MOVQ    regArgs+40(FP), R12;		\
   577  	CALL    spillArgs<>(SB);		\
   578  	MOVLQZX	stackArgsSize+24(FP), CX;		\
   579  	MOVLQZX	stackRetOffset+28(FP), BX;		\
   580  	MOVQ	stackArgs+16(FP), DI;		\
   581  	MOVQ	stackArgsType+0(FP), DX;		\
   582  	MOVQ	SP, SI;				\
   583  	ADDQ	BX, DI;				\
   584  	ADDQ	BX, SI;				\
   585  	SUBQ	BX, CX;				\
   586  	CALL	callRet<>(SB);			\
   587  	RET
   588  
   589  // callRet copies return values back at the end of call*. This is a
   590  // separate function so it can allocate stack space for the arguments
   591  // to reflectcallmove. It does not follow the Go ABI; it expects its
   592  // arguments in registers.
   593  TEXT callRet<>(SB), NOSPLIT, $40-0
   594  	NO_LOCAL_POINTERS
   595  	MOVQ	DX, 0(SP)
   596  	MOVQ	DI, 8(SP)
   597  	MOVQ	SI, 16(SP)
   598  	MOVQ	CX, 24(SP)
   599  	MOVQ	R12, 32(SP)
   600  	CALL	runtime·reflectcallmove(SB)
   601  	RET
   602  
   603  CALLFN(·call16, 16)
   604  CALLFN(·call32, 32)
   605  CALLFN(·call64, 64)
   606  CALLFN(·call128, 128)
   607  CALLFN(·call256, 256)
   608  CALLFN(·call512, 512)
   609  CALLFN(·call1024, 1024)
   610  CALLFN(·call2048, 2048)
   611  CALLFN(·call4096, 4096)
   612  CALLFN(·call8192, 8192)
   613  CALLFN(·call16384, 16384)
   614  CALLFN(·call32768, 32768)
   615  CALLFN(·call65536, 65536)
   616  CALLFN(·call131072, 131072)
   617  CALLFN(·call262144, 262144)
   618  CALLFN(·call524288, 524288)
   619  CALLFN(·call1048576, 1048576)
   620  CALLFN(·call2097152, 2097152)
   621  CALLFN(·call4194304, 4194304)
   622  CALLFN(·call8388608, 8388608)
   623  CALLFN(·call16777216, 16777216)
   624  CALLFN(·call33554432, 33554432)
   625  CALLFN(·call67108864, 67108864)
   626  CALLFN(·call134217728, 134217728)
   627  CALLFN(·call268435456, 268435456)
   628  CALLFN(·call536870912, 536870912)
   629  CALLFN(·call1073741824, 1073741824)
   630  
   631  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   632  	MOVL	cycles+0(FP), AX
   633  again:
   634  	PAUSE
   635  	SUBL	$1, AX
   636  	JNZ	again
   637  	RET
   638  
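        // procyield is declared in Go as func procyield(cycles uint32); the
        // runtime's spin loops use it to back off without yielding the thread,
        // e.g. (sketch, compare the lock loop in lock_futex.go):
        //
        //	if !atomic.Cas(&l.key, mutex_unlocked, mutex_locked) {
        //		procyield(active_spin_cnt) // ~30 PAUSE iterations
        //	}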
   639  
   640  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   641  	// Stores are already ordered on x86, so this is just a
   642  	// compile barrier.
   643  	RET
   644  
   645  // func jmpdefer(fv *funcval, argp uintptr)
   646  // argp is a caller SP.
   647  // called from deferreturn.
   648  // 1. pop the caller
   649  // 2. sub 5 bytes from the caller's return address
   650  // 3. jmp to the argument
   651  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   652  	MOVQ	fv+0(FP), DX	// fn
   653  	MOVQ	argp+8(FP), BX	// caller sp
   654  	LEAQ	-8(BX), SP	// caller sp after CALL
   655  	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   656  	SUBQ	$5, (SP)	// return to CALL again
   657  	MOVQ	0(DX), BX
   658  	JMP	BX	// but first run the deferred function
   659  
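        // The SUBQ $5 above relies on a direct CALL on amd64 being 5 bytes
        // (opcode plus 32-bit offset): rewinding the saved return address by 5
        // makes the deferred function's RET land on the CALL to deferreturn
        // again, so deferreturn keeps re-running until no deferred calls remain.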
   660  // Save state of caller into g->sched,
   661  // but using fake PC from systemstack_switch.
   662  // Must only be called from functions with no locals ($0)
   663  // or else unwinding from systemstack_switch is incorrect.
   664  // Smashes R9.
   665  TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
   666  #ifndef GOEXPERIMENT_REGABI
   667  	get_tls(R14)
   668  	MOVQ	g(R14), R14
   669  #endif
   670  	MOVQ	$runtime·systemstack_switch(SB), R9
   671  	MOVQ	R9, (g_sched+gobuf_pc)(R14)
   672  	LEAQ	8(SP), R9
   673  	MOVQ	R9, (g_sched+gobuf_sp)(R14)
   674  	MOVQ	$0, (g_sched+gobuf_ret)(R14)
   675  	MOVQ	BP, (g_sched+gobuf_bp)(R14)
   676  	// Assert ctxt is zero. See func save.
   677  	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
   678  	TESTQ	R9, R9
   679  	JZ	2(PC)
   680  	CALL	runtime·abort(SB)
   681  	RET
   682  
   683  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
   684  // Call fn(arg) aligned appropriately for the gcc ABI.
   685  // Called on a system stack, and there may be no g yet (during needm).
   686  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
   687  	MOVQ	fn+0(FP), AX
   688  	MOVQ	arg+8(FP), BX
   689  	MOVQ	SP, DX
   690  	SUBQ	$32, SP
   691  	ANDQ	$~15, SP	// alignment
   692  	MOVQ	DX, 8(SP)
   693  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   694  	MOVQ	BX, CX		// CX = first argument in Win64
   695  	CALL	AX
   696  	MOVQ	8(SP), DX
   697  	MOVQ	DX, SP
   698  	RET
   699  
   700  // func asmcgocall(fn, arg unsafe.Pointer) int32
   701  // Call fn(arg) on the scheduler stack,
   702  // aligned appropriately for the gcc ABI.
   703  // See cgocall.go for more details.
   704  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   705  	MOVQ	fn+0(FP), AX
   706  	MOVQ	arg+8(FP), BX
   707  
   708  	MOVQ	SP, DX
   709  
   710  	// Figure out if we need to switch to m->g0 stack.
   711  	// We get called to create new OS threads too, and those
   712  	// come in on the m->g0 stack already.
   713  	get_tls(CX)
   714  	MOVQ	g(CX), R8
   715  	CMPQ	R8, $0
   716  	JEQ	nosave
   717  	MOVQ	g_m(R8), R8
   718  	MOVQ	m_g0(R8), SI
   719  	MOVQ	g(CX), DI
   720  	CMPQ	SI, DI
   721  	JEQ	nosave
   722  	MOVQ	m_gsignal(R8), SI
   723  	CMPQ	SI, DI
   724  	JEQ	nosave
   725  
   726  	// Switch to system stack.
   727  	MOVQ	m_g0(R8), SI
   728  	CALL	gosave_systemstack_switch<>(SB)
   729  	MOVQ	SI, g(CX)
   730  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   731  
   732  	// Now on a scheduling stack (a pthread-created stack).
   733  	// Make sure we have enough room for 4 stack-backed fast-call
   734  	// registers as per windows amd64 calling convention.
   735  	SUBQ	$64, SP
   736  	ANDQ	$~15, SP	// alignment for gcc ABI
   737  	MOVQ	DI, 48(SP)	// save g
   738  	MOVQ	(g_stack+stack_hi)(DI), DI
   739  	SUBQ	DX, DI
   740  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   741  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   742  	MOVQ	BX, CX		// CX = first argument in Win64
   743  	CALL	AX
   744  
   745  	// Restore registers, g, stack pointer.
   746  	get_tls(CX)
   747  	MOVQ	48(SP), DI
   748  	MOVQ	(g_stack+stack_hi)(DI), SI
   749  	SUBQ	40(SP), SI
   750  	MOVQ	DI, g(CX)
   751  	MOVQ	SI, SP
   752  
   753  	MOVL	AX, ret+16(FP)
   754  	RET
   755  
   756  nosave:
   757  	// Running on a system stack, perhaps even without a g.
   758  	// Having no g can happen during thread creation or thread teardown
   759  	// (see needm/dropm on Solaris, for example).
   760  	// This code is like the above sequence but without saving/restoring g
   761  	// and without worrying about the stack moving out from under us
   762  	// (because we're on a system stack, not a goroutine stack).
   763  	// The above code could be used directly if already on a system stack,
   764  	// but then the only path through this code would be a rare case on Solaris.
   765  	// Using this code for all "already on system stack" calls exercises it more,
   766  	// which should help keep it correct.
   767  	SUBQ	$64, SP
   768  	ANDQ	$~15, SP
   769  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   770  	MOVQ	DX, 40(SP)	// save original stack pointer
   771  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   772  	MOVQ	BX, CX		// CX = first argument in Win64
   773  	CALL	AX
   774  	MOVQ	40(SP), SI	// restore original stack pointer
   775  	MOVQ	SI, SP
   776  	MOVL	AX, ret+16(FP)
   777  	RET
   778  
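        // asmcgocall is reached from Go via cgocall in cgocall.go, roughly
        // (sketch; the real code also handles profiling and preemption):
        //
        //	entersyscall()
        //	errno := asmcgocall(fn, arg) // switch to g0 and call fn(arg) on the C stack
        //	exitsyscall()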
   779  // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   780  // See cgocall.go for more details.
   781  TEXT ·cgocallback(SB),NOSPLIT,$24-24
   782  	NO_LOCAL_POINTERS
   783  
   784  	// If g is nil, Go did not create the current thread.
   785  	// Call needm to obtain one m for temporary use.
   786  	// In this case, we're running on the thread stack, so there's
   787  	// lots of space, but the linker doesn't know. Hide the call from
   788  	// the linker analysis by using an indirect call through AX.
   789  	get_tls(CX)
   790  #ifdef GOOS_windows
   791  	MOVL	$0, BX
   792  	CMPQ	CX, $0
   793  	JEQ	2(PC)
   794  #endif
   795  	MOVQ	g(CX), BX
   796  	CMPQ	BX, $0
   797  	JEQ	needm
   798  	MOVQ	g_m(BX), BX
   799  	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
   800  	JMP	havem
   801  needm:
   802  	MOVQ    $runtime·needm(SB), AX
   803  	CALL	AX
   804  	MOVQ	$0, savedm-8(SP) // dropm on return
   805  	get_tls(CX)
   806  	MOVQ	g(CX), BX
   807  	MOVQ	g_m(BX), BX
   808  
   809  	// Set m->sched.sp = SP, so that if a panic happens
   810  	// during the function we are about to execute, it will
   811  	// have a valid SP to run on the g0 stack.
   812  	// The next few lines (after the havem label)
   813  	// will save this SP onto the stack and then write
   814  	// the same SP back to m->sched.sp. That seems redundant,
   815  	// but if an unrecovered panic happens, unwindm will
   816  	// restore the g->sched.sp from the stack location
   817  	// and then systemstack will try to use it. If we don't set it here,
   818  	// that restored SP will be uninitialized (typically 0) and
   819  	// will not be usable.
   820  	MOVQ	m_g0(BX), SI
   821  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   822  
   823  havem:
   824  	// Now there's a valid m, and we're running on its m->g0.
   825  	// Save current m->g0->sched.sp on stack and then set it to SP.
   826  	// Save current sp in m->g0->sched.sp in preparation for
   827  	// switch back to m->curg stack.
   828  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   829  	MOVQ	m_g0(BX), SI
   830  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   831  	MOVQ	AX, 0(SP)
   832  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   833  
   834  	// Switch to m->curg stack and call runtime.cgocallbackg.
   835  	// Because we are taking over the execution of m->curg
   836  	// but *not* resuming what had been running, we need to
   837  	// save that information (m->curg->sched) so we can restore it.
   838  	// We can restore m->curg->sched.sp easily, because calling
   839  	// runtime.cgocallbackg leaves SP unchanged upon return.
   840  	// To save m->curg->sched.pc, we push it onto the curg stack and
   841  	// open a frame the same size as cgocallback's g0 frame.
   842  	// Once we switch to the curg stack, the pushed PC will appear
   843  	// to be the return PC of cgocallback, so that the traceback
   844  	// will seamlessly trace back into the earlier calls.
   845  	MOVQ	m_curg(BX), SI
   846  	MOVQ	SI, g(CX)
   847  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   848  	MOVQ	(g_sched+gobuf_pc)(SI), BX
   849  	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
   850  	// Gather our arguments into registers.
   851  	MOVQ	fn+0(FP), BX
   852  	MOVQ	frame+8(FP), CX
   853  	MOVQ	ctxt+16(FP), DX
   854  	// Compute the size of the frame, including return PC and, if
   855  	// GOEXPERIMENT=framepointer, the saved base pointer
   856  	LEAQ	fn+0(FP), AX
   857  	SUBQ	SP, AX   // AX is our actual frame size
   858  	SUBQ	AX, DI   // Allocate the same frame size on the g stack
   859  	MOVQ	DI, SP
   860  
   861  	MOVQ	BX, 0(SP)
   862  	MOVQ	CX, 8(SP)
   863  	MOVQ	DX, 16(SP)
   864  	CALL	runtime·cgocallbackg(SB)
   865  
   866  	// Compute the size of the frame again. FP and SP have
   867  	// completely different values here than they did above,
   868  	// but only their difference matters.
   869  	LEAQ	fn+0(FP), AX
   870  	SUBQ	SP, AX
   871  
   872  	// Restore g->sched (== m->curg->sched) from saved values.
   873  	get_tls(CX)
   874  	MOVQ	g(CX), SI
   875  	MOVQ	SP, DI
   876  	ADDQ	AX, DI
   877  	MOVQ	-8(DI), BX
   878  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   879  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   880  
   881  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   882  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   883  	// so we do not have to restore it.)
   884  	MOVQ	g(CX), BX
   885  	MOVQ	g_m(BX), BX
   886  	MOVQ	m_g0(BX), SI
   887  	MOVQ	SI, g(CX)
   888  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   889  	MOVQ	0(SP), AX
   890  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   891  
   892  	// If the m on entry was nil, we called needm above to borrow an m
   893  	// for the duration of the call. Since the call is over, return it with dropm.
   894  	MOVQ	savedm-8(SP), BX
   895  	CMPQ	BX, $0
   896  	JNE 3(PC)
   897  	MOVQ	$runtime·dropm(SB), AX
   898  	CALL	AX
   899  
   900  	// Done!
   901  	RET
   902  
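        // The overall callback path is: C code calls the exported cgo wrapper,
        // which calls crosscall2 (runtime/cgo), which calls cgocallback above
        // with the Go function, its argument frame and a context word;
        // cgocallback switches to m.curg and runs runtime.cgocallbackg, which
        // finally calls the Go function.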
   903  // func setg(gg *g)
   904  // set g. for use by needm.
   905  TEXT runtime·setg(SB), NOSPLIT, $0-8
   906  	MOVQ	gg+0(FP), BX
   907  #ifdef GOOS_windows
   908  	CMPQ	BX, $0
   909  	JNE	settls
   910  	MOVQ	$0, 0x28(GS)
   911  	RET
   912  settls:
   913  	MOVQ	g_m(BX), AX
   914  	LEAQ	m_tls(AX), AX
   915  	MOVQ	AX, 0x28(GS)
   916  #endif
   917  	get_tls(CX)
   918  	MOVQ	BX, g(CX)
   919  	RET
   920  
   921  // void setg_gcc(G*); set g called from gcc.
   922  TEXT setg_gcc<>(SB),NOSPLIT,$0
   923  	get_tls(AX)
   924  	MOVQ	DI, g(AX)
   925  	MOVQ	DI, R14 // set the g register
   926  	RET
   927  
   928  TEXT runtime·abort(SB),NOSPLIT,$0-0
   929  	INT	$3
   930  loop:
   931  	JMP	loop
   932  
   933  // check that SP is in range [g->stack.lo, g->stack.hi)
   934  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   935  	get_tls(CX)
   936  	MOVQ	g(CX), AX
   937  	CMPQ	(g_stack+stack_hi)(AX), SP
   938  	JHI	2(PC)
   939  	CALL	runtime·abort(SB)
   940  	CMPQ	SP, (g_stack+stack_lo)(AX)
   941  	JHI	2(PC)
   942  	CALL	runtime·abort(SB)
   943  	RET
   944  
   945  // func cputicks() int64
   946  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   947  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   948  	JNE	mfence
   949  	LFENCE
   950  	JMP	done
   951  mfence:
   952  	MFENCE
   953  done:
   954  	RDTSC
   955  	SHLQ	$32, DX
   956  	ADDQ	DX, AX
   957  	MOVQ	AX, ret+0(FP)
   958  	RET
   959  
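        // RDTSC returns the 64-bit timestamp counter split across DX:AX (high
        // and low 32 bits); the SHLQ/ADDQ pair above recombines it, i.e.
        //
        //	ticks := int64(uint64(lo) | uint64(hi)<<32)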
   960  // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
   961  // hash function using AES hardware instructions
   962  TEXT runtime·memhash(SB),NOSPLIT,$0-32
   963  	CMPB	runtime·useAeshash(SB), $0
   964  	JEQ	noaes
   965  	MOVQ	p+0(FP), AX	// ptr to data
   966  	MOVQ	s+16(FP), CX	// size
   967  	LEAQ	ret+24(FP), DX
   968  	JMP	aeshashbody<>(SB)
   969  noaes:
   970  	JMP	runtime·memhashFallback(SB)
   971  
   972  // func strhash(p unsafe.Pointer, h uintptr) uintptr
   973  TEXT runtime·strhash(SB),NOSPLIT,$0-24
   974  	CMPB	runtime·useAeshash(SB), $0
   975  	JEQ	noaes
   976  	MOVQ	p+0(FP), AX	// ptr to string struct
   977  	MOVQ	8(AX), CX	// length of string
   978  	MOVQ	(AX), AX	// string data
   979  	LEAQ	ret+16(FP), DX
   980  	JMP	aeshashbody<>(SB)
   981  noaes:
   982  	JMP	runtime·strhashFallback(SB)
   983  
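        // memhash/strhash above are what the map implementation calls through
        // a type's hasher field; a map lookup conceptually starts with
        // (sketch, compare mapaccess in map.go):
        //
        //	hash := t.hasher(key, uintptr(h.hash0))
        //	bucket := hash & bucketMask(h.B)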
   984  // AX: data
   985  // CX: length
   986  // DX: address to put return value
   987  TEXT aeshashbody<>(SB),NOSPLIT,$0-0
   988  	// Fill an SSE register with our seeds.
   989  	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
   990  	PINSRW	$4, CX, X0			// 16 bits of length
   991  	PSHUFHW $0, X0, X0			// repeat length 4 times total
   992  	MOVO	X0, X1				// save unscrambled seed
   993  	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
   994  	AESENC	X0, X0				// scramble seed
   995  
   996  	CMPQ	CX, $16
   997  	JB	aes0to15
   998  	JE	aes16
   999  	CMPQ	CX, $32
  1000  	JBE	aes17to32
  1001  	CMPQ	CX, $64
  1002  	JBE	aes33to64
  1003  	CMPQ	CX, $128
  1004  	JBE	aes65to128
  1005  	JMP	aes129plus
  1006  
  1007  aes0to15:
  1008  	TESTQ	CX, CX
  1009  	JE	aes0
  1010  
  1011  	ADDQ	$16, AX
  1012  	TESTW	$0xff0, AX
  1013  	JE	endofpage
  1014  
  1015  	// 16 bytes loaded at this address won't cross
  1016  	// a page boundary, so we can load it directly.
  1017  	MOVOU	-16(AX), X1
  1018  	ADDQ	CX, CX
  1019  	MOVQ	$masks<>(SB), AX
  1020  	PAND	(AX)(CX*8), X1
  1021  final1:
  1022  	PXOR	X0, X1	// xor data with seed
  1023  	AESENC	X1, X1	// scramble combo 3 times
  1024  	AESENC	X1, X1
  1025  	AESENC	X1, X1
  1026  	MOVQ	X1, (DX)
  1027  	RET
  1028  
  1029  endofpage:
  1030  	// address ends in 1111xxxx. Might be up against
  1031  	// a page boundary, so load ending at last byte.
  1032  	// Then shift bytes down using pshufb.
  1033  	MOVOU	-32(AX)(CX*1), X1
  1034  	ADDQ	CX, CX
  1035  	MOVQ	$shifts<>(SB), AX
  1036  	PSHUFB	(AX)(CX*8), X1
  1037  	JMP	final1
  1038  
  1039  aes0:
  1040  	// Return scrambled input seed
  1041  	AESENC	X0, X0
  1042  	MOVQ	X0, (DX)
  1043  	RET
  1044  
  1045  aes16:
  1046  	MOVOU	(AX), X1
  1047  	JMP	final1
  1048  
  1049  aes17to32:
  1050  	// make second starting seed
  1051  	PXOR	runtime·aeskeysched+16(SB), X1
  1052  	AESENC	X1, X1
  1053  
  1054  	// load data to be hashed
  1055  	MOVOU	(AX), X2
  1056  	MOVOU	-16(AX)(CX*1), X3
  1057  
  1058  	// xor with seed
  1059  	PXOR	X0, X2
  1060  	PXOR	X1, X3
  1061  
  1062  	// scramble 3 times
  1063  	AESENC	X2, X2
  1064  	AESENC	X3, X3
  1065  	AESENC	X2, X2
  1066  	AESENC	X3, X3
  1067  	AESENC	X2, X2
  1068  	AESENC	X3, X3
  1069  
  1070  	// combine results
  1071  	PXOR	X3, X2
  1072  	MOVQ	X2, (DX)
  1073  	RET
  1074  
  1075  aes33to64:
  1076  	// make 3 more starting seeds
  1077  	MOVO	X1, X2
  1078  	MOVO	X1, X3
  1079  	PXOR	runtime·aeskeysched+16(SB), X1
  1080  	PXOR	runtime·aeskeysched+32(SB), X2
  1081  	PXOR	runtime·aeskeysched+48(SB), X3
  1082  	AESENC	X1, X1
  1083  	AESENC	X2, X2
  1084  	AESENC	X3, X3
  1085  
  1086  	MOVOU	(AX), X4
  1087  	MOVOU	16(AX), X5
  1088  	MOVOU	-32(AX)(CX*1), X6
  1089  	MOVOU	-16(AX)(CX*1), X7
  1090  
  1091  	PXOR	X0, X4
  1092  	PXOR	X1, X5
  1093  	PXOR	X2, X6
  1094  	PXOR	X3, X7
  1095  
  1096  	AESENC	X4, X4
  1097  	AESENC	X5, X5
  1098  	AESENC	X6, X6
  1099  	AESENC	X7, X7
  1100  
  1101  	AESENC	X4, X4
  1102  	AESENC	X5, X5
  1103  	AESENC	X6, X6
  1104  	AESENC	X7, X7
  1105  
  1106  	AESENC	X4, X4
  1107  	AESENC	X5, X5
  1108  	AESENC	X6, X6
  1109  	AESENC	X7, X7
  1110  
  1111  	PXOR	X6, X4
  1112  	PXOR	X7, X5
  1113  	PXOR	X5, X4
  1114  	MOVQ	X4, (DX)
  1115  	RET
  1116  
  1117  aes65to128:
  1118  	// make 7 more starting seeds
  1119  	MOVO	X1, X2
  1120  	MOVO	X1, X3
  1121  	MOVO	X1, X4
  1122  	MOVO	X1, X5
  1123  	MOVO	X1, X6
  1124  	MOVO	X1, X7
  1125  	PXOR	runtime·aeskeysched+16(SB), X1
  1126  	PXOR	runtime·aeskeysched+32(SB), X2
  1127  	PXOR	runtime·aeskeysched+48(SB), X3
  1128  	PXOR	runtime·aeskeysched+64(SB), X4
  1129  	PXOR	runtime·aeskeysched+80(SB), X5
  1130  	PXOR	runtime·aeskeysched+96(SB), X6
  1131  	PXOR	runtime·aeskeysched+112(SB), X7
  1132  	AESENC	X1, X1
  1133  	AESENC	X2, X2
  1134  	AESENC	X3, X3
  1135  	AESENC	X4, X4
  1136  	AESENC	X5, X5
  1137  	AESENC	X6, X6
  1138  	AESENC	X7, X7
  1139  
  1140  	// load data
  1141  	MOVOU	(AX), X8
  1142  	MOVOU	16(AX), X9
  1143  	MOVOU	32(AX), X10
  1144  	MOVOU	48(AX), X11
  1145  	MOVOU	-64(AX)(CX*1), X12
  1146  	MOVOU	-48(AX)(CX*1), X13
  1147  	MOVOU	-32(AX)(CX*1), X14
  1148  	MOVOU	-16(AX)(CX*1), X15
  1149  
  1150  	// xor with seed
  1151  	PXOR	X0, X8
  1152  	PXOR	X1, X9
  1153  	PXOR	X2, X10
  1154  	PXOR	X3, X11
  1155  	PXOR	X4, X12
  1156  	PXOR	X5, X13
  1157  	PXOR	X6, X14
  1158  	PXOR	X7, X15
  1159  
  1160  	// scramble 3 times
  1161  	AESENC	X8, X8
  1162  	AESENC	X9, X9
  1163  	AESENC	X10, X10
  1164  	AESENC	X11, X11
  1165  	AESENC	X12, X12
  1166  	AESENC	X13, X13
  1167  	AESENC	X14, X14
  1168  	AESENC	X15, X15
  1169  
  1170  	AESENC	X8, X8
  1171  	AESENC	X9, X9
  1172  	AESENC	X10, X10
  1173  	AESENC	X11, X11
  1174  	AESENC	X12, X12
  1175  	AESENC	X13, X13
  1176  	AESENC	X14, X14
  1177  	AESENC	X15, X15
  1178  
  1179  	AESENC	X8, X8
  1180  	AESENC	X9, X9
  1181  	AESENC	X10, X10
  1182  	AESENC	X11, X11
  1183  	AESENC	X12, X12
  1184  	AESENC	X13, X13
  1185  	AESENC	X14, X14
  1186  	AESENC	X15, X15
  1187  
  1188  	// combine results
  1189  	PXOR	X12, X8
  1190  	PXOR	X13, X9
  1191  	PXOR	X14, X10
  1192  	PXOR	X15, X11
  1193  	PXOR	X10, X8
  1194  	PXOR	X11, X9
  1195  	PXOR	X9, X8
  1196  	MOVQ	X8, (DX)
  1197  	RET
  1198  
  1199  aes129plus:
  1200  	// make 7 more starting seeds
  1201  	MOVO	X1, X2
  1202  	MOVO	X1, X3
  1203  	MOVO	X1, X4
  1204  	MOVO	X1, X5
  1205  	MOVO	X1, X6
  1206  	MOVO	X1, X7
  1207  	PXOR	runtime·aeskeysched+16(SB), X1
  1208  	PXOR	runtime·aeskeysched+32(SB), X2
  1209  	PXOR	runtime·aeskeysched+48(SB), X3
  1210  	PXOR	runtime·aeskeysched+64(SB), X4
  1211  	PXOR	runtime·aeskeysched+80(SB), X5
  1212  	PXOR	runtime·aeskeysched+96(SB), X6
  1213  	PXOR	runtime·aeskeysched+112(SB), X7
  1214  	AESENC	X1, X1
  1215  	AESENC	X2, X2
  1216  	AESENC	X3, X3
  1217  	AESENC	X4, X4
  1218  	AESENC	X5, X5
  1219  	AESENC	X6, X6
  1220  	AESENC	X7, X7
  1221  
  1222  	// start with last (possibly overlapping) block
  1223  	MOVOU	-128(AX)(CX*1), X8
  1224  	MOVOU	-112(AX)(CX*1), X9
  1225  	MOVOU	-96(AX)(CX*1), X10
  1226  	MOVOU	-80(AX)(CX*1), X11
  1227  	MOVOU	-64(AX)(CX*1), X12
  1228  	MOVOU	-48(AX)(CX*1), X13
  1229  	MOVOU	-32(AX)(CX*1), X14
  1230  	MOVOU	-16(AX)(CX*1), X15
  1231  
  1232  	// xor in seed
  1233  	PXOR	X0, X8
  1234  	PXOR	X1, X9
  1235  	PXOR	X2, X10
  1236  	PXOR	X3, X11
  1237  	PXOR	X4, X12
  1238  	PXOR	X5, X13
  1239  	PXOR	X6, X14
  1240  	PXOR	X7, X15
  1241  
  1242  	// compute number of remaining 128-byte blocks
  1243  	DECQ	CX
  1244  	SHRQ	$7, CX
  1245  
  1246  aesloop:
  1247  	// scramble state
  1248  	AESENC	X8, X8
  1249  	AESENC	X9, X9
  1250  	AESENC	X10, X10
  1251  	AESENC	X11, X11
  1252  	AESENC	X12, X12
  1253  	AESENC	X13, X13
  1254  	AESENC	X14, X14
  1255  	AESENC	X15, X15
  1256  
  1257  	// scramble state, xor in a block
  1258  	MOVOU	(AX), X0
  1259  	MOVOU	16(AX), X1
  1260  	MOVOU	32(AX), X2
  1261  	MOVOU	48(AX), X3
  1262  	AESENC	X0, X8
  1263  	AESENC	X1, X9
  1264  	AESENC	X2, X10
  1265  	AESENC	X3, X11
  1266  	MOVOU	64(AX), X4
  1267  	MOVOU	80(AX), X5
  1268  	MOVOU	96(AX), X6
  1269  	MOVOU	112(AX), X7
  1270  	AESENC	X4, X12
  1271  	AESENC	X5, X13
  1272  	AESENC	X6, X14
  1273  	AESENC	X7, X15
  1274  
  1275  	ADDQ	$128, AX
  1276  	DECQ	CX
  1277  	JNE	aesloop
  1278  
  1279  	// 3 more scrambles to finish
  1280  	AESENC	X8, X8
  1281  	AESENC	X9, X9
  1282  	AESENC	X10, X10
  1283  	AESENC	X11, X11
  1284  	AESENC	X12, X12
  1285  	AESENC	X13, X13
  1286  	AESENC	X14, X14
  1287  	AESENC	X15, X15
  1288  	AESENC	X8, X8
  1289  	AESENC	X9, X9
  1290  	AESENC	X10, X10
  1291  	AESENC	X11, X11
  1292  	AESENC	X12, X12
  1293  	AESENC	X13, X13
  1294  	AESENC	X14, X14
  1295  	AESENC	X15, X15
  1296  	AESENC	X8, X8
  1297  	AESENC	X9, X9
  1298  	AESENC	X10, X10
  1299  	AESENC	X11, X11
  1300  	AESENC	X12, X12
  1301  	AESENC	X13, X13
  1302  	AESENC	X14, X14
  1303  	AESENC	X15, X15
  1304  
  1305  	PXOR	X12, X8
  1306  	PXOR	X13, X9
  1307  	PXOR	X14, X10
  1308  	PXOR	X15, X11
  1309  	PXOR	X10, X8
  1310  	PXOR	X11, X9
  1311  	PXOR	X9, X8
  1312  	MOVQ	X8, (DX)
  1313  	RET
  1314  
  1315  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1316  TEXT runtime·memhash32(SB),NOSPLIT,$0-24
  1317  	CMPB	runtime·useAeshash(SB), $0
  1318  	JEQ	noaes
  1319  	MOVQ	p+0(FP), AX	// ptr to data
  1320  	MOVQ	h+8(FP), X0	// seed
  1321  	PINSRD	$2, (AX), X0	// data
  1322  	AESENC	runtime·aeskeysched+0(SB), X0
  1323  	AESENC	runtime·aeskeysched+16(SB), X0
  1324  	AESENC	runtime·aeskeysched+32(SB), X0
  1325  	MOVQ	X0, ret+16(FP)
  1326  	RET
  1327  noaes:
  1328  	JMP	runtime·memhash32Fallback(SB)
  1329  
  1330  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1331  TEXT runtime·memhash64(SB),NOSPLIT,$0-24
  1332  	CMPB	runtime·useAeshash(SB), $0
  1333  	JEQ	noaes
  1334  	MOVQ	p+0(FP), AX	// ptr to data
  1335  	MOVQ	h+8(FP), X0	// seed
  1336  	PINSRQ	$1, (AX), X0	// data
  1337  	AESENC	runtime·aeskeysched+0(SB), X0
  1338  	AESENC	runtime·aeskeysched+16(SB), X0
  1339  	AESENC	runtime·aeskeysched+32(SB), X0
  1340  	MOVQ	X0, ret+16(FP)
  1341  	RET
  1342  noaes:
  1343  	JMP	runtime·memhash64Fallback(SB)
  1344  
  1345  // simple mask to get rid of data in the high part of the register.
  1346  DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1347  DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1348  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1349  DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1350  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1351  DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1352  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1353  DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1354  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1355  DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1356  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1357  DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1358  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1359  DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1360  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1361  DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1362  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1363  DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1364  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1365  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1366  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1367  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1368  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1369  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1370  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1371  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1372  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1373  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1374  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1375  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1376  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1377  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1378  GLOBL masks<>(SB),RODATA,$256
  1379  
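        // Example: for a 3-byte input, aes0to15 doubles CX to 6 and applies the
        // 16-byte mask at masks<>+0x30 (0x0000000000ffffff, 0x0000000000000000),
        // keeping only the low 3 bytes of the loaded block so that stray bytes
        // beyond the data cannot affect the hash.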
  1380  // func checkASM() bool
  1381  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1382  // check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1383  	MOVQ	$masks<>(SB), AX
  1384  	MOVQ	$shifts<>(SB), BX
  1385  	ORQ	BX, AX
  1386  	TESTQ	$15, AX
  1387  	SETEQ	ret+0(FP)
  1388  	RET
  1389  
  1390  // these are arguments to pshufb. They move data down from
  1391  // the high bytes of the register to the low bytes of the register.
  1392  // index is how many bytes to move.
  1393  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1394  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1395  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1396  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1397  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1398  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1399  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1400  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1401  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1402  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1403  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1404  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1405  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1406  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1407  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1408  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1409  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1410  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1411  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1412  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1413  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1414  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1415  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1416  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1417  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1418  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1419  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1420  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1421  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1422  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1423  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1424  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1425  GLOBL shifts<>(SB),RODATA,$256
  1426  
  1427  TEXT runtime·return0(SB), NOSPLIT, $0
  1428  	MOVL	$0, AX
  1429  	RET
  1430  
  1431  
  1432  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1433  // Must obey the gcc calling convention.
  1434  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1435  	get_tls(CX)
  1436  	MOVQ	g(CX), AX
  1437  	MOVQ	g_m(AX), AX
  1438  	MOVQ	m_curg(AX), AX
  1439  	MOVQ	(g_stack+stack_hi)(AX), AX
  1440  	RET
  1441  
  1442  // The top-most function running on a goroutine
  1443  // returns to goexit+PCQuantum. Defined as ABIInternal
  1444  // so as to make it identifiable to traceback (this
  1445  // function is used as a sentinel; traceback wants to
  1446  // see the func PC, not a wrapper PC).
  1447  TEXT runtime·goexit<ABIInternal>(SB),NOSPLIT|TOPFRAME,$0-0
  1448  	BYTE	$0x90	// NOP
  1449  	CALL	runtime·goexit1(SB)	// does not return
  1450  	// traceback from goexit1 must hit code range of goexit
  1451  	BYTE	$0x90	// NOP
  1452  
  1453  // This is called from .init_array and follows the platform, not Go, ABI.
  1454  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1455  	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1456  	MOVQ	runtime·lastmoduledatap(SB), AX
  1457  	MOVQ	DI, moduledata_next(AX)
  1458  	MOVQ	DI, runtime·lastmoduledatap(SB)
  1459  	POPQ	R15
  1460  	RET
  1461  
  1462  // Initialize special registers then jump to sigpanic.
  1463  // This function is injected from the signal handler for panicking
  1464  // signals. It is quite painful to set X15 in the signal context,
  1465  // so we do it here.
  1466  TEXT ·sigpanic0<ABIInternal>(SB),NOSPLIT,$0-0
  1467  #ifdef GOEXPERIMENT_REGABI
  1468  	get_tls(R14)
  1469  	MOVQ	g(R14), R14
  1470  	XORPS	X15, X15
  1471  #endif
  1472  	JMP	·sigpanic<ABIInternal>(SB)
  1473  
  1474  // gcWriteBarrier performs a heap pointer write and informs the GC.
  1475  //
  1476  // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1477  // - DI is the destination of the write
  1478  // - AX is the value being written at DI
  1479  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1480  // but may clobber others (e.g., SSE registers).
  1481  // Defined as ABIInternal since it does not use the stack-based Go ABI.
  1482  TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
  1483  	// Save the registers clobbered by the fast path. This is slightly
  1484  	// faster than having the caller spill these.
  1485  	MOVQ	R12, 96(SP)
  1486  	MOVQ	R13, 104(SP)
  1487  	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1488  	// across a sequence of write barriers.
  1489  #ifdef GOEXPERIMENT_REGABI
  1490  	MOVQ	g_m(R14), R13
  1491  #else
  1492  	get_tls(R13)
  1493  	MOVQ	g(R13), R13
  1494  	MOVQ	g_m(R13), R13
  1495  #endif
  1496  	MOVQ	m_p(R13), R13
  1497  	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12
  1498  	// Increment wbBuf.next position.
  1499  	LEAQ	16(R12), R12
  1500  	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
  1501  	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
  1502  	// Record the write.
  1503  	MOVQ	AX, -16(R12)	// Record value
  1504  	// Note: This turns bad pointer writes into bad
  1505  	// pointer reads, which could be confusing. We could avoid
  1506  	// reading from obviously bad pointers, which would
  1507  	// take care of the vast majority of these. We could
  1508  	// patch this up in the signal handler, or use XCHG to
  1509  	// combine the read and the write.
  1510  	MOVQ	(DI), R13
  1511  	MOVQ	R13, -8(R12)	// Record *slot
  1512  	// Is the buffer full? (flags set in CMPQ above)
  1513  	JEQ	flush
  1514  ret:
  1515  	MOVQ	96(SP), R12
  1516  	MOVQ	104(SP), R13
  1517  	// Do the write.
  1518  	MOVQ	AX, (DI)
  1519  	RET
  1520  
  1521  flush:
  1522  	// Save all general purpose registers since these could be
  1523  	// clobbered by wbBufFlush and were not saved by the caller.
  1524  	// It is possible for wbBufFlush to clobber other registers
  1525  	// (e.g., SSE registers), but the compiler takes care of saving
  1526  	// those in the caller if necessary. This strikes a balance
  1527  	// with registers that are likely to be used.
  1528  	//
  1529  	// We don't have type information for these, but all code under
  1530  	// here is NOSPLIT, so nothing will observe these.
  1531  	//
  1532  	// TODO: We could strike a different balance; e.g., saving X0
  1533  	// and not saving GP registers that are less likely to be used.
  1534  	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
  1535  	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
  1536  	MOVQ	BX, 16(SP)
  1537  	MOVQ	CX, 24(SP)
  1538  	MOVQ	DX, 32(SP)
  1539  	// DI already saved
  1540  	MOVQ	SI, 40(SP)
  1541  	MOVQ	BP, 48(SP)
  1542  	MOVQ	R8, 56(SP)
  1543  	MOVQ	R9, 64(SP)
  1544  	MOVQ	R10, 72(SP)
  1545  	MOVQ	R11, 80(SP)
  1546  	// R12 already saved
  1547  	// R13 already saved
  1548  	// R14 is g
  1549  	MOVQ	R15, 88(SP)
  1550  
  1551  	// This takes arguments DI and AX
  1552  	CALL	runtime·wbBufFlush(SB)
  1553  
  1554  	MOVQ	0(SP), DI
  1555  	MOVQ	8(SP), AX
  1556  	MOVQ	16(SP), BX
  1557  	MOVQ	24(SP), CX
  1558  	MOVQ	32(SP), DX
  1559  	MOVQ	40(SP), SI
  1560  	MOVQ	48(SP), BP
  1561  	MOVQ	56(SP), R8
  1562  	MOVQ	64(SP), R9
  1563  	MOVQ	72(SP), R10
  1564  	MOVQ	80(SP), R11
  1565  	MOVQ	88(SP), R15
  1566  	JMP	ret
  1567  
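        // The compiler emits a call here only when the write barrier is on; a
        // pointer store *dst = val is compiled to roughly (Go-flavored sketch,
        // gcWriteBarrier itself is not callable from Go like this):
        //
        //	if writeBarrier.enabled {
        //		gcWriteBarrier(dst, val) // DI = dst, AX = val; also performs *dst = val
        //	} else {
        //		*dst = val
        //	}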
  1568  // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
  1569  // Defined as ABIInternal since it does not use the stable Go ABI.
  1570  TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
  1571  	XCHGQ CX, AX
  1572  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1573  	XCHGQ CX, AX
  1574  	RET
  1575  
  1576  // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
  1577  // Defined as ABIInternal since it does not use the stable Go ABI.
  1578  TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
  1579  	XCHGQ DX, AX
  1580  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1581  	XCHGQ DX, AX
  1582  	RET
  1583  
  1584  // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
  1585  // Defined as ABIInternal since it does not use the stable Go ABI.
  1586  TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
  1587  	XCHGQ BX, AX
  1588  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1589  	XCHGQ BX, AX
  1590  	RET
  1591  
  1592  // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
  1593  // Defined as ABIInternal since it does not use the stable Go ABI.
  1594  TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
  1595  	XCHGQ BP, AX
  1596  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1597  	XCHGQ BP, AX
  1598  	RET
  1599  
  1600  // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
  1601  // Defined as ABIInternal since it does not use the stable Go ABI.
  1602  TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
  1603  	XCHGQ SI, AX
  1604  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1605  	XCHGQ SI, AX
  1606  	RET
  1607  
  1608  // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
  1609  // Defined as ABIInternal since it does not use the stable Go ABI.
  1610  TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
  1611  	XCHGQ R8, AX
  1612  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1613  	XCHGQ R8, AX
  1614  	RET
  1615  
  1616  // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
  1617  // Defined as ABIInternal since it does not use the stable Go ABI.
  1618  TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
  1619  	XCHGQ R9, AX
  1620  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1621  	XCHGQ R9, AX
  1622  	RET
  1623  
  1624  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1625  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1626  
  1627  // debugCallV1 is the entry point for debugger-injected function
  1628  // calls on running goroutines. It informs the runtime that a
  1629  // debug call has been injected and creates a call frame for the
  1630  // debugger to fill in.
  1631  //
  1632  // To inject a function call, a debugger should:
  1633  // 1. Check that the goroutine is in state _Grunning and that
  1634  //    there are at least 256 bytes free on the stack.
  1635  // 2. Push the current PC on the stack (updating SP).
  1636  // 3. Write the desired argument frame size at SP-16 (using the SP
  1637  //    after step 2).
  1638  // 4. Save all machine registers (including flags and XMM registers)
  1639  //    so they can be restored later by the debugger.
  1640  // 5. Set the PC to debugCallV1 and resume execution.
  1641  //
  1642  // If the goroutine is in state _Grunnable, then it's not generally
  1643  // safe to inject a call because it may return out via other runtime
  1644  // operations. Instead, the debugger should unwind the stack to find
  1645  // the return to non-runtime code, add a temporary breakpoint there,
  1646  // and inject the call once that breakpoint is hit.
  1647  //
  1648  // If the goroutine is in any other state, it's not safe to inject a call.
  1649  //
  1650  // This function communicates back to the debugger by setting RAX and
  1651  // invoking INT3 to raise a breakpoint signal. See the comments in the
  1652  // implementation for the protocol the debugger is expected to
  1653  // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1654  //
  1655  // The debugger must ensure that any pointers passed to the function
  1656  // obey escape analysis requirements. Specifically, it must not pass
  1657  // a stack pointer to an escaping argument. debugCallV1 cannot check
  1658  // this invariant.
  1659  //
  1660  // This is ABIInternal because Go code injects its PC directly into new
  1661  // goroutine stacks.
  1662  TEXT runtime·debugCallV1<ABIInternal>(SB),NOSPLIT,$152-0
  1663  	// Save all registers that may contain pointers so they can be
  1664  	// conservatively scanned.
  1665  	//
  1666  	// We can't do anything that might clobber any of these
  1667  	// registers before this.
  1668  	MOVQ	R15, r15-(14*8+8)(SP)
  1669  	MOVQ	R14, r14-(13*8+8)(SP)
  1670  	MOVQ	R13, r13-(12*8+8)(SP)
  1671  	MOVQ	R12, r12-(11*8+8)(SP)
  1672  	MOVQ	R11, r11-(10*8+8)(SP)
  1673  	MOVQ	R10, r10-(9*8+8)(SP)
  1674  	MOVQ	R9, r9-(8*8+8)(SP)
  1675  	MOVQ	R8, r8-(7*8+8)(SP)
  1676  	MOVQ	DI, di-(6*8+8)(SP)
  1677  	MOVQ	SI, si-(5*8+8)(SP)
  1678  	MOVQ	BP, bp-(4*8+8)(SP)
  1679  	MOVQ	BX, bx-(3*8+8)(SP)
  1680  	MOVQ	DX, dx-(2*8+8)(SP)
  1681  	// Save the frame size before we clobber it. Either of the last
  1682  	// saves could clobber this depending on whether there's a saved BP.
  1683  	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1684  	MOVQ	CX, cx-(1*8+8)(SP)
  1685  	MOVQ	AX, ax-(0*8+8)(SP)
  1686  
  1687  	// Save the argument frame size.
  1688  	MOVQ	DX, frameSize-128(SP)
  1689  
  1690  	// Perform a safe-point check.
  1691  	MOVQ	retpc-8(FP), AX	// Caller's PC
  1692  	MOVQ	AX, 0(SP)
  1693  	CALL	runtime·debugCallCheck(SB)
  1694  	MOVQ	8(SP), AX
  1695  	TESTQ	AX, AX
  1696  	JZ	good
  1697  	// The safety check failed. Put the reason string at the top
  1698  	// of the stack.
  1699  	MOVQ	AX, 0(SP)
  1700  	MOVQ	16(SP), AX
  1701  	MOVQ	AX, 8(SP)
  1702  	// Set AX to 8 and invoke INT3. The debugger should get the
  1703  	// reason a call can't be injected from the top of the stack
  1704  	// and resume execution.
  1705  	MOVQ	$8, AX
  1706  	BYTE	$0xcc
  1707  	JMP	restore
  1708  
  1709  good:
  1710  	// Registers are saved and it's safe to make a call.
  1711  	// Open up a call frame, moving the stack if necessary.
  1712  	//
  1713  	// Once the frame is allocated, this will set AX to 0 and
  1714  	// invoke INT3. The debugger should write the argument
  1715  	// frame for the call at SP, push the trapping PC on the
  1716  	// stack, set the PC to the function to call, set RCX to point
  1717  	// to the closure (if a closure call), and resume execution.
  1718  	//
  1719  	// If the function returns, this will set AX to 1 and invoke
  1720  	// INT3. The debugger can then inspect any return value saved
  1721  	// on the stack at SP and resume execution again.
  1722  	//
  1723  	// If the function panics, this will set AX to 2 and invoke INT3.
  1724  	// The interface{} value of the panic will be at SP. The debugger
  1725  	// can inspect the panic value and resume execution again.
  1726  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1727  	CMPQ	AX, $MAXSIZE;			\
  1728  	JA	5(PC);				\
  1729  	MOVQ	$NAME(SB), AX;			\
  1730  	MOVQ	AX, 0(SP);			\
  1731  	CALL	runtime·debugCallWrap(SB);	\
  1732  	JMP	restore
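
        // In each DEBUG_CALL_DISPATCH expansion below, JA 5(PC) skips the four
        // instructions that follow it, falling through to the CMPQ of the next,
        // larger size class (or to the frame-too-large path after the last one).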
  1733  
  1734  	MOVQ	frameSize-128(SP), AX
  1735  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1736  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1737  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1738  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1739  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1740  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1741  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1742  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1743  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1744  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1745  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1746  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1747  	// The frame size is too large. Report the error.
  1748  	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1749  	MOVQ	AX, 0(SP)
  1750  	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  1751  	MOVQ	$8, AX
  1752  	BYTE	$0xcc
  1753  	JMP	restore
  1754  
  1755  restore:
  1756  	// Calls and failures resume here.
  1757  	//
  1758  	// Set AX to 16 and invoke INT3. The debugger should restore
  1759  	// all registers except RIP and RSP and resume execution.
  1760  	MOVQ	$16, AX
  1761  	BYTE	$0xcc
  1762  	// We must not modify flags after this point.
  1763  
  1764  	// Restore pointer-containing registers, which may have been
  1765  	// modified from the debugger's copy by stack copying.
  1766  	MOVQ	ax-(0*8+8)(SP), AX
  1767  	MOVQ	cx-(1*8+8)(SP), CX
  1768  	MOVQ	dx-(2*8+8)(SP), DX
  1769  	MOVQ	bx-(3*8+8)(SP), BX
  1770  	MOVQ	bp-(4*8+8)(SP), BP
  1771  	MOVQ	si-(5*8+8)(SP), SI
  1772  	MOVQ	di-(6*8+8)(SP), DI
  1773  	MOVQ	r8-(7*8+8)(SP), R8
  1774  	MOVQ	r9-(8*8+8)(SP), R9
  1775  	MOVQ	r10-(9*8+8)(SP), R10
  1776  	MOVQ	r11-(10*8+8)(SP), R11
  1777  	MOVQ	r12-(11*8+8)(SP), R12
  1778  	MOVQ	r13-(12*8+8)(SP), R13
  1779  	MOVQ	r14-(13*8+8)(SP), R14
  1780  	MOVQ	r15-(14*8+8)(SP), R15
  1781  
  1782  	RET
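
        // Debugger-side summary of the stop protocol above (the AX values are
        // the ones used in this function; this is a reading aid, not a separate
        // interface):
        //
        //   AX = 8  - injection refused; a (ptr, len) reason string is at 0(SP)/8(SP)
        //   AX = 0  - frame allocated; write the argument frame at SP, push the
        //             trapping PC, set PC to the target (and RCX to the closure,
        //             if any), and resume
        //   AX = 1  - the injected call returned; its results are at SP
        //   AX = 2  - the injected call panicked; the panic value is at SP
        //   AX = 16 - restore all registers except RIP and RSP, then resume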
  1783  
  1784  // runtime.debugCallCheck assumes that functions defined with the
  1785  // DEBUG_CALL_FN macro are safe points to inject calls.
  1786  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1787  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1788  	NO_LOCAL_POINTERS;			\
  1789  	MOVQ	$0, AX;				\
  1790  	BYTE	$0xcc;				\
  1791  	MOVQ	$1, AX;				\
  1792  	BYTE	$0xcc;				\
  1793  	RET
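
        // Each instantiation below reserves a MAXSIZE-byte frame into which the
        // debugger writes the argument frame, then raises the AX=0 ("frame
        // ready") and AX=1 ("call returned") stops described at debugCallV1.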
  1794  DEBUG_CALL_FN(debugCall32<>, 32)
  1795  DEBUG_CALL_FN(debugCall64<>, 64)
  1796  DEBUG_CALL_FN(debugCall128<>, 128)
  1797  DEBUG_CALL_FN(debugCall256<>, 256)
  1798  DEBUG_CALL_FN(debugCall512<>, 512)
  1799  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1800  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1801  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1802  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1803  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1804  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1805  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1806  
  1807  // func debugCallPanicked(val interface{})
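        // It raises the AX=2 ("panicked") stop described at debugCallV1, with the
        // panic value's type and data words at 0(SP) and 8(SP).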
  1808  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1809  	// Copy the panic value to the top of stack.
  1810  	MOVQ	val_type+0(FP), AX
  1811  	MOVQ	AX, 0(SP)
  1812  	MOVQ	val_data+8(FP), AX
  1813  	MOVQ	AX, 8(SP)
  1814  	MOVQ	$2, AX
  1815  	BYTE	$0xcc
  1816  	RET
  1817  
  1818  // Note: these functions use a special calling convention to save generated code space.
  1819  // Arguments are passed in registers, but the space for those arguments is allocated
  1820  // in the caller's stack frame. These stubs write the args into that stack space and
  1821  // then tail call to the corresponding runtime handler.
  1822  // The tail call makes these stubs disappear in backtraces.
  1823  // Defined as ABIInternal since they do not use the stack-based Go ABI.
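        // For example, when a bounds check on s[i] fails, the compiler leaves the
        // index and length in the registers named by the matching stub (AX and CX
        // for panicIndex below), and the stub spills them into the reserved slots
        // before tail-calling the Go handler that builds the runtime error.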
  1824  TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
  1825  	MOVQ	AX, x+0(FP)
  1826  	MOVQ	CX, y+8(FP)
  1827  	JMP	runtime·goPanicIndex<ABIInternal>(SB)
  1828  TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
  1829  	MOVQ	AX, x+0(FP)
  1830  	MOVQ	CX, y+8(FP)
  1831  	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
  1832  TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
  1833  	MOVQ	CX, x+0(FP)
  1834  	MOVQ	DX, y+8(FP)
  1835  	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
  1836  TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1837  	MOVQ	CX, x+0(FP)
  1838  	MOVQ	DX, y+8(FP)
  1839  	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
  1840  TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
  1841  	MOVQ	CX, x+0(FP)
  1842  	MOVQ	DX, y+8(FP)
  1843  	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
  1844  TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1845  	MOVQ	CX, x+0(FP)
  1846  	MOVQ	DX, y+8(FP)
  1847  	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
  1848  TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
  1849  	MOVQ	AX, x+0(FP)
  1850  	MOVQ	CX, y+8(FP)
  1851  	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
  1852  TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
  1853  	MOVQ	AX, x+0(FP)
  1854  	MOVQ	CX, y+8(FP)
  1855  	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
  1856  TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
  1857  	MOVQ	DX, x+0(FP)
  1858  	MOVQ	BX, y+8(FP)
  1859  	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
  1860  TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1861  	MOVQ	DX, x+0(FP)
  1862  	MOVQ	BX, y+8(FP)
  1863  	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
  1864  TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
  1865  	MOVQ	DX, x+0(FP)
  1866  	MOVQ	BX, y+8(FP)
  1867  	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
  1868  TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1869  	MOVQ	DX, x+0(FP)
  1870  	MOVQ	BX, y+8(FP)
  1871  	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
  1872  TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
  1873  	MOVQ	CX, x+0(FP)
  1874  	MOVQ	DX, y+8(FP)
  1875  	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
  1876  TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
  1877  	MOVQ	CX, x+0(FP)
  1878  	MOVQ	DX, y+8(FP)
  1879  	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
  1880  TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
  1881  	MOVQ	AX, x+0(FP)
  1882  	MOVQ	CX, y+8(FP)
  1883  	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
  1884  TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
  1885  	MOVQ	AX, x+0(FP)
  1886  	MOVQ	CX, y+8(FP)
  1887  	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
  1888  
  1889  #ifdef GOOS_android
  1890  // Use the free TLS_SLOT_APP slot #2 on Android Q.
  1891  // Earlier Android versions are set up in gcc_android.c.
  1892  DATA runtime·tls_g+0(SB)/8, $16
  1893  GLOBL runtime·tls_g+0(SB), NOPTR, $8
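        // ($16 is the byte offset of slot #2: two pointer-sized slots from the
        // TLS base.)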
  1894  #endif
  1895  
  1896  // The compiler and assembler's -spectre=ret mode rewrites
  1897  // all indirect CALL AX / JMP AX instructions to be
  1898  // CALL retpolineAX / JMP retpolineAX.
  1899  // See https://support.google.com/faqs/answer/7625886.
  1900  #define RETPOLINE(reg) \
  1901  	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
  1902  	/* nospec: */									\
  1903  	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
  1904  	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
  1905  	/* setup: */									\
  1906  	/*   MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
  1907  	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
  1908  	/*   RET */             BYTE $0xC3
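
        // How the byte sequence works: the CALL pushes the address of the
        // PAUSE/JMP pair (nospec) as its return address and jumps over it to
        // setup, which overwrites that return address on the stack with the
        // real target held in reg. The RET then branches to the target, while
        // any mispredicted return speculates harmlessly into the PAUSE loop.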
  1909  
  1910  TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
  1911  TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
  1912  TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
  1913  TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
  1914  /* SP is 4, can't happen / magic encodings */
  1915  TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
  1916  TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
  1917  TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
  1918  TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
  1919  TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
  1920  TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
  1921  TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
  1922  TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
  1923  TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
  1924  TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
  1925  TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)