github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_amd64 is common startup code for most amd64 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
    14  TEXT _rt0_amd64(SB),NOSPLIT,$-8
    15  	MOVQ	0(SP), DI	// argc
    16  	LEAQ	8(SP), SI	// argv
    17  	JMP	runtime·rt0_go(SB)	// tail-jump; rt0_go reads argc/argv from DI/SI
    18  
    19  // main is common startup code for most amd64 systems when using
    20  // external linking. The C startup code will call the symbol "main"
    21  // passing argc and argv in the usual C ABI registers DI and SI.
    22  TEXT main(SB),NOSPLIT,$-8
    23  	JMP	runtime·rt0_go(SB)	// DI/SI already hold argc/argv (C ABI), as rt0_go expects
    24  
    25  // _rt0_amd64_lib is common startup code for most amd64 systems when
    26  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27  // arrange to invoke this function as a global constructor (for
    28  // c-archive) or when the shared library is loaded (for c-shared).
    29  // We expect argc and argv to be passed in the usual C ABI registers
    30  // DI and SI.
    31  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32  	// Align stack per ELF ABI requirements.
    33  	MOVQ	SP, AX
    34  	ANDQ	$~15, SP
    35  	// Save C ABI callee-saved registers, as caller may need them.
    36  	MOVQ	BX, 0x10(SP)
    37  	MOVQ	BP, 0x18(SP)
    38  	MOVQ	R12, 0x20(SP)
    39  	MOVQ	R13, 0x28(SP)
    40  	MOVQ	R14, 0x30(SP)
    41  	MOVQ	R15, 0x38(SP)
    42  	MOVQ	AX, 0x40(SP)	// original (pre-alignment) SP, restored below
    43  
    44  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46  
    47  	// Synchronous initialization.
    48  	CALL	runtime·libpreinit(SB)
    49  
    50  	// Create a new thread to finish Go runtime initialization.
    51  	MOVQ	_cgo_sys_thread_create(SB), AX	// non-nil only when cgo is linked in
    52  	TESTQ	AX, AX
    53  	JZ	nocgo
    54  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55  	MOVQ	$0, SI
    56  	CALL	AX
    57  	JMP	restore
    58  
    59  nocgo:
    60  	MOVQ	$0x800000, 0(SP)		// stacksize
    61  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62  	MOVQ	AX, 8(SP)			// fn
    63  	CALL	runtime·newosproc0(SB)
    64  
    65  restore:
    66  	MOVQ	0x10(SP), BX
    67  	MOVQ	0x18(SP), BP
    68  	MOVQ	0x20(SP), R12
    69  	MOVQ	0x28(SP), R13
    70  	MOVQ	0x30(SP), R14
    71  	MOVQ	0x38(SP), R15
    72  	MOVQ	0x40(SP), SP	// restore the original (unaligned) SP saved above
    73  	RET
    74  
    75  // _rt0_amd64_lib_go initializes the Go runtime.
    76  // This is started in a separate thread by _rt0_amd64_lib.
    77  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    78  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI	// reload argc/argv stashed by _rt0_amd64_lib
    79  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80  	JMP	runtime·rt0_go(SB)
    81  
    82  DATA _rt0_amd64_lib_argc<>(SB)/8, $0	// 8-byte slot: argc captured by _rt0_amd64_lib
    83  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84  DATA _rt0_amd64_lib_argv<>(SB)/8, $0	// 8-byte slot: argv captured by _rt0_amd64_lib
    85  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86  
     87  TEXT runtime·rt0_go(SB),NOSPLIT,$0
     88  	// copy arguments forward on an even stack
     89  	MOVQ	DI, AX		// argc
     90  	MOVQ	SI, BX		// argv
     91  	SUBQ	$(4*8+7), SP		// 2args 2auto
     92  	ANDQ	$~15, SP
     93  	MOVQ	AX, 16(SP)
     94  	MOVQ	BX, 24(SP)
     95  
     96  	// create istack out of the given (operating system) stack.
     97  	// _cgo_init may update stackguard.
     98  	MOVQ	$runtime·g0(SB), DI
     99  	LEAQ	(-64*1024+104)(SP), BX	// g0 stack: 64KB below SP, plus 104 bytes of slop
    100  	MOVQ	BX, g_stackguard0(DI)
    101  	MOVQ	BX, g_stackguard1(DI)
    102  	MOVQ	BX, (g_stack+stack_lo)(DI)
    103  	MOVQ	SP, (g_stack+stack_hi)(DI)
    104  
    105  	// find out information about the processor we're on
    106  	MOVL	$0, AX
    107  	CPUID			// leaf 0: max leaf in AX, vendor string in BX/DX/CX
    108  	MOVL	AX, SI
    109  	CMPL	AX, $0
    110  	JE	nocpuinfo
    111  
    112  	// Figure out how to serialize RDTSC.
    113  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
    114  	// Don't know about the rest, so let's do MFENCE.
    115  	CMPL	BX, $0x756E6547  // "Genu"
    116  	JNE	notintel
    117  	CMPL	DX, $0x49656E69  // "ineI"
    118  	JNE	notintel
    119  	CMPL	CX, $0x6C65746E  // "ntel"
    120  	JNE	notintel
    121  	MOVB	$1, runtime·isIntel(SB)
    122  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
    123  notintel:
    124  
    125  	// Load EAX=1 cpuid flags
    126  	MOVL	$1, AX
    127  	CPUID
    128  	MOVL	AX, runtime·processorVersionInfo(SB)
    129  
    130  nocpuinfo:
    131  	// if there is an _cgo_init, call it.
    132  	MOVQ	_cgo_init(SB), AX
    133  	TESTQ	AX, AX
    134  	JZ	needtls
    135  	// arg 1: g0, already in DI
    136  	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
    137  #ifdef GOOS_android
    138  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
    139  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
    140  	// Compensate for tls_g (+16).
    141  	MOVQ	-16(TLS), CX
    142  #else
    143  	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
    144  	MOVQ	$0, CX
    145  #endif
    146  #ifdef GOOS_windows
    147  	// Adjust for the Win64 calling convention.
    148  	MOVQ	CX, R9 // arg 4
    149  	MOVQ	DX, R8 // arg 3
    150  	MOVQ	SI, DX // arg 2
    151  	MOVQ	DI, CX // arg 1
    152  #endif
    153  	CALL	AX
    154  
    155  	// update stackguard after _cgo_init
    156  	MOVQ	$runtime·g0(SB), CX
    157  	MOVQ	(g_stack+stack_lo)(CX), AX
    158  	ADDQ	$const__StackGuard, AX
    159  	MOVQ	AX, g_stackguard0(CX)
    160  	MOVQ	AX, g_stackguard1(CX)
    161  
    162  #ifndef GOOS_windows
    163  	JMP ok
    164  #endif
    165  needtls:
    166  #ifdef GOOS_plan9
    167  	// skip TLS setup on Plan 9
    168  	JMP ok
    169  #endif
    170  #ifdef GOOS_solaris
    171  	// skip TLS setup on Solaris
    172  	JMP ok
    173  #endif
    174  #ifdef GOOS_illumos
    175  	// skip TLS setup on illumos
    176  	JMP ok
    177  #endif
    178  #ifdef GOOS_darwin
    179  	// skip TLS setup on Darwin
    180  	JMP ok
    181  #endif
    182  
    183  	LEAQ	runtime·m0+m_tls(SB), DI
    184  	CALL	runtime·settls(SB)
    185  
    186  	// store through it, to make sure it works
    187  	get_tls(BX)
    188  	MOVQ	$0x123, g(BX)	// write sentinel through the TLS slot...
    189  	MOVQ	runtime·m0+m_tls(SB), AX
    190  	CMPQ	AX, $0x123	// ...and verify it landed in m0.tls
    191  	JEQ 2(PC)
    192  	CALL	runtime·abort(SB)
    193  ok:
    194  	// set the per-goroutine and per-mach "registers"
    195  	get_tls(BX)
    196  	LEAQ	runtime·g0(SB), CX
    197  	MOVQ	CX, g(BX)
    198  	LEAQ	runtime·m0(SB), AX
    199  
    200  	// save m->g0 = g0
    201  	MOVQ	CX, m_g0(AX)
    202  	// save m0 to g0->m
    203  	MOVQ	AX, g_m(CX)
    204  
    205  	CLD				// convention is D is always left cleared
    206  	CALL	runtime·check(SB)
    207  
    208  	MOVL	16(SP), AX		// copy argc
    209  	MOVL	AX, 0(SP)
    210  	MOVQ	24(SP), AX		// copy argv
    211  	MOVQ	AX, 8(SP)
    212  	CALL	runtime·args(SB)
    213  	CALL	runtime·osinit(SB)
    214  	CALL	runtime·schedinit(SB)
    215  
    216  	// create a new goroutine to start program
    217  	MOVQ	$runtime·mainPC(SB), AX		// entry
    218  	PUSHQ	AX
    219  	PUSHQ	$0			// arg size
    220  	CALL	runtime·newproc(SB)
    221  	POPQ	AX
    222  	POPQ	AX
    223  
    224  	// start this M
    225  	CALL	runtime·mstart(SB)
    226  
    227  	CALL	runtime·abort(SB)	// mstart should never return
    228  	RET				// unreachable
    229  
    230  	// Prevent dead-code elimination of debugCallV1, which is
    231  	// intended to be called by debuggers.
    232  	MOVQ	$runtime·debugCallV1(SB), AX
    233  	RET
   234  
    235  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// funcval for runtime.main, pushed by rt0_go as newproc's entry
    236  GLOBL	runtime·mainPC(SB),RODATA,$8
   237  
    238  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
    239  	BYTE	$0xcc	// INT3 breakpoint instruction
    240  	RET
   241  
    242  TEXT runtime·asminit(SB),NOSPLIT,$0-0
    243  	// No per-thread init.
    244  	RET
   245  
   246  /*
   247   *  go-routine
   248   */
   249  
   250  // func gosave(buf *gobuf)
   251  // save state in Gobuf; setjmp
    250  // func gosave(buf *gobuf)
    251  // save state in Gobuf; setjmp
    252  TEXT runtime·gosave(SB), NOSPLIT, $0-8
    253  	MOVQ	buf+0(FP), AX		// gobuf
    254  	LEAQ	buf+0(FP), BX		// caller's SP (address of first argument)
    255  	MOVQ	BX, gobuf_sp(AX)
    256  	MOVQ	0(SP), BX		// caller's PC
    257  	MOVQ	BX, gobuf_pc(AX)
    258  	MOVQ	$0, gobuf_ret(AX)
    259  	MOVQ	BP, gobuf_bp(AX)
    260  	// Assert ctxt is zero. See func save.
    261  	MOVQ	gobuf_ctxt(AX), BX
    262  	TESTQ	BX, BX
    263  	JZ	2(PC)
    264  	CALL	runtime·badctxt(SB)
    265  	get_tls(CX)
    266  	MOVQ	g(CX), BX
    267  	MOVQ	BX, gobuf_g(AX)	// record current g in the buf
    268  	RET
   269  
   270  // func gogo(buf *gobuf)
   271  // restore state from Gobuf; longjmp
    270  // func gogo(buf *gobuf)
    271  // restore state from Gobuf; longjmp
    272  TEXT runtime·gogo(SB), NOSPLIT, $16-8
    273  	MOVQ	buf+0(FP), BX		// gobuf
    274  	MOVQ	gobuf_g(BX), DX
    275  	MOVQ	0(DX), CX		// make sure g != nil
    276  	get_tls(CX)
    277  	MOVQ	DX, g(CX)		// switch current g to buf's g
    278  	MOVQ	gobuf_sp(BX), SP	// restore SP
    279  	MOVQ	gobuf_ret(BX), AX
    280  	MOVQ	gobuf_ctxt(BX), DX
    281  	MOVQ	gobuf_bp(BX), BP
    282  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
    283  	MOVQ	$0, gobuf_ret(BX)
    284  	MOVQ	$0, gobuf_ctxt(BX)
    285  	MOVQ	$0, gobuf_bp(BX)
    286  	MOVQ	gobuf_pc(BX), BX
    287  	JMP	BX			// resume at the saved PC; never returns here
   288  
   289  // func mcall(fn func(*g))
   290  // Switch to m->g0's stack, call fn(g).
   291  // Fn must never return. It should gogo(&g->sched)
   292  // to keep running g.
    293  TEXT runtime·mcall(SB), NOSPLIT, $0-8
    294  	MOVQ	fn+0(FP), DI
    295  
    296  	get_tls(CX)
    297  	MOVQ	g(CX), AX	// save state in g->sched
    298  	MOVQ	0(SP), BX	// caller's PC
    299  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
    300  	LEAQ	fn+0(FP), BX	// caller's SP
    301  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
    302  	MOVQ	AX, (g_sched+gobuf_g)(AX)
    303  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
    304  
    305  	// switch to m->g0 & its stack, call fn
    306  	MOVQ	g(CX), BX
    307  	MOVQ	g_m(BX), BX
    308  	MOVQ	m_g0(BX), SI
    309  	CMPQ	SI, AX	// if g == m->g0 call badmcall
    310  	JNE	3(PC)
    311  	MOVQ	$runtime·badmcall(SB), AX
    312  	JMP	AX
    313  	MOVQ	SI, g(CX)	// g = m->g0
    314  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    315  	PUSHQ	AX	// push old g as fn's argument
    316  	MOVQ	DI, DX
    317  	MOVQ	0(DI), DI	// code pointer out of the funcval
    318  	CALL	DI
    319  	POPQ	AX
    320  	MOVQ	$runtime·badmcall2(SB), AX	// fn must not return; report if it does
    321  	JMP	AX
    322  	RET	// unreachable
   323  
   324  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   325  // of the G stack. We need to distinguish the routine that
   326  // lives at the bottom of the G stack from the one that lives
   327  // at the top of the system stack because the one at the top of
   328  // the system stack terminates the stack walk (see topofstack()).
    329  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
    330  	RET	// marker only; its PC is planted in g->sched.pc by systemstack
   331  
   332  // func systemstack(fn func())
    333  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
    334  	MOVQ	fn+0(FP), DI	// DI = fn
    335  	get_tls(CX)
    336  	MOVQ	g(CX), AX	// AX = g
    337  	MOVQ	g_m(AX), BX	// BX = m
    338  
    339  	CMPQ	AX, m_gsignal(BX)	// already on signal stack? no switch needed
    340  	JEQ	noswitch
    341  
    342  	MOVQ	m_g0(BX), DX	// DX = g0
    343  	CMPQ	AX, DX
    344  	JEQ	noswitch
    345  
    346  	CMPQ	AX, m_curg(BX)
    347  	JNE	bad
    348  
    349  	// switch stacks
    350  	// save our state in g->sched. Pretend to
    351  	// be systemstack_switch if the G stack is scanned.
    352  	MOVQ	$runtime·systemstack_switch(SB), SI
    353  	MOVQ	SI, (g_sched+gobuf_pc)(AX)
    354  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
    355  	MOVQ	AX, (g_sched+gobuf_g)(AX)
    356  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
    357  
    358  	// switch to g0
    359  	MOVQ	DX, g(CX)
    360  	MOVQ	(g_sched+gobuf_sp)(DX), BX
    361  	// make it look like mstart called systemstack on g0, to stop traceback
    362  	SUBQ	$8, BX
    363  	MOVQ	$runtime·mstart(SB), DX
    364  	MOVQ	DX, 0(BX)	// fake return address = mstart
    365  	MOVQ	BX, SP
    366  
    367  	// call target function
    368  	MOVQ	DI, DX
    369  	MOVQ	0(DI), DI	// code pointer out of the funcval
    370  	CALL	DI
    371  
    372  	// switch back to g
    373  	get_tls(CX)
    374  	MOVQ	g(CX), AX
    375  	MOVQ	g_m(AX), BX
    376  	MOVQ	m_curg(BX), AX
    377  	MOVQ	AX, g(CX)
    378  	MOVQ	(g_sched+gobuf_sp)(AX), SP
    379  	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear saved SP to help the GC
    380  	RET
    381  
    382  noswitch:
    383  	// already on m stack; tail call the function
    384  	// Using a tail call here cleans up tracebacks since we won't stop
    385  	// at an intermediate systemstack.
    386  	MOVQ	DI, DX
    387  	MOVQ	0(DI), DI
    388  	JMP	DI
    389  
    390  bad:
    391  	// Bad: g is not gsignal, not g0, not curg. What is it?
    392  	MOVQ	$runtime·badsystemstack(SB), AX
    393  	CALL	AX
    394  	INT	$3
   395  
   396  
   397  /*
   398   * support for morestack
   399   */
   400  
   401  // Called during function prolog when more stack is needed.
   402  //
   403  // The traceback routines see morestack on a g0 as being
   404  // the top of a stack (for example, morestack calling newstack
   405  // calling the scheduler calling newm calling gc), so we must
   406  // record an argument size. For that purpose, it has no arguments.
    407  TEXT runtime·morestack(SB),NOSPLIT,$0-0
    408  	// Cannot grow scheduler stack (m->g0).
    409  	get_tls(CX)
    410  	MOVQ	g(CX), BX
    411  	MOVQ	g_m(BX), BX
    412  	MOVQ	m_g0(BX), SI
    413  	CMPQ	g(CX), SI
    414  	JNE	3(PC)
    415  	CALL	runtime·badmorestackg0(SB)
    416  	CALL	runtime·abort(SB)
    417  
    418  	// Cannot grow signal stack (m->gsignal).
    419  	MOVQ	m_gsignal(BX), SI
    420  	CMPQ	g(CX), SI
    421  	JNE	3(PC)
    422  	CALL	runtime·badmorestackgsignal(SB)
    423  	CALL	runtime·abort(SB)
    424  
    425  	// Called from f.
    426  	// Set m->morebuf to f's caller.
    427  	NOP	SP	// tell vet SP changed - stop checking offsets
    428  	MOVQ	8(SP), AX	// f's caller's PC
    429  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
    430  	LEAQ	16(SP), AX	// f's caller's SP
    431  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
    432  	get_tls(CX)
    433  	MOVQ	g(CX), SI
    434  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
    435  
    436  	// Set g->sched to context in f.
    437  	MOVQ	0(SP), AX // f's PC
    438  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
    439  	MOVQ	SI, (g_sched+gobuf_g)(SI)
    440  	LEAQ	8(SP), AX // f's SP
    441  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
    442  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
    443  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX carries f's closure context here
    444  
    445  	// Call newstack on m->g0's stack.
    446  	MOVQ	m_g0(BX), BX
    447  	MOVQ	BX, g(CX)
    448  	MOVQ	(g_sched+gobuf_sp)(BX), SP
    449  	CALL	runtime·newstack(SB)
    450  	CALL	runtime·abort(SB)	// crash if newstack returns
    451  	RET
   452  
   453  // morestack but not preserving ctxt.
    454  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
    455  	MOVL	$0, DX	// zero the ctxt register morestack will save
    456  	JMP	runtime·morestack(SB)
   457  
   458  // reflectcall: call a function with the given argument list
   459  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   460  // we don't have variable-sized frames, so we use a small number
   461  // of constant-sized-frame functions to encode a few bits of size in the pc.
   462  // Caution: ugly multiline assembly macros in your future!
   463  
    464  #define DISPATCH(NAME,MAXSIZE)		\
    465  	CMPQ	CX, $MAXSIZE;		\
    466  	JA	3(PC);			\
    467  	MOVQ	$NAME(SB), AX;		\
    468  	JMP	AX
    469  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   470  
    471  TEXT ·reflectcall(SB), NOSPLIT, $0-32
    472  	MOVLQZX argsize+24(FP), CX	// CX = argsize; DISPATCH picks the first frame size >= CX
    473  	DISPATCH(runtime·call32, 32)
    474  	DISPATCH(runtime·call64, 64)
    475  	DISPATCH(runtime·call128, 128)
    476  	DISPATCH(runtime·call256, 256)
    477  	DISPATCH(runtime·call512, 512)
    478  	DISPATCH(runtime·call1024, 1024)
    479  	DISPATCH(runtime·call2048, 2048)
    480  	DISPATCH(runtime·call4096, 4096)
    481  	DISPATCH(runtime·call8192, 8192)
    482  	DISPATCH(runtime·call16384, 16384)
    483  	DISPATCH(runtime·call32768, 32768)
    484  	DISPATCH(runtime·call65536, 65536)
    485  	DISPATCH(runtime·call131072, 131072)
    486  	DISPATCH(runtime·call262144, 262144)
    487  	DISPATCH(runtime·call524288, 524288)
    488  	DISPATCH(runtime·call1048576, 1048576)
    489  	DISPATCH(runtime·call2097152, 2097152)
    490  	DISPATCH(runtime·call4194304, 4194304)
    491  	DISPATCH(runtime·call8388608, 8388608)
    492  	DISPATCH(runtime·call16777216, 16777216)
    493  	DISPATCH(runtime·call33554432, 33554432)
    494  	DISPATCH(runtime·call67108864, 67108864)
    495  	DISPATCH(runtime·call134217728, 134217728)
    496  	DISPATCH(runtime·call268435456, 268435456)
    497  	DISPATCH(runtime·call536870912, 536870912)
    498  	DISPATCH(runtime·call1073741824, 1073741824)
    499  	MOVQ	$runtime·badreflectcall(SB), AX	// argsize > 1GB: no frame big enough
    500  	JMP	AX
   501  
    502  #define CALLFN(NAME,MAXSIZE)			\
    503  TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
    504  	NO_LOCAL_POINTERS;			\
    505  	/* copy arguments to stack */		\
    506  	MOVQ	argptr+16(FP), SI;		\
    507  	MOVLQZX argsize+24(FP), CX;		\
    508  	MOVQ	SP, DI;				\
    509  	REP;MOVSB;				\
    510  	/* call function */			\
    511  	MOVQ	f+8(FP), DX;			\
    512  	PCDATA  $PCDATA_StackMapIndex, $0;	\
    513  	CALL	(DX);				\
    514  	/* copy return values back */		\
    515  	MOVQ	argtype+0(FP), DX;		\
    516  	MOVQ	argptr+16(FP), DI;		\
    517  	MOVLQZX	argsize+24(FP), CX;		\
    518  	MOVLQZX	retoffset+28(FP), BX;		\
    519  	MOVQ	SP, SI;				\
    520  	ADDQ	BX, DI;				\
    521  	ADDQ	BX, SI;				\
    522  	SUBQ	BX, CX;				\
    523  	CALL	callRet<>(SB);			\
    524  	RET
   525  
   526  // callRet copies return values back at the end of call*. This is a
   527  // separate function so it can allocate stack space for the arguments
   528  // to reflectcallmove. It does not follow the Go ABI; it expects its
   529  // arguments in registers.
    530  TEXT callRet<>(SB), NOSPLIT, $32-0
    531  	NO_LOCAL_POINTERS
    532  	MOVQ	DX, 0(SP)	// argtype
    533  	MOVQ	DI, 8(SP)	// destination (argptr+retoffset)
    534  	MOVQ	SI, 16(SP)	// source (stack copy of results)
    535  	MOVQ	CX, 24(SP)	// byte count
    536  	CALL	runtime·reflectcallmove(SB)
    537  	RET
   538  
    539  CALLFN(·call32, 32)	// one fixed-size call frame per power of two, up to 1GB
    540  CALLFN(·call64, 64)
    541  CALLFN(·call128, 128)
    542  CALLFN(·call256, 256)
    543  CALLFN(·call512, 512)
    544  CALLFN(·call1024, 1024)
    545  CALLFN(·call2048, 2048)
    546  CALLFN(·call4096, 4096)
    547  CALLFN(·call8192, 8192)
    548  CALLFN(·call16384, 16384)
    549  CALLFN(·call32768, 32768)
    550  CALLFN(·call65536, 65536)
    551  CALLFN(·call131072, 131072)
    552  CALLFN(·call262144, 262144)
    553  CALLFN(·call524288, 524288)
    554  CALLFN(·call1048576, 1048576)
    555  CALLFN(·call2097152, 2097152)
    556  CALLFN(·call4194304, 4194304)
    557  CALLFN(·call8388608, 8388608)
    558  CALLFN(·call16777216, 16777216)
    559  CALLFN(·call33554432, 33554432)
    560  CALLFN(·call67108864, 67108864)
    561  CALLFN(·call134217728, 134217728)
    562  CALLFN(·call268435456, 268435456)
    563  CALLFN(·call536870912, 536870912)
    564  CALLFN(·call1073741824, 1073741824)
   565  
    566  TEXT runtime·procyield(SB),NOSPLIT,$0-0
    567  	MOVL	cycles+0(FP), AX
    568  again:
    569  	PAUSE			// spin-wait hint
    570  	SUBL	$1, AX
    571  	JNZ	again
    572  	RET
   573  
   574  
    575  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
    576  	// Stores are already ordered on x86, so this is just a
    577  	// compile barrier.
    578  	RET
   579  
   580  // func jmpdefer(fv *funcval, argp uintptr)
   581  // argp is a caller SP.
   582  // called from deferreturn.
   583  // 1. pop the caller
   584  // 2. sub 5 bytes from the callers return
   585  // 3. jmp to the argument
    586  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
    587  	MOVQ	fv+0(FP), DX	// fn
    588  	MOVQ	argp+8(FP), BX	// caller sp
    589  	LEAQ	-8(BX), SP	// caller sp after CALL
    590  	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
    591  	SUBQ	$5, (SP)	// return to CALL again (5 = length of the CALL instruction)
    592  	MOVQ	0(DX), BX	// code pointer out of the funcval
    593  	JMP	BX	// but first run the deferred function
   594  
   595  // Save state of caller into g->sched. Smashes R8, R9.
    596  TEXT gosave<>(SB),NOSPLIT,$0
    597  	get_tls(R8)
    598  	MOVQ	g(R8), R8
    599  	MOVQ	0(SP), R9	// caller's PC
    600  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
    601  	LEAQ	8(SP), R9	// caller's SP
    602  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
    603  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
    604  	MOVQ	BP, (g_sched+gobuf_bp)(R8)
    605  	// Assert ctxt is zero. See func save.
    606  	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
    607  	TESTQ	R9, R9
    608  	JZ	2(PC)
    609  	CALL	runtime·badctxt(SB)
    610  	RET
   611  
   612  // func asmcgocall(fn, arg unsafe.Pointer) int32
   613  // Call fn(arg) on the scheduler stack,
   614  // aligned appropriately for the gcc ABI.
   615  // See cgocall.go for more details.
    616  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
    617  	MOVQ	fn+0(FP), AX
    618  	MOVQ	arg+8(FP), BX
    619  
    620  	MOVQ	SP, DX	// remember entry SP for the depth computation below
    621  
    622  	// Figure out if we need to switch to m->g0 stack.
    623  	// We get called to create new OS threads too, and those
    624  	// come in on the m->g0 stack already.
    625  	get_tls(CX)
    626  	MOVQ	g(CX), R8
    627  	CMPQ	R8, $0
    628  	JEQ	nosave
    629  	MOVQ	g_m(R8), R8
    630  	MOVQ	m_g0(R8), SI
    631  	MOVQ	g(CX), DI
    632  	CMPQ	SI, DI
    633  	JEQ	nosave
    634  	MOVQ	m_gsignal(R8), SI
    635  	CMPQ	SI, DI
    636  	JEQ	nosave
    637  
    638  	// Switch to system stack.
    639  	MOVQ	m_g0(R8), SI
    640  	CALL	gosave<>(SB)
    641  	MOVQ	SI, g(CX)
    642  	MOVQ	(g_sched+gobuf_sp)(SI), SP
    643  
    644  	// Now on a scheduling stack (a pthread-created stack).
    645  	// Make sure we have enough room for 4 stack-backed fast-call
    646  	// registers as per windows amd64 calling convention.
    647  	SUBQ	$64, SP
    648  	ANDQ	$~15, SP	// alignment for gcc ABI
    649  	MOVQ	DI, 48(SP)	// save g
    650  	MOVQ	(g_stack+stack_hi)(DI), DI
    651  	SUBQ	DX, DI
    652  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
    653  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
    654  	MOVQ	BX, CX		// CX = first argument in Win64
    655  	CALL	AX
    656  
    657  	// Restore registers, g, stack pointer.
    658  	get_tls(CX)
    659  	MOVQ	48(SP), DI
    660  	MOVQ	(g_stack+stack_hi)(DI), SI
    661  	SUBQ	40(SP), SI	// recompute SP from stack_hi minus saved depth
    662  	MOVQ	DI, g(CX)
    663  	MOVQ	SI, SP
    664  
    665  	MOVL	AX, ret+16(FP)
    666  	RET
    667  
    668  nosave:
    669  	// Running on a system stack, perhaps even without a g.
    670  	// Having no g can happen during thread creation or thread teardown
    671  	// (see needm/dropm on Solaris, for example).
    672  	// This code is like the above sequence but without saving/restoring g
    673  	// and without worrying about the stack moving out from under us
    674  	// (because we're on a system stack, not a goroutine stack).
    675  	// The above code could be used directly if already on a system stack,
    676  	// but then the only path through this code would be a rare case on Solaris.
    677  	// Using this code for all "already on system stack" calls exercises it more,
    678  	// which should help keep it correct.
    679  	SUBQ	$64, SP
    680  	ANDQ	$~15, SP
    681  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
    682  	MOVQ	DX, 40(SP)	// save original stack pointer
    683  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
    684  	MOVQ	BX, CX		// CX = first argument in Win64
    685  	CALL	AX
    686  	MOVQ	40(SP), SI	// restore original stack pointer
    687  	MOVQ	SI, SP
    688  	MOVL	AX, ret+16(FP)
    689  	RET
   690  
   691  // func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
   692  // Turn the fn into a Go func (by taking its address) and call
   693  // cgocallback_gofunc.
    694  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
    695  	LEAQ	fn+0(FP), AX	// address of fn becomes a *funcval for cgocallback_gofunc
    696  	MOVQ	AX, 0(SP)
    697  	MOVQ	frame+8(FP), AX
    698  	MOVQ	AX, 8(SP)
    699  	MOVQ	framesize+16(FP), AX
    700  	MOVQ	AX, 16(SP)
    701  	MOVQ	ctxt+24(FP), AX
    702  	MOVQ	AX, 24(SP)
    703  	MOVQ	$runtime·cgocallback_gofunc(SB), AX
    704  	CALL	AX	// indirect call hides it from linker stack analysis
    705  	RET
   706  
   707  // func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
   708  // See cgocall.go for more details.
    709  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
    710  	NO_LOCAL_POINTERS
    711  
    712  	// If g is nil, Go did not create the current thread.
    713  	// Call needm to obtain one m for temporary use.
    714  	// In this case, we're running on the thread stack, so there's
    715  	// lots of space, but the linker doesn't know. Hide the call from
    716  	// the linker analysis by using an indirect call through AX.
    717  	get_tls(CX)
    718  #ifdef GOOS_windows
    719  	MOVL	$0, BX
    720  	CMPQ	CX, $0
    721  	JEQ	2(PC)
    722  #endif
    723  	MOVQ	g(CX), BX
    724  	CMPQ	BX, $0
    725  	JEQ	needm
    726  	MOVQ	g_m(BX), BX
    727  	MOVQ	BX, R8 // holds oldm until end of function
    728  	JMP	havem
    729  needm:
    730  	MOVQ	$0, 0(SP)	// R8 = oldm = nil below: marks the m as borrowed
    731  	MOVQ	$runtime·needm(SB), AX
    732  	CALL	AX
    733  	MOVQ	0(SP), R8
    734  	get_tls(CX)
    735  	MOVQ	g(CX), BX
    736  	MOVQ	g_m(BX), BX
    737  
    738  	// Set m->sched.sp = SP, so that if a panic happens
    739  	// during the function we are about to execute, it will
    740  	// have a valid SP to run on the g0 stack.
    741  	// The next few lines (after the havem label)
    742  	// will save this SP onto the stack and then write
    743  	// the same SP back to m->sched.sp. That seems redundant,
    744  	// but if an unrecovered panic happens, unwindm will
    745  	// restore the g->sched.sp from the stack location
    746  	// and then systemstack will try to use it. If we don't set it here,
    747  	// that restored SP will be uninitialized (typically 0) and
    748  	// will not be usable.
    749  	MOVQ	m_g0(BX), SI
    750  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
    751  
    752  havem:
    753  	// Now there's a valid m, and we're running on its m->g0.
    754  	// Save current m->g0->sched.sp on stack and then set it to SP.
    755  	// Save current sp in m->g0->sched.sp in preparation for
    756  	// switch back to m->curg stack.
    757  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
    758  	MOVQ	m_g0(BX), SI
    759  	MOVQ	(g_sched+gobuf_sp)(SI), AX
    760  	MOVQ	AX, 0(SP)
    761  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
    762  
    763  	// Switch to m->curg stack and call runtime.cgocallbackg.
    764  	// Because we are taking over the execution of m->curg
    765  	// but *not* resuming what had been running, we need to
    766  	// save that information (m->curg->sched) so we can restore it.
    767  	// We can restore m->curg->sched.sp easily, because calling
    768  	// runtime.cgocallbackg leaves SP unchanged upon return.
    769  	// To save m->curg->sched.pc, we push it onto the stack.
    770  	// This has the added benefit that it looks to the traceback
    771  	// routine like cgocallbackg is going to return to that
    772  	// PC (because the frame we allocate below has the same
    773  	// size as cgocallback_gofunc's frame declared above)
    774  	// so that the traceback will seamlessly trace back into
    775  	// the earlier calls.
    776  	//
    777  	// In the new goroutine, 8(SP) holds the saved R8.
    778  	MOVQ	m_curg(BX), SI
    779  	MOVQ	SI, g(CX)
    780  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
    781  	MOVQ	(g_sched+gobuf_pc)(SI), BX
    782  	MOVQ	BX, -8(DI)	// push curg's saved PC onto its stack
    783  	// Compute the size of the frame, including return PC and, if
    784  	// GOEXPERIMENT=framepointer, the saved base pointer
    785  	MOVQ	ctxt+24(FP), BX
    786  	LEAQ	fv+0(FP), AX
    787  	SUBQ	SP, AX		// AX = frame size (distance from SP to first arg)
    788  	SUBQ	AX, DI
    789  	MOVQ	DI, SP
    790  
    791  	MOVQ	R8, 8(SP)
    792  	MOVQ	BX, 0(SP)	// pass ctxt to cgocallbackg
    793  	CALL	runtime·cgocallbackg(SB)
    794  	MOVQ	8(SP), R8
    795  
    796  	// Compute the size of the frame again. FP and SP have
    797  	// completely different values here than they did above,
    798  	// but only their difference matters.
    799  	LEAQ	fv+0(FP), AX
    800  	SUBQ	SP, AX
    801  
    802  	// Restore g->sched (== m->curg->sched) from saved values.
    803  	get_tls(CX)
    804  	MOVQ	g(CX), SI
    805  	MOVQ	SP, DI
    806  	ADDQ	AX, DI
    807  	MOVQ	-8(DI), BX
    808  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
    809  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
    810  
    811  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    812  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    813  	// so we do not have to restore it.)
    814  	MOVQ	g(CX), BX
    815  	MOVQ	g_m(BX), BX
    816  	MOVQ	m_g0(BX), SI
    817  	MOVQ	SI, g(CX)
    818  	MOVQ	(g_sched+gobuf_sp)(SI), SP
    819  	MOVQ	0(SP), AX
    820  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
    821  
    822  	// If the m on entry was nil, we called needm above to borrow an m
    823  	// for the duration of the call. Since the call is over, return it with dropm.
    824  	CMPQ	R8, $0	// R8 = oldm; zero means the m was borrowed via needm
    825  	JNE 3(PC)
    826  	MOVQ	$runtime·dropm(SB), AX
    827  	CALL	AX
    828  
    829  	// Done!
    830  	RET
   831  
   832  // func setg(gg *g)
   833  // set g. for use by needm.
    834  TEXT runtime·setg(SB), NOSPLIT, $0-8
    835  	MOVQ	gg+0(FP), BX
    836  #ifdef GOOS_windows
    837  	CMPQ	BX, $0
    838  	JNE	settls
    839  	MOVQ	$0, 0x28(GS)	// clear the Windows TLS slot when g is nil
    840  	RET
    841  settls:
    842  	MOVQ	g_m(BX), AX
    843  	LEAQ	m_tls(AX), AX
    844  	MOVQ	AX, 0x28(GS)
    845  #endif
    846  	get_tls(CX)
    847  	MOVQ	BX, g(CX)
    848  	RET
   849  
   850  // void setg_gcc(G*); set g called from gcc.
    851  TEXT setg_gcc<>(SB),NOSPLIT,$0
    852  	get_tls(AX)
    853  	MOVQ	DI, g(AX)	// DI = g, passed per the C ABI
    854  	RET
   855  
    856  TEXT runtime·abort(SB),NOSPLIT,$0-0
    857  	INT	$3	// breakpoint trap; crashes the process
    858  loop:
    859  	JMP	loop	// hang forever if the trap somehow returns
   860  
   861  // check that SP is in range [g->stack.lo, g->stack.hi)
    862  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
    863  	get_tls(CX)
    864  	MOVQ	g(CX), AX
    865  	CMPQ	(g_stack+stack_hi)(AX), SP	// require SP < stack.hi
    866  	JHI	2(PC)
    867  	CALL	runtime·abort(SB)
    868  	CMPQ	SP, (g_stack+stack_lo)(AX)	// require SP >= stack.lo
    869  	JHI	2(PC)
    870  	CALL	runtime·abort(SB)
    871  	RET
   872  
   873  // func cputicks() int64
    874  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
    875  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1	// set for Intel in rt0_go
    876  	JNE	mfence
    877  	LFENCE
    878  	JMP	done
    879  mfence:
    880  	MFENCE
    881  done:
    882  	RDTSC
    883  	SHLQ	$32, DX	// combine EDX:EAX into a 64-bit tick count in AX
    884  	ADDQ	DX, AX
    885  	MOVQ	AX, ret+0(FP)
    886  	RET
   887  
   888  // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
   889  // hash function using AES hardware instructions
    890  TEXT runtime·memhash(SB),NOSPLIT,$0-32
    891  	CMPB	runtime·useAeshash(SB), $0
    892  	JEQ	noaes
    893  	MOVQ	p+0(FP), AX	// ptr to data
    894  	MOVQ	s+16(FP), CX	// size
    895  	LEAQ	ret+24(FP), DX	// aeshashbody writes the result through DX
    896  	JMP	aeshashbody<>(SB)
    897  noaes:
    898  	JMP	runtime·memhashFallback(SB)
   899  
   900  // func strhash(p unsafe.Pointer, h uintptr) uintptr
    901  TEXT runtime·strhash(SB),NOSPLIT,$0-24
    902  	CMPB	runtime·useAeshash(SB), $0
    903  	JEQ	noaes
    904  	MOVQ	p+0(FP), AX	// ptr to string struct
    905  	MOVQ	8(AX), CX	// length of string
    906  	MOVQ	(AX), AX	// string data
    907  	LEAQ	ret+16(FP), DX	// aeshashbody writes the result through DX
    908  	JMP	aeshashbody<>(SB)
    909  noaes:
    910  	JMP	runtime·strhashFallback(SB)
   911  
// aeshashbody is the common AES-NI hashing kernel shared by memhash and
// strhash. It is entered via JMP (not CALL), so it executes in the
// caller's frame and reads the hash seed from h+8(FP) of that frame.
// Dispatches on the input length to a size-specialized routine.
//
// AX: data
// CX: length
// DX: address to put return value
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length (unsigned compares).
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// If AX+16 has bits 4..11 all zero, the original address ended in
	// 0xff0..0xfff, so a 16-byte load might cross a page boundary.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX				// mask entries are 16 bytes apart; index as CX*2*8
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1			// zero bytes beyond the input length
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX				// shift entries are 16 bytes apart; index as CX*2*8
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed (the two 16-byte loads may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load first 32 and last 32 bytes (middle may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (middle may overlap)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET
  1242  
  1243  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1244  TEXT runtime·memhash32(SB),NOSPLIT,$0-24
  1245  	CMPB	runtime·useAeshash(SB), $0
  1246  	JEQ	noaes
  1247  	JMP	runtime·memhash32Fallback(SB)
  1248  	MOVQ	p+0(FP), AX	// ptr to data
  1249  	MOVQ	h+8(FP), X0	// seed
  1250  	PINSRD	$2, (AX), X0	// data
  1251  	AESENC	runtime·aeskeysched+0(SB), X0
  1252  	AESENC	runtime·aeskeysched+16(SB), X0
  1253  	AESENC	runtime·aeskeysched+32(SB), X0
  1254  	MOVQ	X0, ret+16(FP)
  1255  	RET
  1256  noaes:
  1257  	JMP	runtime·memhash32Fallback(SB)
  1258  
  1259  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1260  TEXT runtime·memhash64(SB),NOSPLIT,$0-24
  1261  	CMPB	runtime·useAeshash(SB), $0
  1262  	JEQ	noaes
  1263  	JMP	runtime·memhash64Fallback(SB)
  1264  	MOVQ	p+0(FP), AX	// ptr to data
  1265  	MOVQ	h+8(FP), X0	// seed
  1266  	PINSRQ	$1, (AX), X0	// data
  1267  	AESENC	runtime·aeskeysched+0(SB), X0
  1268  	AESENC	runtime·aeskeysched+16(SB), X0
  1269  	AESENC	runtime·aeskeysched+32(SB), X0
  1270  	MOVQ	X0, ret+16(FP)
  1271  	RET
  1272  noaes:
  1273  	JMP	runtime·memhash64Fallback(SB)
  1274  
// simple mask to get rid of data in the high part of the register.
// Entry i (16 bytes at offset 16*i) keeps the low i bytes of an XMM
// register and zeroes the rest; used by aeshashbody (aes0to15) via PAND.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1309  
// func checkASM() bool
// Returns true if the assembly-level invariants hold; called at startup
// from Go to validate assumptions made by the assembly code above.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX		// either address misaligned => some low bit set
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET
  1319  
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move. Entry i is 16 bytes at offset 16*i;
// byte selectors with the high bit set (0xff) produce zero, so bytes
// beyond the moved span are cleared. Used by aeshashbody (endofpage).
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1356  
// return0 sets the machine return register (AX) to zero.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  1360  
  1361  
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Walks g -> m -> curg and loads the high bound of curg's stack into AX.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = current g
	MOVQ	g_m(AX), AX	// AX = g.m
	MOVQ	m_curg(AX), AX	// AX = m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET
  1371  
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP ensures the return address (goexit+1) still lies
// within goexit's code range for tracebacks.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1379  
// This is called from .init_array and follows the platform, not Go, ABI.
// Appends the moduledata passed in DI (C ABI first argument) to the
// runtime's linked list of modules.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)		// lastmoduledatap.next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)	// lastmoduledatap = DI
	POPQ	R15
	RET
  1388  
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
//
// Frame layout ($120): 0..96 spill slots for GP registers on the flush
// path; 104/112 hold R14/R13 saved on the fast path.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	// NOTE: flags from this CMPQ are consumed by the JEQ below; the
	// intervening MOV instructions do not modify flags.
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1478  
// Error string reported to the debugger by debugCallV1 when the
// requested argument frame exceeds the largest DEBUG_CALL_FN size.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1481  
// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size in a local slot.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest debugCall stub whose frame fits the
	// requested size; each macro falls through to the next on JA.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1637  
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each stub raises INT3 with AX=0 (frame ready for the debugger to
// fill) and again with AX=1 (call returned); see debugCallV1 above.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1660  
// func debugCallPanicked(val interface{})
// Reports a panic from a debugger-injected call: copies the panic's
// interface value to the top of the stack and raises INT3 with AX=2
// (the "function panicked" code in the debugCallV1 protocol).
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3
	RET
  1671  
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// Which two registers carry the failing index/length pair is fixed per
// stub (chosen by the compiler at the failing bounds check).
TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3CU(SB)
  1741  
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// 16 = byte offset of slot #2 (2 * 8 bytes per slot).
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif