github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/runtime/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_amd64 is common startup code for most amd64 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
    14  TEXT _rt0_amd64(SB),NOSPLIT,$-8
	// The kernel leaves argc at 0(SP) and argv starting at 8(SP).
	// Move them into the registers rt0_go expects, then tail-jump.
    15  	MOVQ	0(SP), DI	// argc
    16  	LEAQ	8(SP), SI	// argv
    17  	JMP	runtime·rt0_go(SB)
    18  
    19  // main is common startup code for most amd64 systems when using
    20  // external linking. The C startup code will call the symbol "main"
    21  // passing argc and argv in the usual C ABI registers DI and SI.
    22  TEXT main(SB),NOSPLIT,$-8
	// argc/argv already arrive in DI/SI per the C ABI (see comment above),
	// which is exactly where rt0_go reads them, so just tail-jump.
    23  	JMP	runtime·rt0_go(SB)
    24  
    25  // _rt0_amd64_lib is common startup code for most amd64 systems when
    26  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27  // arrange to invoke this function as a global constructor (for
    28  // c-archive) or when the shared library is loaded (for c-shared).
    29  // We expect argc and argv to be passed in the usual C ABI registers
    30  // DI and SI.
    31  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32  	// Align stack per ELF ABI requirements.
    33  	MOVQ	SP, AX
    34  	ANDQ	$~15, SP
    35  	// Save C ABI callee-saved registers, as caller may need them.
    36  	MOVQ	BX, 0x10(SP)
    37  	MOVQ	BP, 0x18(SP)
    38  	MOVQ	R12, 0x20(SP)
    39  	MOVQ	R13, 0x28(SP)
    40  	MOVQ	R14, 0x30(SP)
    41  	MOVQ	R15, 0x38(SP)
	// Also remember the original (pre-alignment) SP so it can be
	// restored exactly on return.
    42  	MOVQ	AX, 0x40(SP)
    43  
	// Stash argc/argv in private globals; _rt0_amd64_lib_go reads them
	// later, from the newly created thread.
    44  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46  
    47  	// Synchronous initialization.
    48  	CALL	runtime·libpreinit(SB)
    49  
    50  	// Create a new thread to finish Go runtime initialization.
	// Prefer the cgo thread-creation hook when present; otherwise fall
	// through to nocgo and let the runtime create the thread itself.
    51  	MOVQ	_cgo_sys_thread_create(SB), AX
    52  	TESTQ	AX, AX
    53  	JZ	nocgo
    54  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55  	MOVQ	$0, SI
    56  	CALL	AX
    57  	JMP	restore
    58  
    59  nocgo:
	// 0(SP)/8(SP) are the outgoing stack arguments to newosproc0:
	// requested stack size and the entry function.
    60  	MOVQ	$0x800000, 0(SP)		// stacksize
    61  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62  	MOVQ	AX, 8(SP)			// fn
    63  	CALL	runtime·newosproc0(SB)
    64  
    65  restore:
	// Restore the C callee-saved registers and the caller's original SP.
    66  	MOVQ	0x10(SP), BX
    67  	MOVQ	0x18(SP), BP
    68  	MOVQ	0x20(SP), R12
    69  	MOVQ	0x28(SP), R13
    70  	MOVQ	0x30(SP), R14
    71  	MOVQ	0x38(SP), R15
    72  	MOVQ	0x40(SP), SP
    73  	RET
    74  
    75  // _rt0_amd64_lib_go initializes the Go runtime.
    76  // This is started in a separate thread by _rt0_amd64_lib.
    77  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	// Reload the argc/argv that _rt0_amd64_lib stashed in the globals
	// below, then enter the normal startup path.
    78  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    79  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80  	JMP	runtime·rt0_go(SB)
    81  
	// Zero-initialized 8-byte slots holding argc and argv across the
	// thread handoff above. NOPTR: they never hold Go heap pointers.
    82  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    83  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    85  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86  
    87  TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Runtime bootstrap: set up g0's stack bounds, probe the CPU,
	// establish TLS, wire g0<->m0 together, run the init sequence
	// (args/osinit/schedinit), queue runtime.main, and start this M.
    88  	// copy arguments forward on an even stack
    89  	MOVQ	DI, AX		// argc
    90  	MOVQ	SI, BX		// argv
    91  	SUBQ	$(4*8+7), SP		// 2args 2auto
    92  	ANDQ	$~15, SP
    93  	MOVQ	AX, 16(SP)
    94  	MOVQ	BX, 24(SP)
    95  
    96  	// create istack out of the given (operating system) stack.
    97  	// _cgo_init may update stackguard.
    98  	MOVQ	$runtime·g0(SB), DI
	// Stack lo/guard placed 64KB (less 104 bytes of slop) below SP.
    99  	LEAQ	(-64*1024+104)(SP), BX
   100  	MOVQ	BX, g_stackguard0(DI)
   101  	MOVQ	BX, g_stackguard1(DI)
   102  	MOVQ	BX, (g_stack+stack_lo)(DI)
   103  	MOVQ	SP, (g_stack+stack_hi)(DI)
   104  
   105  	// find out information about the processor we're on
	// CPUID leaf 0: AX = max supported leaf, BX/DX/CX = vendor string.
   106  	MOVL	$0, AX
   107  	CPUID
   108  	MOVL	AX, SI
   109  	CMPL	AX, $0
   110  	JE	nocpuinfo
   111  
   112  	// Figure out how to serialize RDTSC.
   113  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   114  	// Don't know about the rest, so let's do MFENCE.
   115  	CMPL	BX, $0x756E6547  // "Genu"
   116  	JNE	notintel
   117  	CMPL	DX, $0x49656E69  // "ineI"
   118  	JNE	notintel
   119  	CMPL	CX, $0x6C65746E  // "ntel"
   120  	JNE	notintel
   121  	MOVB	$1, runtime·isIntel(SB)
   122  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   123  notintel:
   124  
   125  	// Load EAX=1 cpuid flags
   126  	MOVL	$1, AX
   127  	CPUID
   128  	MOVL	AX, runtime·processorVersionInfo(SB)
   129  
   130  nocpuinfo:
   131  	// if there is an _cgo_init, call it.
   132  	MOVQ	_cgo_init(SB), AX
   133  	TESTQ	AX, AX
   134  	JZ	needtls
   135  	// g0 already in DI
   136  	MOVQ	DI, CX	// Win64 uses CX for first parameter
   137  	MOVQ	$setg_gcc<>(SB), SI
   138  	CALL	AX
   139  
   140  	// update stackguard after _cgo_init
   141  	MOVQ	$runtime·g0(SB), CX
   142  	MOVQ	(g_stack+stack_lo)(CX), AX
   143  	ADDQ	$const__StackGuard, AX
   144  	MOVQ	AX, g_stackguard0(CX)
   145  	MOVQ	AX, g_stackguard1(CX)
   146  
   147  #ifndef GOOS_windows
   148  	JMP ok
   149  #endif
   150  needtls:
   151  #ifdef GOOS_plan9
   152  	// skip TLS setup on Plan 9
   153  	JMP ok
   154  #endif
   155  #ifdef GOOS_solaris
   156  	// skip TLS setup on Solaris
   157  	JMP ok
   158  #endif
   159  #ifdef GOOS_darwin
   160  	// skip TLS setup on Darwin
   161  	JMP ok
   162  #endif
   163  
   164  	LEAQ	runtime·m0+m_tls(SB), DI
   165  	CALL	runtime·settls(SB)
   166  
   167  	// store through it, to make sure it works
	// Write a sentinel through the TLS g slot and read it back via
	// m0.tls; abort if the round trip fails.
   168  	get_tls(BX)
   169  	MOVQ	$0x123, g(BX)
   170  	MOVQ	runtime·m0+m_tls(SB), AX
   171  	CMPQ	AX, $0x123
   172  	JEQ 2(PC)			// sentinel matched: skip the abort
   173  	CALL	runtime·abort(SB)
   174  ok:
   175  	// set the per-goroutine and per-mach "registers"
   176  	get_tls(BX)
   177  	LEAQ	runtime·g0(SB), CX
   178  	MOVQ	CX, g(BX)
   179  	LEAQ	runtime·m0(SB), AX
   180  
   181  	// save m->g0 = g0
   182  	MOVQ	CX, m_g0(AX)
   183  	// save m0 to g0->m
   184  	MOVQ	AX, g_m(CX)
   185  
   186  	CLD				// convention is D is always left cleared
   187  	CALL	runtime·check(SB)
   188  
   189  	MOVL	16(SP), AX		// copy argc
   190  	MOVL	AX, 0(SP)
   191  	MOVQ	24(SP), AX		// copy argv
   192  	MOVQ	AX, 8(SP)
   193  	CALL	runtime·args(SB)
   194  	CALL	runtime·osinit(SB)
   195  	CALL	runtime·schedinit(SB)
   196  
   197  	// create a new goroutine to start program
	// After the two pushes, 0(SP) = arg size (0) and 8(SP) = &mainPC,
	// which is newproc's expected argument layout.
   198  	MOVQ	$runtime·mainPC(SB), AX		// entry
   199  	PUSHQ	AX
   200  	PUSHQ	$0			// arg size
   201  	CALL	runtime·newproc(SB)
   202  	POPQ	AX
   203  	POPQ	AX
   204  
   205  	// start this M
   206  	CALL	runtime·mstart(SB)
   207  
   208  	CALL	runtime·abort(SB)	// mstart should never return
   209  	RET
   210  
   211  	// Prevent dead-code elimination of debugCallV1, which is
   212  	// intended to be called by debuggers.
	// (Unreachable on purpose; it only keeps a reference to
	// debugCallV1 alive in the binary.)
   213  	MOVQ	$runtime·debugCallV1(SB), AX
   214  	RET
   215  
	// mainPC holds the address of runtime·main; its own address is
	// handed to newproc above as the goroutine entry function value.
   216  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
   217  GLOBL	runtime·mainPC(SB),RODATA,$8
   218  
   219  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	// Raw 0xCC (INT3) byte: traps into any attached debugger.
   220  	BYTE	$0xcc
   221  	RET
   222  
   223  TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Architecture hook for per-thread assembly setup; nothing needed
	// on amd64.
   224  	// No per-thread init.
   225  	RET
   226  
   227  /*
   228   *  go-routine
   229   */
   230  
   231  // func gosave(buf *gobuf)
   232  // save state in Gobuf; setjmp
   233  TEXT runtime·gosave(SB), NOSPLIT, $0-8
	// Capture the caller's SP, PC, BP, and g into *buf so a later gogo
	// can resume at the call site (setjmp half of setjmp/longjmp).
   234  	MOVQ	buf+0(FP), AX		// gobuf
   235  	LEAQ	buf+0(FP), BX		// caller's SP
   236  	MOVQ	BX, gobuf_sp(AX)
   237  	MOVQ	0(SP), BX		// caller's PC
   238  	MOVQ	BX, gobuf_pc(AX)
   239  	MOVQ	$0, gobuf_ret(AX)
   240  	MOVQ	BP, gobuf_bp(AX)
   241  	// Assert ctxt is zero. See func save.
   242  	MOVQ	gobuf_ctxt(AX), BX
   243  	TESTQ	BX, BX
   244  	JZ	2(PC)			// ctxt == 0: skip badctxt
   245  	CALL	runtime·badctxt(SB)
   246  	get_tls(CX)
   247  	MOVQ	g(CX), BX
   248  	MOVQ	BX, gobuf_g(AX)
   249  	RET
   250  
   251  // func gogo(buf *gobuf)
   252  // restore state from Gobuf; longjmp
   253  TEXT runtime·gogo(SB), NOSPLIT, $16-8
	// Resume execution from *buf: install buf.g in TLS, restore
	// SP/ret/ctxt/BP, zero the gobuf slots so the GC does not retain
	// them, then jump to the saved PC (longjmp half).
   254  	MOVQ	buf+0(FP), BX		// gobuf
   255  	MOVQ	gobuf_g(BX), DX
   256  	MOVQ	0(DX), CX		// make sure g != nil
   257  	get_tls(CX)
   258  	MOVQ	DX, g(CX)
   259  	MOVQ	gobuf_sp(BX), SP	// restore SP
   260  	MOVQ	gobuf_ret(BX), AX
   261  	MOVQ	gobuf_ctxt(BX), DX
   262  	MOVQ	gobuf_bp(BX), BP
   263  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   264  	MOVQ	$0, gobuf_ret(BX)
   265  	MOVQ	$0, gobuf_ctxt(BX)
   266  	MOVQ	$0, gobuf_bp(BX)
   267  	MOVQ	gobuf_pc(BX), BX
   268  	JMP	BX
   269  
   270  // func mcall(fn func(*g))
   271  // Switch to m->g0's stack, call fn(g).
   272  // Fn must never return. It should gogo(&g->sched)
   273  // to keep running g.
   274  TEXT runtime·mcall(SB), NOSPLIT, $0-8
	// Save the current g's resume state, switch to m->g0's stack, and
	// invoke fn(g). fn must not return (see comment above).
   275  	MOVQ	fn+0(FP), DI
   276  
   277  	get_tls(CX)
   278  	MOVQ	g(CX), AX	// save state in g->sched
   279  	MOVQ	0(SP), BX	// caller's PC
   280  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   281  	LEAQ	fn+0(FP), BX	// caller's SP
   282  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   283  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   284  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   285  
   286  	// switch to m->g0 & its stack, call fn
   287  	MOVQ	g(CX), BX
   288  	MOVQ	g_m(BX), BX
   289  	MOVQ	m_g0(BX), SI
   290  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   291  	JNE	3(PC)			// not g0: skip the badmcall tail-jump
   292  	MOVQ	$runtime·badmcall(SB), AX
   293  	JMP	AX
   294  	MOVQ	SI, g(CX)	// g = m->g0
   295  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	// Push the old g as fn's argument; DX carries fn as the closure
	// context register.
   296  	PUSHQ	AX
   297  	MOVQ	DI, DX
   298  	MOVQ	0(DI), DI
   299  	CALL	DI
	// fn must not return; if it does, report it via badmcall2.
   300  	POPQ	AX
   301  	MOVQ	$runtime·badmcall2(SB), AX
   302  	JMP	AX
   303  	RET
   304  
   305  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   306  // of the G stack. We need to distinguish the routine that
   307  // lives at the bottom of the G stack from the one that lives
   308  // at the top of the system stack because the one at the top of
   309  // the system stack terminates the stack walk (see topofstack()).
   310  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// Deliberately empty: only its PC matters, as a traceback marker
	// written into g->sched.pc by systemstack (see comment above).
   311  	RET
   312  
   313  // func systemstack(fn func())
   314  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	// Run fn on the system (g0) stack, switching there and back if the
	// caller is m->curg; run fn in place if already on g0 or gsignal.
   315  	MOVQ	fn+0(FP), DI	// DI = fn
   316  	get_tls(CX)
   317  	MOVQ	g(CX), AX	// AX = g
   318  	MOVQ	g_m(AX), BX	// BX = m
   319  
   320  	CMPQ	AX, m_gsignal(BX)
   321  	JEQ	noswitch
   322  
   323  	MOVQ	m_g0(BX), DX	// DX = g0
   324  	CMPQ	AX, DX
   325  	JEQ	noswitch
   326  
   327  	CMPQ	AX, m_curg(BX)
   328  	JNE	bad
   329  
   330  	// switch stacks
   331  	// save our state in g->sched. Pretend to
   332  	// be systemstack_switch if the G stack is scanned.
   333  	MOVQ	$runtime·systemstack_switch(SB), SI
   334  	MOVQ	SI, (g_sched+gobuf_pc)(AX)
   335  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   336  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   337  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   338  
   339  	// switch to g0
   340  	MOVQ	DX, g(CX)
   341  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   342  	// make it look like mstart called systemstack on g0, to stop traceback
   343  	SUBQ	$8, BX
   344  	MOVQ	$runtime·mstart(SB), DX
   345  	MOVQ	DX, 0(BX)
   346  	MOVQ	BX, SP
   347  
   348  	// call target function
	// DX = fn (closure context register); DI = fn's code pointer.
   349  	MOVQ	DI, DX
   350  	MOVQ	0(DI), DI
   351  	CALL	DI
   352  
   353  	// switch back to g
	// Reload everything from TLS: fn may have changed which g/m we
	// are running on.
   354  	get_tls(CX)
   355  	MOVQ	g(CX), AX
   356  	MOVQ	g_m(AX), BX
   357  	MOVQ	m_curg(BX), AX
   358  	MOVQ	AX, g(CX)
   359  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   360  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   361  	RET
   362  
   363  noswitch:
   364  	// already on m stack; tail call the function
   365  	// Using a tail call here cleans up tracebacks since we won't stop
   366  	// at an intermediate systemstack.
   367  	MOVQ	DI, DX
   368  	MOVQ	0(DI), DI
   369  	JMP	DI
   370  
   371  bad:
   372  	// Bad: g is not gsignal, not g0, not curg. What is it?
   373  	MOVQ	$runtime·badsystemstack(SB), AX
   374  	CALL	AX
   375  	INT	$3
   376  
   377  
   378  /*
   379   * support for morestack
   380   */
   381  
   382  // Called during function prolog when more stack is needed.
   383  //
   384  // The traceback routines see morestack on a g0 as being
   385  // the top of a stack (for example, morestack calling newstack
   386  // calling the scheduler calling newm calling gc), so we must
   387  // record an argument size. For that purpose, it has no arguments.
   388  TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// On entry (from a function prologue): DX = f's ctxt, 0(SP) = f's
	// PC, 8(SP) = f's caller's PC. Saves both frames' state, then runs
	// newstack on g0.
   389  	// Cannot grow scheduler stack (m->g0).
   390  	get_tls(CX)
   391  	MOVQ	g(CX), BX
   392  	MOVQ	g_m(BX), BX
   393  	MOVQ	m_g0(BX), SI
   394  	CMPQ	g(CX), SI
   395  	JNE	3(PC)			// not on g0: skip the crash path
   396  	CALL	runtime·badmorestackg0(SB)
   397  	CALL	runtime·abort(SB)
   398  
   399  	// Cannot grow signal stack (m->gsignal).
   400  	MOVQ	m_gsignal(BX), SI
   401  	CMPQ	g(CX), SI
   402  	JNE	3(PC)			// not on gsignal: skip the crash path
   403  	CALL	runtime·badmorestackgsignal(SB)
   404  	CALL	runtime·abort(SB)
   405  
   406  	// Called from f.
   407  	// Set m->morebuf to f's caller.
   408  	MOVQ	8(SP), AX	// f's caller's PC
   409  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   410  	LEAQ	16(SP), AX	// f's caller's SP
   411  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   412  	get_tls(CX)
   413  	MOVQ	g(CX), SI
   414  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   415  
   416  	// Set g->sched to context in f.
   417  	MOVQ	0(SP), AX // f's PC
   418  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   419  	MOVQ	SI, (g_sched+gobuf_g)(SI)
   420  	LEAQ	8(SP), AX // f's SP
   421  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   422  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	// DX still holds f's ctxt (zeroed by morestack_noctxt variant).
   423  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   424  
   425  	// Call newstack on m->g0's stack.
   426  	MOVQ	m_g0(BX), BX
   427  	MOVQ	BX, g(CX)
   428  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   429  	CALL	runtime·newstack(SB)
   430  	CALL	runtime·abort(SB)	// crash if newstack returns
   431  	RET
   432  
   433  // morestack but not preserving ctxt.
   434  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	// Zero DX (the closure-context register morestack saves as ctxt)
	// before entering the common path.
   435  	MOVL	$0, DX
   436  	JMP	runtime·morestack(SB)
   437  
   438  // reflectcall: call a function with the given argument list
   439  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   440  // we don't have variable-sized frames, so we use a small number
   441  // of constant-sized-frame functions to encode a few bits of size in the pc.
   442  // Caution: ugly multiline assembly macros in your future!
   443  
   444  #define DISPATCH(NAME,MAXSIZE)		\
   445  	CMPQ	CX, $MAXSIZE;		\
   446  	JA	3(PC);			\
   447  	MOVQ	$NAME(SB), AX;		\
   448  	JMP	AX
   449  // Note: can't just "JMP NAME(SB)" - bad inlining results.
// DISPATCH(NAME, MAXSIZE): if the arg size in CX is <= MAXSIZE,
// tail-jump (via AX) to the fixed-frame stub NAME; otherwise fall
// through to the next DISPATCH line in reflectcall below.
   450  
   451  TEXT ·reflectcall(SB), NOSPLIT, $0-32
	// Zero-extend the 32-bit argsize into CX, then dispatch to the
	// smallest constant-frame call* stub that can hold the arguments.
   452  	MOVLQZX argsize+24(FP), CX
   453  	DISPATCH(runtime·call32, 32)
   454  	DISPATCH(runtime·call64, 64)
   455  	DISPATCH(runtime·call128, 128)
   456  	DISPATCH(runtime·call256, 256)
   457  	DISPATCH(runtime·call512, 512)
   458  	DISPATCH(runtime·call1024, 1024)
   459  	DISPATCH(runtime·call2048, 2048)
   460  	DISPATCH(runtime·call4096, 4096)
   461  	DISPATCH(runtime·call8192, 8192)
   462  	DISPATCH(runtime·call16384, 16384)
   463  	DISPATCH(runtime·call32768, 32768)
   464  	DISPATCH(runtime·call65536, 65536)
   465  	DISPATCH(runtime·call131072, 131072)
   466  	DISPATCH(runtime·call262144, 262144)
   467  	DISPATCH(runtime·call524288, 524288)
   468  	DISPATCH(runtime·call1048576, 1048576)
   469  	DISPATCH(runtime·call2097152, 2097152)
   470  	DISPATCH(runtime·call4194304, 4194304)
   471  	DISPATCH(runtime·call8388608, 8388608)
   472  	DISPATCH(runtime·call16777216, 16777216)
   473  	DISPATCH(runtime·call33554432, 33554432)
   474  	DISPATCH(runtime·call67108864, 67108864)
   475  	DISPATCH(runtime·call134217728, 134217728)
   476  	DISPATCH(runtime·call268435456, 268435456)
   477  	DISPATCH(runtime·call536870912, 536870912)
   478  	DISPATCH(runtime·call1073741824, 1073741824)
	// Argument frame larger than 1GB: give up.
   479  	MOVQ	$runtime·badreflectcall(SB), AX
   480  	JMP	AX
   481  
// CALLFN(NAME, MAXSIZE) defines one call* stub with a fixed
// MAXSIZE-byte frame: copy the argument block onto the local frame,
// call f (DX = closure context), then hand off to callRet to copy
// results back through the write barrier.
   482  #define CALLFN(NAME,MAXSIZE)			\
   483  TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   484  	NO_LOCAL_POINTERS;			\
   485  	/* copy arguments to stack */		\
   486  	MOVQ	argptr+16(FP), SI;		\
   487  	MOVLQZX argsize+24(FP), CX;		\
   488  	MOVQ	SP, DI;				\
   489  	REP;MOVSB;				\
   490  	/* call function */			\
   491  	MOVQ	f+8(FP), DX;			\
   492  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   493  	CALL	(DX);				\
   494  	/* copy return values back */		\
   495  	MOVQ	argtype+0(FP), DX;		\
   496  	MOVQ	argptr+16(FP), DI;		\
   497  	MOVLQZX	argsize+24(FP), CX;		\
   498  	MOVLQZX	retoffset+28(FP), BX;		\
   499  	MOVQ	SP, SI;				\
   500  	ADDQ	BX, DI;				\
   501  	ADDQ	BX, SI;				\
   502  	SUBQ	BX, CX;				\
   503  	CALL	callRet<>(SB);			\
   504  	RET
   505  
   506  // callRet copies return values back at the end of call*. This is a
   507  // separate function so it can allocate stack space for the arguments
   508  // to reflectcallmove. It does not follow the Go ABI; it expects its
   509  // arguments in registers.
   510  TEXT callRet<>(SB), NOSPLIT, $32-0
	// Register-based helper (not Go ABI): DX = argtype, DI = dst,
	// SI = src, CX = size. Lays them out as reflectcallmove's stack
	// arguments and calls it.
   511  	NO_LOCAL_POINTERS
   512  	MOVQ	DX, 0(SP)
   513  	MOVQ	DI, 8(SP)
   514  	MOVQ	SI, 16(SP)
   515  	MOVQ	CX, 24(SP)
   516  	CALL	runtime·reflectcallmove(SB)
   517  	RET
   518  
   519  CALLFN(·call32, 32)	// one fixed-frame stub per power-of-two arg size; see reflectcall
   520  CALLFN(·call64, 64)
   521  CALLFN(·call128, 128)
   522  CALLFN(·call256, 256)
   523  CALLFN(·call512, 512)
   524  CALLFN(·call1024, 1024)
   525  CALLFN(·call2048, 2048)
   526  CALLFN(·call4096, 4096)
   527  CALLFN(·call8192, 8192)
   528  CALLFN(·call16384, 16384)
   529  CALLFN(·call32768, 32768)
   530  CALLFN(·call65536, 65536)
   531  CALLFN(·call131072, 131072)
   532  CALLFN(·call262144, 262144)
   533  CALLFN(·call524288, 524288)
   534  CALLFN(·call1048576, 1048576)
   535  CALLFN(·call2097152, 2097152)
   536  CALLFN(·call4194304, 4194304)
   537  CALLFN(·call8388608, 8388608)
   538  CALLFN(·call16777216, 16777216)
   539  CALLFN(·call33554432, 33554432)
   540  CALLFN(·call67108864, 67108864)
   541  CALLFN(·call134217728, 134217728)
   542  CALLFN(·call268435456, 268435456)
   543  CALLFN(·call536870912, 536870912)
   544  CALLFN(·call1073741824, 1073741824)
   545  
   546  TEXT runtime·procyield(SB),NOSPLIT,$0-0
	// Spin for `cycles` iterations of PAUSE, the spin-wait hint that
	// reduces power and contention in busy loops.
   547  	MOVL	cycles+0(FP), AX
   548  again:
   549  	PAUSE
   550  	SUBL	$1, AX
   551  	JNZ	again
   552  	RET
   553  
   554  
   555  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   556  	// Stores are already ordered on x86, so this is just a
   557  	// compile barrier.
	// (The call itself stops the compiler from reordering stores
	// across this point; no fence instruction is required.)
   558  	RET
   559  
   560  // func jmpdefer(fv *funcval, argp uintptr)
   561  // argp is a caller SP.
   562  // called from deferreturn.
   563  // 1. pop the caller
   564  // 2. sub 5 bytes from the callers return
   565  // 3. jmp to the argument
   566  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	// Rewind the saved return address by 5 bytes — the length of the
	// CALL deferreturn instruction — so deferreturn runs again after
	// the deferred function below returns.
   567  	MOVQ	fv+0(FP), DX	// fn
   568  	MOVQ	argp+8(FP), BX	// caller sp
   569  	LEAQ	-8(BX), SP	// caller sp after CALL
   570  	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   571  	SUBQ	$5, (SP)	// return to CALL again
   572  	MOVQ	0(DX), BX
   573  	JMP	BX	// but first run the deferred function
   574  
   575  // Save state of caller into g->sched. Smashes R8, R9.
   576  TEXT gosave<>(SB),NOSPLIT,$0
	// Record the caller's PC, SP, and BP into g->sched so asmcgocall
	// can later switch back. Clobbers only R8/R9 (see header comment).
   577  	get_tls(R8)
   578  	MOVQ	g(R8), R8
   579  	MOVQ	0(SP), R9
   580  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   581  	LEAQ	8(SP), R9
   582  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   583  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   584  	MOVQ	BP, (g_sched+gobuf_bp)(R8)
   585  	// Assert ctxt is zero. See func save.
   586  	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
   587  	TESTQ	R9, R9
   588  	JZ	2(PC)			// ctxt == 0: skip badctxt
   589  	CALL	runtime·badctxt(SB)
   590  	RET
   591  
   592  // func asmcgocall(fn, arg unsafe.Pointer) int32
   593  // Call fn(arg) on the scheduler stack,
   594  // aligned appropriately for the gcc ABI.
   595  // See cgocall.go for more details.
   596  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	// Call the C function fn(arg), switching to the g0 (system) stack
	// first unless we are already on g0/gsignal or have no g at all.
   597  	MOVQ	fn+0(FP), AX
   598  	MOVQ	arg+8(FP), BX
   599  
	// DX = incoming SP, used below to record stack depth.
   600  	MOVQ	SP, DX
   601  
   602  	// Figure out if we need to switch to m->g0 stack.
   603  	// We get called to create new OS threads too, and those
   604  	// come in on the m->g0 stack already.
   605  	get_tls(CX)
   606  	MOVQ	g(CX), R8
   607  	CMPQ	R8, $0
   608  	JEQ	nosave
   609  	MOVQ	g_m(R8), R8
   610  	MOVQ	m_g0(R8), SI
   611  	MOVQ	g(CX), DI
   612  	CMPQ	SI, DI
   613  	JEQ	nosave
   614  	MOVQ	m_gsignal(R8), SI
   615  	CMPQ	SI, DI
   616  	JEQ	nosave
   617  
   618  	// Switch to system stack.
   619  	MOVQ	m_g0(R8), SI
   620  	CALL	gosave<>(SB)
   621  	MOVQ	SI, g(CX)
   622  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   623  
   624  	// Now on a scheduling stack (a pthread-created stack).
   625  	// Make sure we have enough room for 4 stack-backed fast-call
   626  	// registers as per windows amd64 calling convention.
   627  	SUBQ	$64, SP
   628  	ANDQ	$~15, SP	// alignment for gcc ABI
   629  	MOVQ	DI, 48(SP)	// save g
   630  	MOVQ	(g_stack+stack_hi)(DI), DI
   631  	SUBQ	DX, DI
   632  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   633  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   634  	MOVQ	BX, CX		// CX = first argument in Win64
   635  	CALL	AX
   636  
   637  	// Restore registers, g, stack pointer.
	// Recompute the old SP as stack_hi - depth: the stack may have
	// been copied while we were in C (see comment at 40(SP) above).
   638  	get_tls(CX)
   639  	MOVQ	48(SP), DI
   640  	MOVQ	(g_stack+stack_hi)(DI), SI
   641  	SUBQ	40(SP), SI
   642  	MOVQ	DI, g(CX)
   643  	MOVQ	SI, SP
   644  
   645  	MOVL	AX, ret+16(FP)
   646  	RET
   647  
   648  nosave:
   649  	// Running on a system stack, perhaps even without a g.
   650  	// Having no g can happen during thread creation or thread teardown
   651  	// (see needm/dropm on Solaris, for example).
   652  	// This code is like the above sequence but without saving/restoring g
   653  	// and without worrying about the stack moving out from under us
   654  	// (because we're on a system stack, not a goroutine stack).
   655  	// The above code could be used directly if already on a system stack,
   656  	// but then the only path through this code would be a rare case on Solaris.
   657  	// Using this code for all "already on system stack" calls exercises it more,
   658  	// which should help keep it correct.
   659  	SUBQ	$64, SP
   660  	ANDQ	$~15, SP
   661  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   662  	MOVQ	DX, 40(SP)	// save original stack pointer
   663  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   664  	MOVQ	BX, CX		// CX = first argument in Win64
   665  	CALL	AX
   666  	MOVQ	40(SP), SI	// restore original stack pointer
   667  	MOVQ	SI, SP
   668  	MOVL	AX, ret+16(FP)
   669  	RET
   670  
   671  // func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
   672  // Turn the fn into a Go func (by taking its address) and call
   673  // cgocallback_gofunc.
   674  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	// Take &fn so the first argument becomes a Go function value,
	// then forward all four arguments to cgocallback_gofunc.
   675  	LEAQ	fn+0(FP), AX
   676  	MOVQ	AX, 0(SP)
   677  	MOVQ	frame+8(FP), AX
   678  	MOVQ	AX, 8(SP)
   679  	MOVQ	framesize+16(FP), AX
   680  	MOVQ	AX, 16(SP)
   681  	MOVQ	ctxt+24(FP), AX
   682  	MOVQ	AX, 24(SP)
   683  	MOVQ	$runtime·cgocallback_gofunc(SB), AX
   684  	CALL	AX
   685  	RET
   686  
   687  // func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
   688  // See cgocall.go for more details.
   689  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	// Entry point for C code calling back into Go: acquire an m if the
	// thread has none, switch from g0 to m->curg, run cgocallbackg,
	// then restore the g0 state and release a borrowed m.
   690  	NO_LOCAL_POINTERS
   691  
   692  	// If g is nil, Go did not create the current thread.
   693  	// Call needm to obtain one m for temporary use.
   694  	// In this case, we're running on the thread stack, so there's
   695  	// lots of space, but the linker doesn't know. Hide the call from
   696  	// the linker analysis by using an indirect call through AX.
   697  	get_tls(CX)
   698  #ifdef GOOS_windows
	// On Windows the TLS base itself may still be zero here; treat
	// that the same as g == nil.
   699  	MOVL	$0, BX
   700  	CMPQ	CX, $0
   701  	JEQ	2(PC)
   702  #endif
   703  	MOVQ	g(CX), BX
   704  	CMPQ	BX, $0
   705  	JEQ	needm
   706  	MOVQ	g_m(BX), BX
   707  	MOVQ	BX, R8 // holds oldm until end of function
   708  	JMP	havem
   709  needm:
	// 0(SP) doubles as needm's argument and, afterwards, the saved
	// oldm (zero here, meaning "borrowed").
   710  	MOVQ	$0, 0(SP)
   711  	MOVQ	$runtime·needm(SB), AX
   712  	CALL	AX
   713  	MOVQ	0(SP), R8
   714  	get_tls(CX)
   715  	MOVQ	g(CX), BX
   716  	MOVQ	g_m(BX), BX
   717  
   718  	// Set m->sched.sp = SP, so that if a panic happens
   719  	// during the function we are about to execute, it will
   720  	// have a valid SP to run on the g0 stack.
   721  	// The next few lines (after the havem label)
   722  	// will save this SP onto the stack and then write
   723  	// the same SP back to m->sched.sp. That seems redundant,
   724  	// but if an unrecovered panic happens, unwindm will
   725  	// restore the g->sched.sp from the stack location
   726  	// and then systemstack will try to use it. If we don't set it here,
   727  	// that restored SP will be uninitialized (typically 0) and
   728  	// will not be usable.
   729  	MOVQ	m_g0(BX), SI
   730  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   731  
   732  havem:
   733  	// Now there's a valid m, and we're running on its m->g0.
   734  	// Save current m->g0->sched.sp on stack and then set it to SP.
   735  	// Save current sp in m->g0->sched.sp in preparation for
   736  	// switch back to m->curg stack.
   737  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   738  	MOVQ	m_g0(BX), SI
   739  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   740  	MOVQ	AX, 0(SP)
   741  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   742  
   743  	// Switch to m->curg stack and call runtime.cgocallbackg.
   744  	// Because we are taking over the execution of m->curg
   745  	// but *not* resuming what had been running, we need to
   746  	// save that information (m->curg->sched) so we can restore it.
   747  	// We can restore m->curg->sched.sp easily, because calling
   748  	// runtime.cgocallbackg leaves SP unchanged upon return.
   749  	// To save m->curg->sched.pc, we push it onto the stack.
   750  	// This has the added benefit that it looks to the traceback
   751  	// routine like cgocallbackg is going to return to that
   752  	// PC (because the frame we allocate below has the same
   753  	// size as cgocallback_gofunc's frame declared above)
   754  	// so that the traceback will seamlessly trace back into
   755  	// the earlier calls.
   756  	//
   757  	// In the new goroutine, 8(SP) holds the saved R8.
   758  	MOVQ	m_curg(BX), SI
   759  	MOVQ	SI, g(CX)
   760  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   761  	MOVQ	(g_sched+gobuf_pc)(SI), BX
   762  	MOVQ	BX, -8(DI)
   763  	// Compute the size of the frame, including return PC and, if
   764  	// GOEXPERIMENT=framepointer, the saved base pointer
   765  	MOVQ	ctxt+24(FP), BX
   766  	LEAQ	fv+0(FP), AX
   767  	SUBQ	SP, AX
   768  	SUBQ	AX, DI
   769  	MOVQ	DI, SP
   770  
   771  	MOVQ	R8, 8(SP)
   772  	MOVQ	BX, 0(SP)
   773  	CALL	runtime·cgocallbackg(SB)
   774  	MOVQ	8(SP), R8
   775  
   776  	// Compute the size of the frame again. FP and SP have
   777  	// completely different values here than they did above,
   778  	// but only their difference matters.
   779  	LEAQ	fv+0(FP), AX
   780  	SUBQ	SP, AX
   781  
   782  	// Restore g->sched (== m->curg->sched) from saved values.
   783  	get_tls(CX)
   784  	MOVQ	g(CX), SI
   785  	MOVQ	SP, DI
   786  	ADDQ	AX, DI
   787  	MOVQ	-8(DI), BX
   788  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   789  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   790  
   791  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   792  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   793  	// so we do not have to restore it.)
   794  	MOVQ	g(CX), BX
   795  	MOVQ	g_m(BX), BX
   796  	MOVQ	m_g0(BX), SI
   797  	MOVQ	SI, g(CX)
   798  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   799  	MOVQ	0(SP), AX
   800  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   801  
   802  	// If the m on entry was nil, we called needm above to borrow an m
   803  	// for the duration of the call. Since the call is over, return it with dropm.
   804  	CMPQ	R8, $0
   805  	JNE 3(PC)			// R8 != 0: we owned an m already, skip dropm
   806  	MOVQ	$runtime·dropm(SB), AX
   807  	CALL	AX
   808  
   809  	// Done!
   810  	RET
   811  
   812  // func setg(gg *g)
   813  // set g. for use by needm.
   814  TEXT runtime·setg(SB), NOSPLIT, $0-8
	// Install gg as the current g in TLS. On Windows, also maintain
	// the TLS slot at 0x28(GS): nil clears it, otherwise it points at
	// the m's tls array.
   815  	MOVQ	gg+0(FP), BX
   816  #ifdef GOOS_windows
   817  	CMPQ	BX, $0
   818  	JNE	settls
   819  	MOVQ	$0, 0x28(GS)
   820  	RET
   821  settls:
   822  	MOVQ	g_m(BX), AX
   823  	LEAQ	m_tls(AX), AX
   824  	MOVQ	AX, 0x28(GS)
   825  #endif
   826  	get_tls(CX)
   827  	MOVQ	BX, g(CX)
   828  	RET
   829  
   830  // void setg_gcc(G*); set g called from gcc.
   831  TEXT setg_gcc<>(SB),NOSPLIT,$0
	// Called from C code (passed to _cgo_init in rt0_go): the new g
	// arrives in DI per the C ABI.
   832  	get_tls(AX)
   833  	MOVQ	DI, g(AX)
   834  	RET
   835  
   836  TEXT runtime·abort(SB),NOSPLIT,$0-0
	// Trap with INT3; spin forever if execution somehow continues.
   837  	INT	$3
   838  loop:
   839  	JMP	loop
   840  
   841  // check that SP is in range [g->stack.lo, g->stack.hi)
   842  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	// Abort unless stack.lo < SP <= stack.hi (unsigned compares).
   843  	get_tls(CX)
   844  	MOVQ	g(CX), AX
   845  	CMPQ	(g_stack+stack_hi)(AX), SP
   846  	JHI	2(PC)			// hi > SP: ok, skip abort
   847  	CALL	runtime·abort(SB)
   848  	CMPQ	SP, (g_stack+stack_lo)(AX)
   849  	JHI	2(PC)			// SP > lo: ok, skip abort
   850  	CALL	runtime·abort(SB)
   851  	RET
   852  
   853  // func cputicks() int64
   854  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	// Serialize with LFENCE on Intel, MFENCE otherwise (see the flag
	// set in rt0_go), then read the TSC. RDTSC returns the counter in
	// EDX:EAX; merge into a single 64-bit value.
   855  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   856  	JNE	mfence
   857  	LFENCE
   858  	JMP	done
   859  mfence:
   860  	MFENCE
   861  done:
   862  	RDTSC
   863  	SHLQ	$32, DX
   864  	ADDQ	DX, AX
   865  	MOVQ	AX, ret+0(FP)
   866  	RET
   867  
   868  // func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
   869  // hash function using AES hardware instructions
   870  TEXT runtime·aeshash(SB),NOSPLIT,$0-32
	// Load aeshashbody's register arguments (AX=data, CX=length,
	// DX=&result) and tail-jump; h stays at 8(FP) where aeshashbody
	// reads it.
   871  	MOVQ	p+0(FP), AX	// ptr to data
   872  	MOVQ	s+16(FP), CX	// size
   873  	LEAQ	ret+24(FP), DX
   874  	JMP	runtime·aeshashbody(SB)
   875  
   876  // func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
   877  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
	// Unpack the string header (data pointer, length) into the
	// AX/CX/DX register arguments aeshashbody expects, then tail-jump.
   878  	MOVQ	p+0(FP), AX	// ptr to string struct
   879  	MOVQ	8(AX), CX	// length of string
   880  	MOVQ	(AX), AX	// string data
   881  	LEAQ	ret+16(FP), DX
   882  	JMP	runtime·aeshashbody(SB)
   883  
// AX: data
// CX: length
// DX: address to put return value
// Also reads h+8(FP): the per-table seed, taken from the frame of the
// tail-calling wrapper (aeshash/aeshashstr), which share this layout.
// Dispatches on length so that every size class reads the input with a
// fixed number of (possibly overlapping) 16-byte loads.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Size-class dispatch.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0			// empty input: hash is just the scrambled seed

	// We always load 16 bytes even though only CX (<16) are valid.
	// If address+16 has bits 4..11 all zero, the load could cross a
	// page boundary into an unmapped page; handle that separately.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX			// CX*2: masks entries are 16 bytes = 2 quads
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1		// zero the bytes past the end of the data
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX			// CX*2: shifts entries are 16 bytes = 2 quads
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1		// move the CX valid bytes to the low end
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// The two 16-byte loads overlap when CX < 32; overlap is fine
	// because the length was already mixed into the seed above.
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 bytes and last 32 bytes (overlapping when CX < 64)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// xor each lane with its seed
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the 4 lanes down to one
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 bytes and last 64 bytes (may overlap)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	// CX = (CX-1) >> 7: the full blocks from the front, excluding the
	// trailing (possibly partial) block already loaded above.
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	// (AESENC with a memory-sourced round key mixes 16 data bytes
	// per lane into the running state.)
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// fold the 8 lanes down to one
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET
  1214  
// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of a 4-byte value: insert the data into the seed
// register and run 3 AES rounds with the per-process key schedule.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET
  1225  
// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of an 8-byte value: insert the data into the seed
// register and run 3 AES rounds with the per-process key schedule.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET
  1236  
// simple mask to get rid of data in the high part of the register.
// masks is a table of sixteen 16-byte entries; entry k (at offset k*16)
// keeps the low k bytes and zeroes the rest, for use with PAND after a
// full 16-byte load of a k-byte tail (k = 0..15).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1271  
// func checkASM() bool
// Sanity-checked at startup: reports whether the assembly constants
// satisfy their alignment assumptions.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	// (PAND/PSHUFB with a memory operand require it)
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET
  1281  
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// shifts is a table of sixteen 16-byte PSHUFB control entries; entry k
// (at offset k*16) moves the top k bytes of the register down to the
// bottom and zeroes the rest (0xff control bytes produce zeros).
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1318  
// return0 returns 0. Used by the runtime to set the AX result register
// from contexts that cannot express it in Go.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  1322  
  1323  
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// (Only clobbers AX and CX, which are caller-saved in the C ABI.)
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = current g
	MOVQ	g_m(AX), AX	// AX = g.m
	MOVQ	m_curg(AX), AX	// AX = m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi (return value)
	RET
  1333  
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP exists so that the saved return PC (goexit+1) still
// falls inside goexit's code range for traceback purposes.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1341  
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the moduledata to append to the runtime's linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)	// lastmoduledatap.next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)	// lastmoduledatap = DI
	POPQ	R15
	RET
  1350  
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13		// R13 = g.m.p
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14		// reserve 2 slots (value + *slot)
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// NOTE: the flags from this CMPQ must stay live until the JEQ below;
	// only MOVQ instructions (which don't touch flags) appear in between.
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1440  
// Error string reported by debugCallV1 when the requested argument
// frame exceeds the largest debugCall function (65536 bytes).
DATA	debugCallFrameTooLarge<>+0x00(SB)/8, $"call fra"
DATA	debugCallFrameTooLarge<>+0x08(SB)/8, $"me too l"
DATA	debugCallFrameTooLarge<>+0x10(SB)/4, $"arge"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $0x14	// Size duplicated below
  1445  
// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
// DEBUG_CALL_DISPATCH tries one debugCall frame size: if the requested
// frame fits in MAXSIZE, call the corresponding debugCall stub and
// jump to restore; otherwise fall through to the next candidate.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$0x14, 8(SP)	// length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1601  
// DEBUG_CALL_FN defines a debugCall stub with a MAXSIZE-byte frame.
// AX=0 + INT3 tells the debugger the frame is ready to fill in and
// call; AX=1 + INT3 reports that the injected call returned.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1622  
// func debugCallPanicked(val interface{})
// Reports a panic from a debugger-injected call: copies the panic
// value's interface words to the top of the stack and raises INT3
// with AX=2 so the debugger can inspect it.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX	// interface type word
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX	// interface data word
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3
	RET