github.com/cloudwego/iasm@v0.2.0/repl/asm_amd64.s (about)

     1  //
     2  // Copyright 2024 CloudWeGo Authors
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  //
    16  
    17  #include "go_asm.h"
    18  #include "funcdata.h"
    19  #include "textflag.h"
    20  
    21  TEXT ·dumpregs(SB), (NOSPLIT | NOFRAME), $0
        // dumpregs stores a snapshot of the CPU registers into a memory block.
        //
        // In:   one 8-byte stack argument at 8(SP) on entry (directly above the
        //       return address): the base address of the destination block.
        // Out:  nothing is returned. AX and CX are reloaded from the snapshot and
        //       the flags are popped back before RET; every other register is
        //       only read. RET does not remove the argument from the stack.
        //
        // Offsets written, relative to the block base:
        //   0x000 AX   0x008 BX   0x010 CX   0x018 DX   0x020 DI   0x028 SI
        //   0x030 BP   0x038 SP as it was on entry      0x040-0x078 R8-R15
        //   0x080 return address of this call           0x088 flags
        //   0x090 CS   0x098 FS   0x0a0 GS   (16-bit stores into 8-byte slots)
        //   0x0c0-0x1bf  X0-X15        (16 bytes each)
        //   0x1c0-0x2bf  xmm16-xmm31   (16 bytes each)
        //   0x2c0-0x4bf  Y0-Y15        (32 bytes each)
        //   0x4c0-0x6bf  ymm16-ymm31   (32 bytes each)
        //   0x6c0-0xebf  zmm0-zmm31    (64 bytes each)
        //   0xec0-0xeff  k0-k7         (8-byte slots)
        //
        // Everything from Y0 onwards is gated on the byte flags ·hasAVX,
        // ·hasAVX512VL, ·hasAVX512F and ·hasAVX512BW, which are defined outside
        // this file; slots belonging to a feature whose flag is zero are not
        // written at all.
    22      NO_LOCAL_POINTERS
    23      PUSHFQ                  // flags copy #1: restored by the POPFQ just before RET
    24      PUSHFQ                  // flags copy #2: popped into block+0x88 further down
    25      PUSHQ AX                // park the caller's AX so AX can hold the block pointer
    26      MOVQ  0x20(SP), AX      // 3 pushes + return address = 0x20 bytes: the pointer argument
    27      POPQ  (AX)              // caller's AX -> block+0x00
    28      MOVQ  BX, 0x08(AX)
    29      MOVQ  CX, 0x10(AX)
    30      MOVQ  DX, 0x18(AX)
    31      MOVQ  DI, 0x20(AX)
    32      MOVQ  SI, 0x28(AX)
    33      MOVQ  BP, 0x30(AX)
    34      MOVQ  SP, 0x38(AX)      // SP here is entry SP - 16 (two flag words still pushed)
    35      ADDQ  $16, 0x38(AX)     // correct the stored value to SP as it was on entry
    36      MOVQ  R8, 0x40(AX)
    37      MOVQ  R9, 0x48(AX)
    38      MOVQ  R10, 0x50(AX)
    39      MOVQ  R11, 0x58(AX)
    40      MOVQ  R12, 0x60(AX)
    41      MOVQ  R13, 0x68(AX)
    42      MOVQ  R14, 0x70(AX)
    43      MOVQ  R15, 0x78(AX)
    44      MOVQ  16(SP), CX        // return address (above the two flag words); CX is already saved at 0x10
    45      MOVQ  CX, 0x80(AX)
    46      POPQ  0x88(AX)          // flags copy #2 -> block+0x88; copy #1 stays on the stack
    47      MOVW  CS, 0x90(AX)      // MOVW: only the low 2 bytes of each segment slot are written
    48      MOVW  FS, 0x98(AX)
    49      MOVW  GS, 0xa0(AX)
    50  
    51      // SSE xmm0 ~ xmm15 registers
    52      MOVOU X0, 0xc0(AX)
    53      MOVOU X1, 0xd0(AX)
    54      MOVOU X2, 0xe0(AX)
    55      MOVOU X3, 0xf0(AX)
    56      MOVOU X4, 0x100(AX)
    57      MOVOU X5, 0x110(AX)
    58      MOVOU X6, 0x120(AX)
    59      MOVOU X7, 0x130(AX)
    60      MOVOU X8, 0x140(AX)
    61      MOVOU X9, 0x150(AX)
    62      MOVOU X10, 0x160(AX)
    63      MOVOU X11, 0x170(AX)
    64      MOVOU X12, 0x180(AX)
    65      MOVOU X13, 0x190(AX)
    66      MOVOU X14, 0x1a0(AX)
    67      MOVOU X15, 0x1b0(AX)
    68  
    69      // check for AVX
            // (CMPB changes the flags; the caller's flags were captured above and
            // one copy is still on the stack for the final POPFQ.)
    70      CMPB ·hasAVX(SB), $0
    71      JE   _no_avx            // no AVX: skip every remaining vector/mask store
    72  
    73      // AVX ymm0 ~ ymm15 registers
    74      VMOVDQU Y0, 0x2c0(AX)
    75      VMOVDQU Y1, 0x2e0(AX)
    76      VMOVDQU Y2, 0x300(AX)
    77      VMOVDQU Y3, 0x320(AX)
    78      VMOVDQU Y4, 0x340(AX)
    79      VMOVDQU Y5, 0x360(AX)
    80      VMOVDQU Y6, 0x380(AX)
    81      VMOVDQU Y7, 0x3a0(AX)
    82      VMOVDQU Y8, 0x3c0(AX)
    83      VMOVDQU Y9, 0x3e0(AX)
    84      VMOVDQU Y10, 0x400(AX)
    85      VMOVDQU Y11, 0x420(AX)
    86      VMOVDQU Y12, 0x440(AX)
    87      VMOVDQU Y13, 0x460(AX)
    88      VMOVDQU Y14, 0x480(AX)
    89      VMOVDQU Y15, 0x4a0(AX)
    90  
    91      // check for AVX512VL
    92      CMPB ·hasAVX512VL(SB), $0
    93      JE   _no_avx512vl
    94  
            // The AVX-512 stores below are emitted as raw bytes (LONG/WORD/BYTE/QUAD
            // directives); the trailing comment on each line gives the intended
            // instruction in AT&T syntax. In the EVEX forms the final BYTE is a
            // compressed 8-bit displacement that the CPU scales by the vector size
            // (e.g. 0x1c * 16 = 0x1c0 for the first xmm store).
    95      // AVX512VL xmm16 ~ xmm31 registers
    96      LONG $0x087fe162; WORD $0x407f; BYTE $0x1c  // vmovdqu8 %xmm16, 0x1c0(%rax)
    97      LONG $0x087fe162; WORD $0x487f; BYTE $0x1d  // vmovdqu8 %xmm17, 0x1d0(%rax)
    98      LONG $0x087fe162; WORD $0x507f; BYTE $0x1e  // vmovdqu8 %xmm18, 0x1e0(%rax)
    99      LONG $0x087fe162; WORD $0x587f; BYTE $0x1f  // vmovdqu8 %xmm19, 0x1f0(%rax)
   100      LONG $0x087fe162; WORD $0x607f; BYTE $0x20  // vmovdqu8 %xmm20, 0x200(%rax)
   101      LONG $0x087fe162; WORD $0x687f; BYTE $0x21  // vmovdqu8 %xmm21, 0x210(%rax)
   102      LONG $0x087fe162; WORD $0x707f; BYTE $0x22  // vmovdqu8 %xmm22, 0x220(%rax)
   103      LONG $0x087fe162; WORD $0x787f; BYTE $0x23  // vmovdqu8 %xmm23, 0x230(%rax)
   104      LONG $0x087f6162; WORD $0x407f; BYTE $0x24  // vmovdqu8 %xmm24, 0x240(%rax)
   105      LONG $0x087f6162; WORD $0x487f; BYTE $0x25  // vmovdqu8 %xmm25, 0x250(%rax)
   106      LONG $0x087f6162; WORD $0x507f; BYTE $0x26  // vmovdqu8 %xmm26, 0x260(%rax)
   107      LONG $0x087f6162; WORD $0x587f; BYTE $0x27  // vmovdqu8 %xmm27, 0x270(%rax)
   108      LONG $0x087f6162; WORD $0x607f; BYTE $0x28  // vmovdqu8 %xmm28, 0x280(%rax)
   109      LONG $0x087f6162; WORD $0x687f; BYTE $0x29  // vmovdqu8 %xmm29, 0x290(%rax)
   110      LONG $0x087f6162; WORD $0x707f; BYTE $0x2a  // vmovdqu8 %xmm30, 0x2a0(%rax)
   111      LONG $0x087f6162; WORD $0x787f; BYTE $0x2b  // vmovdqu8 %xmm31, 0x2b0(%rax)
   112  
   113      // AVX512VL ymm16 ~ ymm31 registers
   114      LONG $0x28ffe162; WORD $0x407f; BYTE $0x26  // vmovdqu16 %ymm16, 0x4c0(%rax)
   115      LONG $0x28ffe162; WORD $0x487f; BYTE $0x27  // vmovdqu16 %ymm17, 0x4e0(%rax)
   116      LONG $0x28ffe162; WORD $0x507f; BYTE $0x28  // vmovdqu16 %ymm18, 0x500(%rax)
   117      LONG $0x28ffe162; WORD $0x587f; BYTE $0x29  // vmovdqu16 %ymm19, 0x520(%rax)
   118      LONG $0x28ffe162; WORD $0x607f; BYTE $0x2a  // vmovdqu16 %ymm20, 0x540(%rax)
   119      LONG $0x28ffe162; WORD $0x687f; BYTE $0x2b  // vmovdqu16 %ymm21, 0x560(%rax)
   120      LONG $0x28ffe162; WORD $0x707f; BYTE $0x2c  // vmovdqu16 %ymm22, 0x580(%rax)
   121      LONG $0x28ffe162; WORD $0x787f; BYTE $0x2d  // vmovdqu16 %ymm23, 0x5a0(%rax)
   122      LONG $0x28ff6162; WORD $0x407f; BYTE $0x2e  // vmovdqu16 %ymm24, 0x5c0(%rax)
   123      LONG $0x28ff6162; WORD $0x487f; BYTE $0x2f  // vmovdqu16 %ymm25, 0x5e0(%rax)
   124      LONG $0x28ff6162; WORD $0x507f; BYTE $0x30  // vmovdqu16 %ymm26, 0x600(%rax)
   125      LONG $0x28ff6162; WORD $0x587f; BYTE $0x31  // vmovdqu16 %ymm27, 0x620(%rax)
   126      LONG $0x28ff6162; WORD $0x607f; BYTE $0x32  // vmovdqu16 %ymm28, 0x640(%rax)
   127      LONG $0x28ff6162; WORD $0x687f; BYTE $0x33  // vmovdqu16 %ymm29, 0x660(%rax)
   128      LONG $0x28ff6162; WORD $0x707f; BYTE $0x34  // vmovdqu16 %ymm30, 0x680(%rax)
   129      LONG $0x28ff6162; WORD $0x787f; BYTE $0x35  // vmovdqu16 %ymm31, 0x6a0(%rax)
   130  
   131  _no_avx512vl:
            // Reached both by falling through the AVX512VL stores and by the jump
            // that skips them; the AVX512F flag is tested independently.
   132      CMPB ·hasAVX512F(SB), $0
   133      JE   _no_avx512f        // no AVX512F: zmm and mask-register slots stay untouched
   134  
   135      // AVX512F zmm0 ~ zmm31 registers
   136      LONG $0x487ef162; WORD $0x407f; BYTE $0x1b  // vmovdqu32 %zmm0, 0x6c0(%rax)
   137      LONG $0x487ef162; WORD $0x487f; BYTE $0x1c  // vmovdqu32 %zmm1, 0x700(%rax)
   138      LONG $0x487ef162; WORD $0x507f; BYTE $0x1d  // vmovdqu32 %zmm2, 0x740(%rax)
   139      LONG $0x487ef162; WORD $0x587f; BYTE $0x1e  // vmovdqu32 %zmm3, 0x780(%rax)
   140      LONG $0x487ef162; WORD $0x607f; BYTE $0x1f  // vmovdqu32 %zmm4, 0x7c0(%rax)
   141      LONG $0x487ef162; WORD $0x687f; BYTE $0x20  // vmovdqu32 %zmm5, 0x800(%rax)
   142      LONG $0x487ef162; WORD $0x707f; BYTE $0x21  // vmovdqu32 %zmm6, 0x840(%rax)
   143      LONG $0x487ef162; WORD $0x787f; BYTE $0x22  // vmovdqu32 %zmm7, 0x880(%rax)
   144      LONG $0x487e7162; WORD $0x407f; BYTE $0x23  // vmovdqu32 %zmm8, 0x8c0(%rax)
   145      LONG $0x487e7162; WORD $0x487f; BYTE $0x24  // vmovdqu32 %zmm9, 0x900(%rax)
   146      LONG $0x487e7162; WORD $0x507f; BYTE $0x25  // vmovdqu32 %zmm10, 0x940(%rax)
   147      LONG $0x487e7162; WORD $0x587f; BYTE $0x26  // vmovdqu32 %zmm11, 0x980(%rax)
   148      LONG $0x487e7162; WORD $0x607f; BYTE $0x27  // vmovdqu32 %zmm12, 0x9c0(%rax)
   149      LONG $0x487e7162; WORD $0x687f; BYTE $0x28  // vmovdqu32 %zmm13, 0xa00(%rax)
   150      LONG $0x487e7162; WORD $0x707f; BYTE $0x29  // vmovdqu32 %zmm14, 0xa40(%rax)
   151      LONG $0x487e7162; WORD $0x787f; BYTE $0x2a  // vmovdqu32 %zmm15, 0xa80(%rax)
   152      LONG $0x487ee162; WORD $0x407f; BYTE $0x2b  // vmovdqu32 %zmm16, 0xac0(%rax)
   153      LONG $0x487ee162; WORD $0x487f; BYTE $0x2c  // vmovdqu32 %zmm17, 0xb00(%rax)
   154      LONG $0x487ee162; WORD $0x507f; BYTE $0x2d  // vmovdqu32 %zmm18, 0xb40(%rax)
   155      LONG $0x487ee162; WORD $0x587f; BYTE $0x2e  // vmovdqu32 %zmm19, 0xb80(%rax)
   156      LONG $0x487ee162; WORD $0x607f; BYTE $0x2f  // vmovdqu32 %zmm20, 0xbc0(%rax)
   157      LONG $0x487ee162; WORD $0x687f; BYTE $0x30  // vmovdqu32 %zmm21, 0xc00(%rax)
   158      LONG $0x487ee162; WORD $0x707f; BYTE $0x31  // vmovdqu32 %zmm22, 0xc40(%rax)
   159      LONG $0x487ee162; WORD $0x787f; BYTE $0x32  // vmovdqu32 %zmm23, 0xc80(%rax)
   160      LONG $0x487e6162; WORD $0x407f; BYTE $0x33  // vmovdqu32 %zmm24, 0xcc0(%rax)
   161      LONG $0x487e6162; WORD $0x487f; BYTE $0x34  // vmovdqu32 %zmm25, 0xd00(%rax)
   162      LONG $0x487e6162; WORD $0x507f; BYTE $0x35  // vmovdqu32 %zmm26, 0xd40(%rax)
   163      LONG $0x487e6162; WORD $0x587f; BYTE $0x36  // vmovdqu32 %zmm27, 0xd80(%rax)
   164      LONG $0x487e6162; WORD $0x607f; BYTE $0x37  // vmovdqu32 %zmm28, 0xdc0(%rax)
   165      LONG $0x487e6162; WORD $0x687f; BYTE $0x38  // vmovdqu32 %zmm29, 0xe00(%rax)
   166      LONG $0x487e6162; WORD $0x707f; BYTE $0x39  // vmovdqu32 %zmm30, 0xe40(%rax)
   167      LONG $0x487e6162; WORD $0x787f; BYTE $0x3a  // vmovdqu32 %zmm31, 0xe80(%rax)
   168  
   169      // check for AVX512BW
   170      CMPB ·hasAVX512BW(SB), $0
   171      JE   _no_avx512bw
   172  
   173      // AVX512BW 64-bit K registers
            // (VEX-encoded with a full 32-bit displacement: 8 bytes from QUAD plus
            // one trailing BYTE for the last displacement byte.)
   174      QUAD $0x000ec08091f8e1c4; BYTE $0x00    // kmovq %k0, 0xec0(%rax)
   175      QUAD $0x000ec88891f8e1c4; BYTE $0x00    // kmovq %k1, 0xec8(%rax)
   176      QUAD $0x000ed09091f8e1c4; BYTE $0x00    // kmovq %k2, 0xed0(%rax)
   177      QUAD $0x000ed89891f8e1c4; BYTE $0x00    // kmovq %k3, 0xed8(%rax)
   178      QUAD $0x000ee0a091f8e1c4; BYTE $0x00    // kmovq %k4, 0xee0(%rax)
   179      QUAD $0x000ee8a891f8e1c4; BYTE $0x00    // kmovq %k5, 0xee8(%rax)
   180      QUAD $0x000ef0b091f8e1c4; BYTE $0x00    // kmovq %k6, 0xef0(%rax)
   181      QUAD $0x000ef8b891f8e1c4; BYTE $0x00    // kmovq %k7, 0xef8(%rax)
   182      JMP  _avx512bw_done
   183  
   184  _no_avx512bw:
            // Only reached when ·hasAVX512F is nonzero and ·hasAVX512BW is zero:
            // store the 16-bit form of each mask register. The slots stay 8 bytes
            // apart, so bytes 2-7 of every slot are left as they were.
   185      QUAD $0x00000ec08091f8c5    // kmovw %k0, 0xec0(%rax)
   186      QUAD $0x00000ec88891f8c5    // kmovw %k1, 0xec8(%rax)
   187      QUAD $0x00000ed09091f8c5    // kmovw %k2, 0xed0(%rax)
   188      QUAD $0x00000ed89891f8c5    // kmovw %k3, 0xed8(%rax)
   189      QUAD $0x00000ee0a091f8c5    // kmovw %k4, 0xee0(%rax)
   190      QUAD $0x00000ee8a891f8c5    // kmovw %k5, 0xee8(%rax)
   191      QUAD $0x00000ef0b091f8c5    // kmovw %k6, 0xef0(%rax)
   192      QUAD $0x00000ef8b891f8c5    // kmovw %k7, 0xef8(%rax)
   193  
            // Common exit for every feature path: undo the two register clobbers
            // and the flag changes made by the CMPB tests.
   194  _avx512bw_done:
   195  _no_avx512f:
   196  _no_avx:
   197      MOVQ 0x10(AX), CX       // reload CX, which was used as scratch for the return address
   198      MOVQ (AX), AX           // reload AX last: it held the block pointer until here
   199      POPFQ                   // restore the flags from the remaining pushed copy
   200      RET
   201  
   202  TEXT ·execaddr(SB), (NOSPLIT | NOFRAME), $0
        // execaddr takes a register snapshot, transfers control to a
        // caller-supplied address, and takes a second snapshot once that code
        // returns here.
        //
        // Stack on entry, as used by the code below (the Go-side declaration is
        // not visible in this file - confirm the parameter names and types there):
        //   0x00(SP)  return address
        //   0x08(SP)  address to jump to (consumed by exectrampoline)
        //   0x10(SP)  block pointer handed to the first dumpregs call
        //   0x18(SP)  block pointer handed to the second dumpregs call
        //
        // NOTE(review): the second snapshot is only reached if the target code
        // returns with RET and leaves SP where it found it - nothing here
        // enforces that.
   203      NO_LOCAL_POINTERS
   204      LONG  $0x102474ff       // pushq 0x10(%rsp)
   205      CALL  ·dumpregs(SB)     // "before" snapshot into the block just pushed
   206      LEAQ  8(SP), SP         // drop the pushed pointer; LEAQ leaves the flags untouched
   207      CALL  exectrampoline(SB) // pushes a return address, then jumps to the target
   208      LONG  $0x182474ff       // pushq 0x18(%rsp)
   209      CALL  ·dumpregs(SB)     // "after" snapshot; no register is modified between the target's RET and here
   210      LEAQ  8(SP), SP         // drop the pushed pointer
   211      RET
   212  
   213  TEXT exectrampoline(SB), (NOSPLIT | NOFRAME), $0
        // exectrampoline is CALLed from execaddr, so on entry the stack holds:
        //   0x00(SP)  return address back into execaddr (just pushed by the CALL)
        //   0x08(SP)  execaddr's own return address
        //   0x10(SP)  execaddr's first stack argument, the address to execute
        // Control is handed over with a jump rather than a call, so the target
        // sees the return address into execaddr on top of the stack and a plain
        // RET there resumes execaddr right after its CALL.
        // NOTE(review): this relies on the assembler encoding a JMP with a memory
        // operand as an indirect jump through that stack slot - confirm in the
        // disassembly.
   214      NO_LOCAL_POINTERS
   215      JMP 0x10(SP)