// github.com/cloudwego/iasm@v0.2.0/repl/asm_amd64.s
//
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"

// dumpregs snapshots the CPU register state into the buffer whose address is
// stored in the first stack slot above the return address.
//
// Buffer layout written by this routine (byte offsets from the buffer base):
//   0x000 rax    0x008 rbx    0x010 rcx    0x018 rdx
//   0x020 rdi    0x028 rsi    0x030 rbp    0x038 rsp (value at function entry)
//   0x040 r8  .. 0x078 r15    (8 bytes each)
//   0x080 rip (this routine's return address)
//   0x088 rflags
//   0x090 cs     0x098 fs     0x0a0 gs     (16-bit stores)
//   0x0c0 xmm0  ~ xmm15   (16 bytes each)
//   0x1c0 xmm16 ~ xmm31   (16 bytes each, only if hasAVX and hasAVX512VL)
//   0x2c0 ymm0  ~ ymm15   (32 bytes each, only if hasAVX)
//   0x4c0 ymm16 ~ ymm31   (32 bytes each, only if hasAVX and hasAVX512VL)
//   0x6c0 zmm0  ~ zmm31   (64 bytes each, only if hasAVX and hasAVX512F)
//   0xec0 k0    ~ k7      (8-byte slots; 64-bit stores if hasAVX512BW,
//                          otherwise only the low 16 bits are stored)
//
// All general-purpose registers and RFLAGS hold their entry values again when
// the routine returns: AX and CX are used as scratch and reloaded from the
// buffer, RFLAGS is restored from the stack. No vector or mask register is
// written.
TEXT ·dumpregs(SB), (NOSPLIT | NOFRAME), $0
    NO_LOCAL_POINTERS

    // RFLAGS is pushed twice: one copy is popped into the buffer, the other
    // is popped back into RFLAGS on exit (the CMPB feature checks clobber it).
    PUSHFQ
    PUSHFQ
    PUSHQ   AX

    // Three quadwords are on the stack now, so the return address sits at
    // 0x18(SP) and the buffer pointer at 0x20(SP).
    MOVQ    0x20(SP), AX
    POPQ    (AX)                    // original rax -> +0x00

    // general-purpose registers
    MOVQ    BX, 0x08(AX)
    MOVQ    CX, 0x10(AX)
    MOVQ    DX, 0x18(AX)
    MOVQ    DI, 0x20(AX)
    MOVQ    SI, 0x28(AX)
    MOVQ    BP, 0x30(AX)
    MOVQ    SP, 0x38(AX)
    ADDQ    $16, 0x38(AX)           // skip the two saved RFLAGS copies: rsp as of function entry
    MOVQ    R8, 0x40(AX)
    MOVQ    R9, 0x48(AX)
    MOVQ    R10, 0x50(AX)
    MOVQ    R11, 0x58(AX)
    MOVQ    R12, 0x60(AX)
    MOVQ    R13, 0x68(AX)
    MOVQ    R14, 0x70(AX)
    MOVQ    R15, 0x78(AX)

    // rip: the return address, found above the two saved RFLAGS copies.
    // CX is free to use as scratch here; its value is already in the buffer.
    MOVQ    16(SP), CX
    MOVQ    CX, 0x80(AX)

    // rflags (second pushed copy) and segment selectors
    POPQ    0x88(AX)
    MOVW    CS, 0x90(AX)
    MOVW    FS, 0x98(AX)
    MOVW    GS, 0xa0(AX)

    // SSE xmm0 ~ xmm15 registers
    MOVOU   X0, 0xc0(AX)
    MOVOU   X1, 0xd0(AX)
    MOVOU   X2, 0xe0(AX)
    MOVOU   X3, 0xf0(AX)
    MOVOU   X4, 0x100(AX)
    MOVOU   X5, 0x110(AX)
    MOVOU   X6, 0x120(AX)
    MOVOU   X7, 0x130(AX)
    MOVOU   X8, 0x140(AX)
    MOVOU   X9, 0x150(AX)
    MOVOU   X10, 0x160(AX)
    MOVOU   X11, 0x170(AX)
    MOVOU   X12, 0x180(AX)
    MOVOU   X13, 0x190(AX)
    MOVOU   X14, 0x1a0(AX)
    MOVOU   X15, 0x1b0(AX)

    // check for AVX
    CMPB    ·hasAVX(SB), $0
    JE      _no_avx

    // AVX ymm0 ~ ymm15 registers
    VMOVDQU Y0, 0x2c0(AX)
    VMOVDQU Y1, 0x2e0(AX)
    VMOVDQU Y2, 0x300(AX)
    VMOVDQU Y3, 0x320(AX)
    VMOVDQU Y4, 0x340(AX)
    VMOVDQU Y5, 0x360(AX)
    VMOVDQU Y6, 0x380(AX)
    VMOVDQU Y7, 0x3a0(AX)
    VMOVDQU Y8, 0x3c0(AX)
    VMOVDQU Y9, 0x3e0(AX)
    VMOVDQU Y10, 0x400(AX)
    VMOVDQU Y11, 0x420(AX)
    VMOVDQU Y12, 0x440(AX)
    VMOVDQU Y13, 0x460(AX)
    VMOVDQU Y14, 0x480(AX)
    VMOVDQU Y15, 0x4a0(AX)

    // check for AVX512VL
    CMPB    ·hasAVX512VL(SB), $0
    JE      _no_avx512vl

    // The 128-bit and 256-bit stores below are encoded as vmovdqu32
    // (EVEX.F3.0F.W0 7F), which needs only AVX512F + AVX512VL. The byte/word
    // forms (vmovdqu8 / vmovdqu16) additionally require AVX512BW, which is
    // not checked at this point. The bytes written to memory are the same.
    // NOTE(review): assumes hasAVX512VL is only set when AVX512F is usable;
    // confirm against the Go-side feature detection.

    // AVX512VL xmm16 ~ xmm31 registers
    LONG $0x087ee162; WORD $0x407f; BYTE $0x1c  // vmovdqu32 %xmm16, 0x1c0(%rax)
    LONG $0x087ee162; WORD $0x487f; BYTE $0x1d  // vmovdqu32 %xmm17, 0x1d0(%rax)
    LONG $0x087ee162; WORD $0x507f; BYTE $0x1e  // vmovdqu32 %xmm18, 0x1e0(%rax)
    LONG $0x087ee162; WORD $0x587f; BYTE $0x1f  // vmovdqu32 %xmm19, 0x1f0(%rax)
    LONG $0x087ee162; WORD $0x607f; BYTE $0x20  // vmovdqu32 %xmm20, 0x200(%rax)
    LONG $0x087ee162; WORD $0x687f; BYTE $0x21  // vmovdqu32 %xmm21, 0x210(%rax)
    LONG $0x087ee162; WORD $0x707f; BYTE $0x22  // vmovdqu32 %xmm22, 0x220(%rax)
    LONG $0x087ee162; WORD $0x787f; BYTE $0x23  // vmovdqu32 %xmm23, 0x230(%rax)
    LONG $0x087e6162; WORD $0x407f; BYTE $0x24  // vmovdqu32 %xmm24, 0x240(%rax)
    LONG $0x087e6162; WORD $0x487f; BYTE $0x25  // vmovdqu32 %xmm25, 0x250(%rax)
    LONG $0x087e6162; WORD $0x507f; BYTE $0x26  // vmovdqu32 %xmm26, 0x260(%rax)
    LONG $0x087e6162; WORD $0x587f; BYTE $0x27  // vmovdqu32 %xmm27, 0x270(%rax)
    LONG $0x087e6162; WORD $0x607f; BYTE $0x28  // vmovdqu32 %xmm28, 0x280(%rax)
    LONG $0x087e6162; WORD $0x687f; BYTE $0x29  // vmovdqu32 %xmm29, 0x290(%rax)
    LONG $0x087e6162; WORD $0x707f; BYTE $0x2a  // vmovdqu32 %xmm30, 0x2a0(%rax)
    LONG $0x087e6162; WORD $0x787f; BYTE $0x2b  // vmovdqu32 %xmm31, 0x2b0(%rax)

    // AVX512VL ymm16 ~ ymm31 registers
    LONG $0x287ee162; WORD $0x407f; BYTE $0x26  // vmovdqu32 %ymm16, 0x4c0(%rax)
    LONG $0x287ee162; WORD $0x487f; BYTE $0x27  // vmovdqu32 %ymm17, 0x4e0(%rax)
    LONG $0x287ee162; WORD $0x507f; BYTE $0x28  // vmovdqu32 %ymm18, 0x500(%rax)
    LONG $0x287ee162; WORD $0x587f; BYTE $0x29  // vmovdqu32 %ymm19, 0x520(%rax)
    LONG $0x287ee162; WORD $0x607f; BYTE $0x2a  // vmovdqu32 %ymm20, 0x540(%rax)
    LONG $0x287ee162; WORD $0x687f; BYTE $0x2b  // vmovdqu32 %ymm21, 0x560(%rax)
    LONG $0x287ee162; WORD $0x707f; BYTE $0x2c  // vmovdqu32 %ymm22, 0x580(%rax)
    LONG $0x287ee162; WORD $0x787f; BYTE $0x2d  // vmovdqu32 %ymm23, 0x5a0(%rax)
    LONG $0x287e6162; WORD $0x407f; BYTE $0x2e  // vmovdqu32 %ymm24, 0x5c0(%rax)
    LONG $0x287e6162; WORD $0x487f; BYTE $0x2f  // vmovdqu32 %ymm25, 0x5e0(%rax)
    LONG $0x287e6162; WORD $0x507f; BYTE $0x30  // vmovdqu32 %ymm26, 0x600(%rax)
    LONG $0x287e6162; WORD $0x587f; BYTE $0x31  // vmovdqu32 %ymm27, 0x620(%rax)
    LONG $0x287e6162; WORD $0x607f; BYTE $0x32  // vmovdqu32 %ymm28, 0x640(%rax)
    LONG $0x287e6162; WORD $0x687f; BYTE $0x33  // vmovdqu32 %ymm29, 0x660(%rax)
    LONG $0x287e6162; WORD $0x707f; BYTE $0x34  // vmovdqu32 %ymm30, 0x680(%rax)
    LONG $0x287e6162; WORD $0x787f; BYTE $0x35  // vmovdqu32 %ymm31, 0x6a0(%rax)

_no_avx512vl:
    // check for AVX512F
    CMPB    ·hasAVX512F(SB), $0
    JE      _no_avx512f

    // AVX512F zmm0 ~ zmm31 registers
    LONG $0x487ef162; WORD $0x407f; BYTE $0x1b  // vmovdqu32 %zmm0, 0x6c0(%rax)
    LONG $0x487ef162; WORD $0x487f; BYTE $0x1c  // vmovdqu32 %zmm1, 0x700(%rax)
    LONG $0x487ef162; WORD $0x507f; BYTE $0x1d  // vmovdqu32 %zmm2, 0x740(%rax)
    LONG $0x487ef162; WORD $0x587f; BYTE $0x1e  // vmovdqu32 %zmm3, 0x780(%rax)
    LONG $0x487ef162; WORD $0x607f; BYTE $0x1f  // vmovdqu32 %zmm4, 0x7c0(%rax)
    LONG $0x487ef162; WORD $0x687f; BYTE $0x20  // vmovdqu32 %zmm5, 0x800(%rax)
    LONG $0x487ef162; WORD $0x707f; BYTE $0x21  // vmovdqu32 %zmm6, 0x840(%rax)
    LONG $0x487ef162; WORD $0x787f; BYTE $0x22  // vmovdqu32 %zmm7, 0x880(%rax)
    LONG $0x487e7162; WORD $0x407f; BYTE $0x23  // vmovdqu32 %zmm8, 0x8c0(%rax)
    LONG $0x487e7162; WORD $0x487f; BYTE $0x24  // vmovdqu32 %zmm9, 0x900(%rax)
    LONG $0x487e7162; WORD $0x507f; BYTE $0x25  // vmovdqu32 %zmm10, 0x940(%rax)
    LONG $0x487e7162; WORD $0x587f; BYTE $0x26  // vmovdqu32 %zmm11, 0x980(%rax)
    LONG $0x487e7162; WORD $0x607f; BYTE $0x27  // vmovdqu32 %zmm12, 0x9c0(%rax)
    LONG $0x487e7162; WORD $0x687f; BYTE $0x28  // vmovdqu32 %zmm13, 0xa00(%rax)
    LONG $0x487e7162; WORD $0x707f; BYTE $0x29  // vmovdqu32 %zmm14, 0xa40(%rax)
    LONG $0x487e7162; WORD $0x787f; BYTE $0x2a  // vmovdqu32 %zmm15, 0xa80(%rax)
    LONG $0x487ee162; WORD $0x407f; BYTE $0x2b  // vmovdqu32 %zmm16, 0xac0(%rax)
    LONG $0x487ee162; WORD $0x487f; BYTE $0x2c  // vmovdqu32 %zmm17, 0xb00(%rax)
    LONG $0x487ee162; WORD $0x507f; BYTE $0x2d  // vmovdqu32 %zmm18, 0xb40(%rax)
    LONG $0x487ee162; WORD $0x587f; BYTE $0x2e  // vmovdqu32 %zmm19, 0xb80(%rax)
    LONG $0x487ee162; WORD $0x607f; BYTE $0x2f  // vmovdqu32 %zmm20, 0xbc0(%rax)
    LONG $0x487ee162; WORD $0x687f; BYTE $0x30  // vmovdqu32 %zmm21, 0xc00(%rax)
    LONG $0x487ee162; WORD $0x707f; BYTE $0x31  // vmovdqu32 %zmm22, 0xc40(%rax)
    LONG $0x487ee162; WORD $0x787f; BYTE $0x32  // vmovdqu32 %zmm23, 0xc80(%rax)
    LONG $0x487e6162; WORD $0x407f; BYTE $0x33  // vmovdqu32 %zmm24, 0xcc0(%rax)
    LONG $0x487e6162; WORD $0x487f; BYTE $0x34  // vmovdqu32 %zmm25, 0xd00(%rax)
    LONG $0x487e6162; WORD $0x507f; BYTE $0x35  // vmovdqu32 %zmm26, 0xd40(%rax)
    LONG $0x487e6162; WORD $0x587f; BYTE $0x36  // vmovdqu32 %zmm27, 0xd80(%rax)
    LONG $0x487e6162; WORD $0x607f; BYTE $0x37  // vmovdqu32 %zmm28, 0xdc0(%rax)
    LONG $0x487e6162; WORD $0x687f; BYTE $0x38  // vmovdqu32 %zmm29, 0xe00(%rax)
    LONG $0x487e6162; WORD $0x707f; BYTE $0x39  // vmovdqu32 %zmm30, 0xe40(%rax)
    LONG $0x487e6162; WORD $0x787f; BYTE $0x3a  // vmovdqu32 %zmm31, 0xe80(%rax)

    // check for AVX512BW
    CMPB    ·hasAVX512BW(SB), $0
    JE      _no_avx512bw

    // AVX512BW 64-bit K registers
    QUAD $0x000ec08091f8e1c4; BYTE $0x00    // kmovq %k0, 0xec0(%rax)
    QUAD $0x000ec88891f8e1c4; BYTE $0x00    // kmovq %k1, 0xec8(%rax)
    QUAD $0x000ed09091f8e1c4; BYTE $0x00    // kmovq %k2, 0xed0(%rax)
    QUAD $0x000ed89891f8e1c4; BYTE $0x00    // kmovq %k3, 0xed8(%rax)
    QUAD $0x000ee0a091f8e1c4; BYTE $0x00    // kmovq %k4, 0xee0(%rax)
    QUAD $0x000ee8a891f8e1c4; BYTE $0x00    // kmovq %k5, 0xee8(%rax)
    QUAD $0x000ef0b091f8e1c4; BYTE $0x00    // kmovq %k6, 0xef0(%rax)
    QUAD $0x000ef8b891f8e1c4; BYTE $0x00    // kmovq %k7, 0xef8(%rax)
    JMP     _avx512bw_done

_no_avx512bw:
    // AVX512F-only 16-bit K registers; each still occupies an 8-byte slot,
    // but only the low 2 bytes of the slot are written.
    QUAD $0x00000ec08091f8c5                // kmovw %k0, 0xec0(%rax)
    QUAD $0x00000ec88891f8c5                // kmovw %k1, 0xec8(%rax)
    QUAD $0x00000ed09091f8c5                // kmovw %k2, 0xed0(%rax)
    QUAD $0x00000ed89891f8c5                // kmovw %k3, 0xed8(%rax)
    QUAD $0x00000ee0a091f8c5                // kmovw %k4, 0xee0(%rax)
    QUAD $0x00000ee8a891f8c5                // kmovw %k5, 0xee8(%rax)
    QUAD $0x00000ef0b091f8c5                // kmovw %k6, 0xef0(%rax)
    QUAD $0x00000ef8b891f8c5                // kmovw %k7, 0xef8(%rax)

_avx512bw_done:
_no_avx512f:
_no_avx:
    // Reload the two scratch registers from the snapshot (CX first, since AX
    // is the base pointer), then restore RFLAGS from the first pushed copy.
    MOVQ    0x10(AX), CX
    MOVQ    (AX), AX
    POPFQ
    RET

// execaddr dumps the register state, transfers control to a code address via
// exectrampoline, and dumps the register state again afterwards.
//
// Stack slots above the return address, as used by the code below:
//   0x08(SP)  address jumped to by exectrampoline
//   0x10(SP)  buffer pointer passed to the first dumpregs call
//   0x18(SP)  buffer pointer passed to the second dumpregs call
// NOTE(review): presumably declared on the Go side as a three-argument
// function (addr, before, after); confirm against the Go declaration.
TEXT ·execaddr(SB), (NOSPLIT | NOFRAME), $0
    NO_LOCAL_POINTERS

    // Snapshot "before". The PUSHQ with a memory operand is hand-encoded.
    LONG $0x102474ff                        // pushq 0x10(%rsp)
    CALL    ·dumpregs(SB)
    LEAQ    8(SP), SP                       // drop the pushed argument; LEAQ leaves RFLAGS untouched

    CALL    exectrampoline(SB)

    // Snapshot "after". SP is back at its entry value here, so 0x18(SP) is
    // the third stack slot above the return address.
    LONG $0x182474ff                        // pushq 0x18(%rsp)
    CALL    ·dumpregs(SB)
    LEAQ    8(SP), SP                       // drop the pushed argument; LEAQ leaves RFLAGS untouched
    RET

// exectrampoline jumps to the address stored at 0x10(SP). When called from
// execaddr, 0(SP) holds the return address into execaddr, 8(SP) holds
// execaddr's own return address, and 0x10(SP) is execaddr's first stack slot.
// NOTE(review): the target code is presumably expected to end with RET so
// that control comes back to execaddr; confirm with the caller.
TEXT exectrampoline(SB), (NOSPLIT | NOFRAME), $0
    NO_LOCAL_POINTERS
    JMP     0x10(SP)