//+build !noasm,!appengine,gc
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// Build constraint: the terms above are comma-separated so that ALL of them
// must hold. In the legacy "+build" syntax space-separated terms are ORed,
// which would assemble this file whenever the gc toolchain is used, regardless
// of the noasm/appengine tags.
// NOTE(review): confirm that the Go stubs declaring these symbols carry the
// same AND-ed constraint.
//
// Syntax: Go (Plan 9) assembler, amd64. Operand order is source(s) first,
// destination last.

#include "common.h"

// LCDATA1 holds five 64-byte constants, each a short pattern repeated to fill
// 64 bytes. The AVX2 kernel loads the first 32 bytes of each constant, the
// AVX-512 kernel loads all 64.
//
//   +0x000  16-entry byte table, looked up with the raw input byte as the
//           VPSHUFB index. Entries 0..15:
//           0x10,0,0,0,0,0,0,0, 0,0x08,0x0c,0x01,0x02,0x09,0,0
//   +0x040  0x7f in every byte: clears bit 7 after the shift so that VPSHUFB
//           does not zero the lane
//   +0x080  16-entry byte table, looked up with (input >> 4). Entries 0..15:
//           0x08,0,0x12,0x04,0,0x01,0,0x01, 0,0,0,0x03,0x02,0x01,0,0
//   +0x0c0  0x07 in every byte: bits tested for the "structurals" mask
//   +0x100  0x18 in every byte: bits tested for the "whitespace" mask

// +0x000: table indexed by the input byte (low nibble)
DATA LCDATA1<>+0x000(SB)/8, $0x0000000000000010
DATA LCDATA1<>+0x008(SB)/8, $0x00000902010c0800
DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000010
DATA LCDATA1<>+0x018(SB)/8, $0x00000902010c0800
DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000010
DATA LCDATA1<>+0x028(SB)/8, $0x00000902010c0800
DATA LCDATA1<>+0x030(SB)/8, $0x0000000000000010
DATA LCDATA1<>+0x038(SB)/8, $0x00000902010c0800

// +0x040: 0x7f byte mask
DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x050(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x058(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x060(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x068(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x070(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x078(SB)/8, $0x7f7f7f7f7f7f7f7f

// +0x080: table indexed by the input byte's high nibble
DATA LCDATA1<>+0x080(SB)/8, $0x0100010004120008
DATA LCDATA1<>+0x088(SB)/8, $0x0000010203000000
DATA LCDATA1<>+0x090(SB)/8, $0x0100010004120008
DATA LCDATA1<>+0x098(SB)/8, $0x0000010203000000
DATA LCDATA1<>+0x0a0(SB)/8, $0x0100010004120008
DATA LCDATA1<>+0x0a8(SB)/8, $0x0000010203000000
DATA LCDATA1<>+0x0b0(SB)/8, $0x0100010004120008
DATA LCDATA1<>+0x0b8(SB)/8, $0x0000010203000000

// +0x0c0: 0x07 byte mask (structurals)
DATA LCDATA1<>+0x0c0(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0c8(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0d0(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0d8(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0e0(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0e8(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0f0(SB)/8, $0x0707070707070707
DATA LCDATA1<>+0x0f8(SB)/8, $0x0707070707070707

// +0x100: 0x18 byte mask (whitespace)
DATA LCDATA1<>+0x100(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x108(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x110(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x118(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x120(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x128(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x130(SB)/8, $0x1818181818181818
DATA LCDATA1<>+0x138(SB)/8, $0x1818181818181818

// Flag value 8 is RODATA in Go's textflag.h; 320 = 5 constants * 64 bytes.
GLOBL LCDATA1<>(SB), 8, $320

// ---------------------------------------------------------------------------
// ·_find_whitespace_and_structurals -- AVX2 entry point (Go stack-based ABI).
//
// Frame: $0-24 (no locals, 24 bytes of arguments).
//   input+0(FP)        pointer; 64 bytes are read from it
//   whitespace+8(FP)   pointer; a 64-bit mask is stored through it
//   structurals+16(FP) pointer; a 64-bit mask is stored through it
//
// Loads the 64 input bytes into Y8 (bytes 0-31) and Y9 (bytes 32-63), then
// delegates to ·__find_whitespace_and_structurals, which expects its inputs
// in Y8, Y9, DX and CX.
// ---------------------------------------------------------------------------
TEXT ·_find_whitespace_and_structurals(SB), $0-24

	MOVQ input+0(FP), DI
	MOVQ whitespace+8(FP), DX
	MOVQ structurals+16(FP), CX

	VMOVDQU (DI), Y8     // input bytes  0..31
	VMOVDQU 0x20(DI), Y9 // input bytes 32..63

	CALL ·__find_whitespace_and_structurals(SB)

	VZEROUPPER
	RET

// ---------------------------------------------------------------------------
// ·__find_whitespace_and_structurals -- AVX2 kernel (register-based helper).
//
// In:    Y8 = input bytes 0..31, Y9 = input bytes 32..63
//        DX = address receiving the whitespace mask
//        CX = address receiving the structurals mask
// Out:   (CX) and (DX) each receive one 64-bit mask; bit i corresponds to
//        input byte i.
// Clobb: AX, CX, SI, R8, Y0-Y5, flags. Y8/Y9 and DX are left untouched.
//
// Per input byte b the kernel computes
//     v = table@0x00[b] & table@0x80[b >> 4]
// (VPSHUFB yields 0 when bit 7 of the index byte is set, so the first lookup
// is 0 for b >= 0x80), then sets
//     structurals bit  iff (v & 0x07) != 0
//     whitespace  bit  iff (v & 0x18) != 0
//
// The table loads use VMOVDQU rather than VMOVDQA so that correctness does not
// depend on the linker placing LCDATA1 on a 32-byte boundary; this matches the
// unaligned loads used by the AVX-512 path.
// ---------------------------------------------------------------------------
TEXT ·__find_whitespace_and_structurals(SB), $0
	LEAQ LCDATA1<>(SB), R8

	// Work on copies so the caller's Y8/Y9 stay intact.
	VMOVDQA Y8, Y0
	VMOVDQA Y9, Y1

	// Low half: v = table@0x00[b] & table@0x80[b >> 4].
	VMOVDQU (R8), Y2     // Y2 = table indexed by the raw byte
	VPSHUFB Y0, Y2, Y3   // Y3 = table@0x00[b]
	VPSRLD  $4, Y0, Y0   // shift 32-bit lanes right by 4 ...
	VMOVDQU 0x40(R8), Y4 // Y4 = 0x7f mask
	VPAND   Y4, Y0, Y0   // ... and clear bit 7 of every byte
	VMOVDQU 0x80(R8), Y5 // Y5 = table indexed by the high nibble
	VPSHUFB Y0, Y5, Y0   // Y0 = table@0x80[b >> 4]
	VPAND   Y3, Y0, Y0   // Y0 = v for bytes 0..31

	// High half: same computation on Y1.
	VPSHUFB Y1, Y2, Y2 // Y2 = table@0x00[b]
	VPSRLD  $4, Y1, Y1
	VPAND   Y4, Y1, Y1
	VPSHUFB Y1, Y5, Y1 // Y1 = table@0x80[b >> 4]
	VPAND   Y2, Y1, Y1 // Y1 = v for bytes 32..63

	// Structurals: bit set where (v & 0x07) != 0.
	VMOVDQU   0xc0(R8), Y2 // Y2 = 0x07 mask
	VPAND     Y2, Y0, Y3
	VPXOR     Y4, Y4, Y4   // Y4 = 0 from here on (0x7f mask no longer needed)
	VPCMPEQB  Y4, Y3, Y3   // 0xff where (v & 0x07) == 0
	VPAND     Y2, Y1, Y2
	VPCMPEQB  Y4, Y2, Y2
	VPMOVMSKB Y3, AX       // low 32 bits (upper half of AX is zeroed)
	VPMOVMSKB Y2, SI       // high 32 bits
	SHLQ      $32, SI
	ORQ       AX, SI
	NOTQ      SI           // invert: "== 0" mask -> "!= 0" mask
	MOVQ      SI, (CX)     // *structurals = mask; CX is free after this

	// Whitespace: bit set where (v & 0x18) != 0.
	VMOVDQU   0x100(R8), Y2 // Y2 = 0x18 mask
	VPAND     Y2, Y0, Y0
	VPCMPEQB  Y4, Y0, Y0
	VPAND     Y2, Y1, Y1
	VPCMPEQB  Y4, Y1, Y1
	VPMOVMSKB Y0, AX
	VPMOVMSKB Y1, CX        // CX reused as scratch (pointer already consumed)
	SHLQ      $32, CX
	ORQ       AX, CX
	NOTQ      CX
	MOVQ      CX, (DX)      // *whitespace = mask
	RET

// ---------------------------------------------------------------------------
// ·_find_whitespace_and_structurals_avx512 -- AVX-512 entry point.
//
// Frame: $0-24.
//   input+0(FP)        pointer; 64 bytes are read from it into Z8
//   whitespace+8(FP)   receives the 64-bit whitespace mask
//   structurals+16(FP) receives the 64-bit structurals mask
//
// NOTE(review): unlike the AVX2 entry point, the masks are written directly
// into the +8/+16 frame slots rather than through pointers. That is consistent
// with a Go declaration returning (whitespace, structurals uint64) -- confirm
// against the Go stub, which is not part of this file.
//
// K_WHITESPACE and K_STRUCTURALS are not defined in this file; presumably they
// are opmask-register macros from common.h -- confirm.
//
// The constants are reloaded (·__init_...) on every call.
// ---------------------------------------------------------------------------
TEXT ·_find_whitespace_and_structurals_avx512(SB), $0-24

	MOVQ input+0(FP), DI

	VMOVDQU32 (DI), Z8 // all 64 input bytes

	CALL ·__init_whitespace_and_structurals_avx512(SB)
	CALL ·__find_whitespace_and_structurals_avx512(SB)

	VZEROUPPER
	KMOVQ K_WHITESPACE, whitespace+8(FP)
	KMOVQ K_STRUCTURALS, structurals+16(FP)
	RET

// Registers holding the AVX-512 constants between the init and find helpers.
#define ZERO_CONST   Z20
#define WSAS_CONST_1 Z21
#define WSAS_CONST_2 Z22
#define WSAS_CONST_3 Z23
#define WSAS_CONST_4 Z24
#define WSAS_CONST_5 Z25

// ---------------------------------------------------------------------------
// ·__init_whitespace_and_structurals_avx512 -- load constants for the AVX-512
// kernel.
//
// Out:   ZERO_CONST   = 0
//        WSAS_CONST_1 = LCDATA1+0x000 (table indexed by the raw byte)
//        WSAS_CONST_2 = LCDATA1+0x040 (0x7f mask)
//        WSAS_CONST_3 = LCDATA1+0x080 (table indexed by the high nibble)
//        WSAS_CONST_4 = LCDATA1+0x0c0 (0x07 mask, structurals)
//        WSAS_CONST_5 = LCDATA1+0x100 (0x18 mask, whitespace)
// Clobb: R8, Z20-Z25.
// ---------------------------------------------------------------------------
TEXT ·__init_whitespace_and_structurals_avx512(SB), $0
	LEAQ      LCDATA1<>(SB), R8
	VPXORD    ZERO_CONST, ZERO_CONST, ZERO_CONST
	VMOVDQU32 0x000(R8), WSAS_CONST_1
	VMOVDQU32 0x040(R8), WSAS_CONST_2
	VMOVDQU32 0x080(R8), WSAS_CONST_3
	VMOVDQU32 0x0c0(R8), WSAS_CONST_4
	VMOVDQU32 0x100(R8), WSAS_CONST_5
	RET

// ---------------------------------------------------------------------------
// ·__find_whitespace_and_structurals_avx512 -- AVX-512 kernel.
//
// In:    Z8 = 64 input bytes; ZERO_CONST / WSAS_CONST_1..5 as set up by
//        ·__init_whitespace_and_structurals_avx512.
// Out:   K_STRUCTURALS = bit i set iff (v & 0x07) != 0 for input byte i
//        K_WHITESPACE  = bit i set iff (v & 0x18) != 0 for input byte i
//        where v = table@0x00[b] & table@0x80[b >> 4], as in the AVX2 kernel.
// Clobb: Z0, Z3, K_STRUCTURALS, K_WHITESPACE. Z8 is left untouched.
// ---------------------------------------------------------------------------
TEXT ·__find_whitespace_and_structurals_avx512(SB), $0
	VPSHUFB  Z8, WSAS_CONST_1, Z3              // Z3 = table@0x00[b]
	VPSRLD   $4, Z8, Z0                        // shift 32-bit lanes right by 4 ...
	VPANDD   WSAS_CONST_2, Z0, Z0              // ... and clear bit 7 of every byte
	VPSHUFB  Z0, WSAS_CONST_3, Z0              // Z0 = table@0x80[b >> 4]
	VPANDD   Z3, Z0, Z0                        // Z0 = v
	VPANDD   WSAS_CONST_4, Z0, Z3              // Z3 = v & 0x07
	VPCMPEQB ZERO_CONST, Z3, K_STRUCTURALS     // set where (v & 0x07) == 0
	KNOTQ    K_STRUCTURALS, K_STRUCTURALS      // invert -> != 0
	VPANDD   WSAS_CONST_5, Z0, Z0              // Z0 = v & 0x18
	VPCMPEQB ZERO_CONST, Z0, K_WHITESPACE      // set where (v & 0x18) == 0
	KNOTQ    K_WHITESPACE, K_WHITESPACE        // invert -> != 0
	RET