//+build !noasm !appengine gc
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// Origin: github.com/dgraph-io/simdjson-go@v0.3.0/find_quote_mask_and_bits_amd64.s
//
// Go (Plan 9 syntax) assembly for amd64. Operands are written sources first,
// destination last, so "VPCMPGTB A, B, D" computes D = (B > A).
//
// For one 64-byte block of input, the routines in this file produce:
//   - quote_bits: one bit per input byte equal to 0x22, with the bits of
//     odd_ends cleared;
//   - quote_mask: the running (prefix) XOR of quote_bits, XOR'ed with the
//     64-bit word stored at prev_iter_inside_quote;
//   - error_mask: bits for input bytes below 0x20 that are also set in
//     quote_mask;
//   - an updated prev_iter_inside_quote word: bit 63 of quote_mask copied
//     into all 64 bits.
//
// The routines whose names start with two underscores take their arguments
// in registers and are reached with CALL from the Go-callable wrappers in
// this file (and presumably from other assembly files of the package --
// TODO confirm).
//
// NOTE(review): space-separated terms on a build-constraint line are OR'ed,
// so the constraint on the first line is satisfied as soon as any one of its
// three terms holds. If "all three" was intended the terms would have to be
// comma-separated; left untouched here because the constraints of the
// sibling files are not visible.

#include "common.h"

// LCDATA1<> is a 192-byte file-local table (GLOBL flag 8, which is RODATA in
// Go's textflag.h) holding three 64-byte byte-splat constants:
//   +0x00  0x22 in every byte
//   +0x40  0x80 in every byte
//   +0x80  0xa0 in every byte
DATA LCDATA1<>+0x000(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x008(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x010(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x018(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x020(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x028(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x030(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x038(SB)/8, $0x2222222222222222
DATA LCDATA1<>+0x040(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x048(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x060(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x068(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x070(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x078(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x080(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x088(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x090(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x098(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x0a0(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x0a8(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x0b0(SB)/8, $0xa0a0a0a0a0a0a0a0
DATA LCDATA1<>+0x0b8(SB)/8, $0xa0a0a0a0a0a0a0a0
GLOBL LCDATA1<>(SB), 8, $192

// _find_quote_mask_and_bits is the Go-callable AVX2 entry point.
//
// Stack layout (48 bytes of arguments and results, no locals):
//   input+0(FP)                    address of the 64 input bytes
//   odd_ends+8(FP)                 64-bit mask, used by value
//   prev_iter_inside_quote+16(FP)  address of a 64-bit word, read and rewritten
//   quote_bits+24(FP)              address of a 64-bit word, overwritten
//   error_mask+32(FP)              address of a 64-bit word, OR'ed into
//   quote_mask+40(FP)              result
//
// It loads the input into Y8/Y9, the registers the worker expects, and
// stores the worker's AX as the result.
TEXT ·_find_quote_mask_and_bits(SB), $0-48
	MOVQ input+0(FP), DI
	MOVQ odd_ends+8(FP), DX
	MOVQ prev_iter_inside_quote+16(FP), CX
	MOVQ quote_bits+24(FP), R8
	MOVQ error_mask+32(FP), R9

	VMOVDQU (DI), Y8                     // input bytes  0..31
	VMOVDQU 0x20(DI), Y9                 // input bytes 32..63

	CALL ·__find_quote_mask_and_bits(SB)

	VZEROUPPER                           // leave the upper YMM halves clean before returning
	MOVQ AX, quote_mask+40(FP)
	RET

// __find_quote_mask_and_bits is the register-level AVX2 worker.
//
// In:   Y8, Y9  low / high 32 input bytes
//       DX      odd_ends (value)
//       CX      address of the prev_iter_inside_quote word
//       R8      address that receives quote_bits
//       R9      address of the error mask (new bits are OR'ed in)
// Out:  AX      quote_mask
// Writes: AX, DX, SI, Y0-Y3, flags, and the three words at (CX), (R8), (R9).
//
// The constants are addressed directly through SB. The routine has a zero
// frame, so writing BP here (as a table base) would overwrite the caller's
// frame pointer without saving it.
TEXT ·__find_quote_mask_and_bits(SB), $0
	VMOVDQA    Y8, Y0
	VMOVDQA    Y9, Y1

	// 64-bit mask of the bytes equal to 0x22.
	VMOVDQU    LCDATA1<>+0x00(SB), Y2    // 0x22 in every byte
	VPCMPEQB   Y2, Y0, Y3
	VPMOVMSKB  Y3, AX                    // bits  0..31
	VPCMPEQB   Y2, Y1, Y2
	VPMOVMSKB  Y2, SI                    // bits 32..63, still in the low half
	SHLQ       $32, SI
	ORQ        AX, SI

	// quote_bits = matches AND NOT odd_ends
	NOTQ       DX
	ANDQ       SI, DX
	MOVQ       DX, (R8)

	// Carry-less multiply by an all-ones operand: bit i of the low 64 bits of
	// the product is the XOR of bits 0..i of quote_bits.
	VMOVQ      DX, X2
	VPCMPEQD   X3, X3, X3                // X3 = all ones
	VPCLMULQDQ $0, X3, X2, X2            // $0 selects the low quadword of both sources
	VMOVQ      X2, AX
	XORQ       (CX), AX                  // fold in the word carried over at (CX)

	// Mask of bytes below 0x20: after flipping the top bit of each byte, a
	// signed "0xa0 > byte" test is true exactly for original values 0x00..0x1f.
	VMOVDQU    LCDATA1<>+0x40(SB), Y2    // 0x80 in every byte
	VPXOR      Y2, Y0, Y0
	VMOVDQU    LCDATA1<>+0x80(SB), Y3    // 0xa0 in every byte
	VPCMPGTB   Y0, Y3, Y0                // Y0 = (Y3 > Y0), signed bytes
	VPMOVMSKB  Y0, DX
	VPXOR      Y2, Y1, Y0
	VPCMPGTB   Y0, Y3, Y0
	VPMOVMSKB  Y0, SI
	SHLQ       $32, SI
	ORQ        DX, SI

	// Only positions that are also set in quote_mask are accumulated.
	ANDQ       AX, SI
	ORQ        SI, (R9)

	// Next prev_iter_inside_quote word: bit 63 of quote_mask in every bit.
	MOVQ       AX, DX
	SARQ       $63, DX
	MOVQ       DX, (CX)
	RET

// _find_quote_mask_and_bits_avx512 is the Go-callable AVX-512 entry point.
//
// Stack layout (48 bytes of arguments and results, no locals):
//   input+0(FP)                    address of the 64 input bytes
//   odd_ends+8(FP)                 64-bit mask, used by value
//   prev_iter_inside_quote+16(FP)  address of a 64-bit word, read and rewritten
//   error_mask+24(FP)              result
//   quote_bits+32(FP)              result
//   quote_mask+40(FP)              result
//
// K_ERRORMASK, K_QUOTEBITS, K_TEMP1 and K_TEMP2 are not defined in this file;
// presumably they are opmask-register macros from common.h -- TODO confirm.
TEXT ·_find_quote_mask_and_bits_avx512(SB), $0-48
	MOVQ input+0(FP), DI
	MOVQ odd_ends+8(FP), DX
	MOVQ prev_iter_inside_quote+16(FP), CX

	// NOTE(review): OR-ing a mask register with itself leaves it unchanged, so
	// K_ERRORMASK keeps whatever it held on entry and the worker ORs new bits
	// on top. If the intent is to start from an empty mask this would have to
	// be KXORQ -- confirm against the callers before changing it.
	KORQ K_ERRORMASK, K_ERRORMASK, K_ERRORMASK

	VMOVDQU32 (DI), Z8                   // all 64 input bytes

	CALL ·__init_quote_mask_and_bits_avx512(SB)
	CALL ·__find_quote_mask_and_bits_avx512(SB)

	VZEROUPPER
	KMOVQ K_ERRORMASK, error_mask+24(FP)
	KMOVQ K_QUOTEBITS, quote_bits+32(FP)
	MOVQ  AX, quote_mask+40(FP)
	RET

// ZMM registers that hold the three LCDATA1<> constants between
// __init_quote_mask_and_bits_avx512 and __find_quote_mask_and_bits_avx512.
#define QMAB_CONST1 Z17
#define QMAB_CONST2 Z18
#define QMAB_CONST3 Z19

// __init_quote_mask_and_bits_avx512 loads the three 64-byte constants into
// QMAB_CONST1..QMAB_CONST3. It writes no other register; in particular the
// table is addressed through SB so that BP is left alone in this zero-frame
// routine.
TEXT ·__init_quote_mask_and_bits_avx512(SB), $0
	VMOVDQU32 LCDATA1<>+0x00(SB), QMAB_CONST1    // 0x22 in every byte
	VMOVDQU32 LCDATA1<>+0x40(SB), QMAB_CONST2    // 0x80 in every byte
	VMOVDQU32 LCDATA1<>+0x80(SB), QMAB_CONST3    // 0xa0 in every byte
	RET

// __find_quote_mask_and_bits_avx512 is the register-level AVX-512 worker.
//
// In:   Z8             the 64 input bytes
//       DX             odd_ends (value)
//       CX             address of the prev_iter_inside_quote word
//       QMAB_CONST1-3  constants loaded by __init_quote_mask_and_bits_avx512
//       K_ERRORMASK    error bits accumulated so far
// Out:  AX             quote_mask
//       K_QUOTEBITS    quote_bits
//       K_ERRORMASK    previous value OR'ed with the new error bits
// Writes: AX, DX, X2, X3, Z0, K_TEMP1, K_TEMP2, flags, and the word at (CX).
TEXT ·__find_quote_mask_and_bits_avx512(SB), $0
	// quote_bits = (byte == 0x22) AND NOT odd_ends
	VPCMPEQB   QMAB_CONST1, Z8, K_QUOTEBITS
	KMOVQ      DX, K_TEMP1
	KNOTQ      K_TEMP1, K_TEMP1
	KANDQ      K_TEMP1, K_QUOTEBITS, K_QUOTEBITS
	KMOVQ      K_QUOTEBITS, DX

	// Carry-less multiply by an all-ones operand: bit i of the low 64 bits of
	// the product is the XOR of bits 0..i of quote_bits.
	VMOVQ      DX, X2
	VPCMPEQD   X3, X3, X3                // X3 = all ones
	VPCLMULQDQ $0, X3, X2, X2            // $0 selects the low quadword of both sources
	VMOVQ      X2, AX
	XORQ       (CX), AX                  // fold in the word carried over at (CX)

	// Bytes below 0x20 (top bit flipped, then signed "0xa0 > byte"), restricted
	// to positions set in quote_mask, are OR'ed into K_ERRORMASK.
	VPXORD     QMAB_CONST2, Z8, Z0
	VPCMPGTB   Z0, QMAB_CONST3, K_TEMP1  // K_TEMP1 = (QMAB_CONST3 > Z0), signed bytes
	KMOVQ      AX, K_TEMP2
	KANDQ      K_TEMP2, K_TEMP1, K_TEMP1
	KORQ       K_TEMP1, K_ERRORMASK, K_ERRORMASK

	// Next prev_iter_inside_quote word: bit 63 of quote_mask in every bit.
	MOVQ       AX, DX
	SARQ       $63, DX
	MOVQ       DX, (CX)
	RET