github.com/dgraph-io/simdjson-go@v0.3.0/find_quote_mask_and_bits_amd64.s (about)

     1  //+build !noasm !appengine gc
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  #include "common.h"
     5  
     // LCDATA1 is a 192-byte constant table made of three 64-byte rows, each row
     // repeating a single byte value. The AVX2 routine loads the first 32 bytes of a
     // row and the AVX-512 routine loads all 64, so both share the same offsets.
     //   +0x00: 0x22 (ASCII '"') - compared for equality against the input bytes
     //   +0x40: 0x80             - XORed into the input bytes before the signed compare
     //   +0x80: 0xa0 (0x20^0x80) - signed greater-than threshold for the XORed bytes
     6  DATA LCDATA1<>+0x000(SB)/8, $0x2222222222222222 // row 0: 64 x 0x22
     7  DATA LCDATA1<>+0x008(SB)/8, $0x2222222222222222
     8  DATA LCDATA1<>+0x010(SB)/8, $0x2222222222222222
     9  DATA LCDATA1<>+0x018(SB)/8, $0x2222222222222222
    10  DATA LCDATA1<>+0x020(SB)/8, $0x2222222222222222
    11  DATA LCDATA1<>+0x028(SB)/8, $0x2222222222222222
    12  DATA LCDATA1<>+0x030(SB)/8, $0x2222222222222222
    13  DATA LCDATA1<>+0x038(SB)/8, $0x2222222222222222
    14  DATA LCDATA1<>+0x040(SB)/8, $0x8080808080808080 // row 1: 64 x 0x80
    15  DATA LCDATA1<>+0x048(SB)/8, $0x8080808080808080
    16  DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
    17  DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
    18  DATA LCDATA1<>+0x060(SB)/8, $0x8080808080808080
    19  DATA LCDATA1<>+0x068(SB)/8, $0x8080808080808080
    20  DATA LCDATA1<>+0x070(SB)/8, $0x8080808080808080
    21  DATA LCDATA1<>+0x078(SB)/8, $0x8080808080808080
    22  DATA LCDATA1<>+0x080(SB)/8, $0xa0a0a0a0a0a0a0a0 // row 2: 64 x 0xa0
    23  DATA LCDATA1<>+0x088(SB)/8, $0xa0a0a0a0a0a0a0a0
    24  DATA LCDATA1<>+0x090(SB)/8, $0xa0a0a0a0a0a0a0a0
    25  DATA LCDATA1<>+0x098(SB)/8, $0xa0a0a0a0a0a0a0a0
    26  DATA LCDATA1<>+0x0a0(SB)/8, $0xa0a0a0a0a0a0a0a0
    27  DATA LCDATA1<>+0x0a8(SB)/8, $0xa0a0a0a0a0a0a0a0
    28  DATA LCDATA1<>+0x0b0(SB)/8, $0xa0a0a0a0a0a0a0a0
    29  DATA LCDATA1<>+0x0b8(SB)/8, $0xa0a0a0a0a0a0a0a0
    30  GLOBL LCDATA1<>(SB), 8, $192 // 192 bytes total; flag value 8 is RODATA in Go's textflag.h
    31  
    // ·_find_quote_mask_and_bits is the frame-argument entry point ($0-48: five 8-byte
    // arguments and one 8-byte result) in front of ·__find_quote_mask_and_bits.
    //   input+0(FP)                   pointer; 64 bytes are loaded from it
    //   odd_ends+8(FP)                64-bit value, handed to the callee in DX
    //   prev_iter_inside_quote+16(FP) pointer, handed to the callee in CX (read and rewritten there)
    //   quote_bits+24(FP)             pointer, handed to the callee in R8 (written there)
    //   error_mask+32(FP)             pointer, handed to the callee in R9 (OR-updated there)
    //   quote_mask+40(FP)             result: whatever the callee leaves in AX
    32  TEXT ·_find_quote_mask_and_bits(SB), $0-48
    33  
    34  	MOVQ input+0(FP), DI
    35  	MOVQ odd_ends+8(FP), DX
    36  	MOVQ prev_iter_inside_quote+16(FP), CX
    37  	MOVQ quote_bits+24(FP), R8
    38  	MOVQ error_mask+32(FP), R9
    39  
    40  	VMOVDQU (DI), Y8     // Y8 = input bytes 0-31 (unaligned load)
    41  	VMOVDQU 0x20(DI), Y9 // Y9 = input bytes 32-63 (unaligned load)
    42  
    43  	CALL ·__find_quote_mask_and_bits(SB) // register-argument core; result comes back in AX
    44  
    45  	VZEROUPPER // clear the upper YMM halves before returning
    46  	MOVQ AX, quote_mask+40(FP)
    47  	RET
    48  
    // ·__find_quote_mask_and_bits is the register-argument AVX2 core; it reads no FP arguments.
    // In:   Y8, Y9 = low and high 32 bytes of a 64-byte input block
    //       DX     = 64-bit mask; quote positions whose bit is set in DX are discarded
    //       CX     = pointer to a 64-bit carry word (read, then overwritten)
    //       R8     = pointer that receives the filtered quote bitmask
    //       R9     = pointer to a 64-bit word that error bits are ORed into
    // Out:  AX     = prefix-XOR of the filtered quote bitmask, XORed with the incoming carry word
    //       (CX)   = 0 or all-ones, copied from bit 63 of AX
    // Writes AX, DX, SI, BP, Y0-Y3 and the flags.
    // NOTE(review): BP is overwritten while the declared frame size is $0, so the assembler
    // adds no BP save/restore here - confirm this is acceptable for frame-pointer unwinding.
    49  TEXT ·__find_quote_mask_and_bits(SB), $0
    50  	LEAQ LCDATA1<>(SB), BP // BP = base of the constant table
    51  
    52  	VMOVDQA    Y8, Y0         // Y0 = low 32 input bytes
    53  	VMOVDQA    Y9, Y1         // Y1 = high 32 input bytes
    54  	VMOVDQA    (BP), Y2       // Y2 = 32 x 0x22 ('"')
    55  	VPCMPEQB   Y2, Y0, Y3     // Y3 = 0xff in every low-half byte equal to 0x22
    56  	VPMOVMSKB  Y3, AX         // AX = one bit per byte for bytes 0-31
    57  	VPCMPEQB   Y2, Y1, Y2     // same equality test on the high half
    58  	VPMOVMSKB  Y2, SI         // SI = one bit per byte for bytes 32-63
    59  	SHLQ       $32, SI        // move the high-half bits to positions 32-63
    60  	ORQ        AX, SI         // SI = 64-bit mask of all 0x22 bytes
    61  	NOTQ       DX             // invert the incoming mask ...
    62  	ANDQ       SI, DX         // ... so DX = quote bits with the incoming DX positions cleared
    63  	MOVQ       DX, (R8)       // store the filtered quote bits through R8
    64  	VMOVQ      DX, X2         // X2 (low qword) = filtered quote bits
    65  	VPCMPEQD   X3, X3, X3     // X3 = all ones
    66  	VPCLMULQDQ $0, X3, X2, X2 // carry-less multiply by all-ones: bit i of the low qword = XOR of DX bits 0..i
    67  	VMOVQ      X2, AX         // AX = that prefix XOR
    68  	XORQ       (CX), AX       // fold in the carry word stored at (CX)
    69  	VMOVDQA    0x40(BP), Y2   // Y2 = 32 x 0x80
    70  	VPXOR      Y2, Y0, Y0     // flip the top bit of each low-half byte so the signed compare below orders them as unsigned
    71  	VMOVDQA    0x80(BP), Y3   // Y3 = 32 x 0xa0 (0x20 with its top bit flipped)
    72  	VPCMPGTB   Y0, Y3, Y0     // Y0 = 0xff where Y3 > Y0 (signed), i.e. where the original byte is below 0x20
    73  	VPMOVMSKB  Y0, DX         // DX = below-0x20 bits for bytes 0-31
    74  	VPXOR      Y2, Y1, Y0     // same top-bit flip for the high half
    75  	VPCMPGTB   Y0, Y3, Y0     // same below-0x20 test for the high half
    76  	VPMOVMSKB  Y0, SI         // SI = below-0x20 bits for bytes 32-63
    77  	SHLQ       $32, SI        // move the high-half bits to positions 32-63
    78  	ORQ        DX, SI         // SI = 64-bit mask of bytes below 0x20
    79  	ANDQ       AX, SI         // keep only the positions whose bit is set in AX
    80  	ORQ        SI, (R9)       // accumulate those bits into the word at (R9)
    81  	MOVQ       AX, DX         // copy the result so AX survives as the return value
    82  	SARQ       $63, DX        // DX = 0 or all-ones, replicated from bit 63 of AX
    83  	MOVQ       DX, (CX)       // store it as the new carry word
    84  	RET
    85  
    // ·_find_quote_mask_and_bits_avx512 is the frame-argument entry point ($0-48: three
    // 8-byte arguments and three 8-byte results) in front of the AVX-512 core.
    //   input+0(FP)                   pointer; 64 bytes are loaded from it into Z8
    //   odd_ends+8(FP)                64-bit value, handed to the core in DX
    //   prev_iter_inside_quote+16(FP) pointer, handed to the core in CX (read and rewritten there)
    //   error_mask+24(FP)             result: contents of K_ERRORMASK after the core returns
    //   quote_bits+32(FP)             result: contents of K_QUOTEBITS after the core returns
    //   quote_mask+40(FP)             result: whatever the core leaves in AX
    // K_ERRORMASK and K_QUOTEBITS are not defined in this file; presumably they come from common.h.
    86  TEXT ·_find_quote_mask_and_bits_avx512(SB), $0-48
    87  
    88  	MOVQ input+0(FP), DI
    89  	MOVQ odd_ends+8(FP), DX
    90  	MOVQ prev_iter_inside_quote+16(FP), CX
    91  
    92  	KXORQ K_ERRORMASK, K_ERRORMASK, K_ERRORMASK // x XOR x = 0: the core only ORs bits into this register and it is returned by value, so it must start empty
    93  
    94  	VMOVDQU32 (DI), Z8 // Z8 = all 64 input bytes (unaligned load)
    95  
    96  	CALL ·__init_quote_mask_and_bits_avx512(SB) // load the three constant rows into QMAB_CONST1..3
    97  	CALL ·__find_quote_mask_and_bits_avx512(SB) // results in AX, K_QUOTEBITS, K_ERRORMASK
    98  
    99  	VZEROUPPER // clear the upper vector-register halves before returning
   100  	KMOVQ K_ERRORMASK, error_mask+24(FP)
   101  	KMOVQ K_QUOTEBITS, quote_bits+32(FP)
   102  	MOVQ  AX, quote_mask+40(FP)
   103  	RET
   104  
   // Register names for the three 64-byte constant rows of LCDATA1. They are loaded by
   // ·__init_quote_mask_and_bits_avx512 (from offsets 0x00, 0x40 and 0x80 respectively)
   // and read by ·__find_quote_mask_and_bits_avx512.
   105  #define QMAB_CONST1 Z17
   106  #define QMAB_CONST2 Z18
   107  #define QMAB_CONST3 Z19
   108  
   // ·__init_quote_mask_and_bits_avx512 loads the three 64-byte rows of LCDATA1 into
   // QMAB_CONST1..3. It takes no arguments and writes only BP and those three registers.
   // NOTE(review): BP is overwritten while the declared frame size is $0, so the assembler
   // adds no BP save/restore here - confirm this is acceptable for frame-pointer unwinding.
   109  TEXT ·__init_quote_mask_and_bits_avx512(SB), $0
   110  	LEAQ      LCDATA1<>(SB), BP      // BP = base of the constant table
   111  	VMOVDQU32 0x00(BP), QMAB_CONST1  // 64 x 0x22 ('"')
   112  	VMOVDQU32 0x40(BP), QMAB_CONST2  // 64 x 0x80
   113  	VMOVDQU32 0x80(BP), QMAB_CONST3  // 64 x 0xa0
   114  	RET
   115  
   // ·__find_quote_mask_and_bits_avx512 is the register-argument AVX-512 core; it reads no FP arguments.
   // In:   Z8            = 64-byte input block
   //       DX            = 64-bit mask; quote positions whose bit is set in DX are discarded
   //       CX            = pointer to a 64-bit carry word (read, then overwritten)
   //       QMAB_CONST1-3 = constant rows, expected to be loaded already (see ·__init_quote_mask_and_bits_avx512)
   //       K_ERRORMASK   = running error mask; bits are ORed in, never cleared here
   // Out:  AX            = prefix-XOR of the filtered quote bitmask, XORed with the incoming carry word
   //       K_QUOTEBITS   = filtered quote bitmask
   //       K_ERRORMASK   = incoming value OR (bytes below 0x20 whose AX bit is set)
   //       (CX)          = 0 or all-ones, copied from bit 63 of AX
   // Writes AX, DX, X2, X3, Z0, K_TEMP1, K_TEMP2 and the flags.
   116  TEXT ·__find_quote_mask_and_bits_avx512(SB), $0
   117  	VPCMPEQB   QMAB_CONST1, Z8, K_QUOTEBITS      // one mask bit per input byte equal to 0x22 ('"')
   118  	KMOVQ      DX, K_TEMP1                       // K_TEMP1 = incoming mask
   119  	KNOTQ      K_TEMP1, K_TEMP1                  // invert it ...
   120  	KANDQ      K_TEMP1, K_QUOTEBITS, K_QUOTEBITS // ... and clear those positions from the quote bits
   121  	KMOVQ      K_QUOTEBITS, DX                   // DX = filtered quote bits
   122  	VMOVQ      DX, X2                            // X2 (low qword) = filtered quote bits
   123  	VPCMPEQD   X3, X3, X3                        // X3 = all ones
   124  	VPCLMULQDQ $0, X3, X2, X2                    // carry-less multiply by all-ones: bit i of the low qword = XOR of DX bits 0..i
   125  	VMOVQ      X2, AX                            // AX = that prefix XOR
   126  	XORQ       (CX), AX                          // fold in the carry word stored at (CX)
   127  	VPXORD     QMAB_CONST2, Z8, Z0               // flip the top bit of each byte so the signed compare below orders them as unsigned
   128  	VPCMPGTB   Z0, QMAB_CONST3, K_TEMP1          // K_TEMP1 bit set where QMAB_CONST3 > Z0 (signed), i.e. where the original byte is below 0x20
   129  	KMOVQ      AX, K_TEMP2                       // K_TEMP2 = AX
   130  	KANDQ      K_TEMP2, K_TEMP1, K_TEMP1         // keep only the positions whose bit is set in AX
   131  	KORQ       K_TEMP1, K_ERRORMASK, K_ERRORMASK // accumulate them into the running error mask
   132  	MOVQ       AX, DX                            // copy the result so AX survives as the return value
   133  	SARQ       $63, DX                           // DX = 0 or all-ones, replicated from bit 63 of AX
   134  	MOVQ       DX, (CX)                          // store it as the new carry word
   135  	RET