github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/find_whitespace_and_structurals_amd64.s (about)

     1  //+build !noasm !appengine gc
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  #include "common.h"
     5  
     // LCDATA1 is the constant table used by every routine in this file.
     // It consists of five 64-byte rows; within a row the same 16-byte pattern
     // (or the same single byte) is repeated. The YMM code loads the first
     // 32 bytes of a row, the AVX-512 init routine loads all 64 bytes.
     //
     //   +0x000  lookup table applied to the input bytes themselves (low nibble)
     //   +0x040  0x7f in every byte, ANDed with the shifted input before the second lookup
     //   +0x080  lookup table applied to the input bytes shifted right by 4 (high nibble)
     //   +0x0c0  0x07 in every byte, selects the bits reported in the structurals mask
     //   +0x100  0x18 in every byte, selects the bits reported in the whitespace mask
     //
     // Each DATA directive stores 8 bytes, so the low-order byte of the
     // immediate is the table entry with the lowest index (amd64 is little-endian).

        // Row +0x000: low-nibble lookup table.
     6  DATA LCDATA1<>+0x000(SB)/8, $0x0000000000000010
     7  DATA LCDATA1<>+0x008(SB)/8, $0x00000902010c0800
     8  DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000010
     9  DATA LCDATA1<>+0x018(SB)/8, $0x00000902010c0800
    10  DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000010
    11  DATA LCDATA1<>+0x028(SB)/8, $0x00000902010c0800
    12  DATA LCDATA1<>+0x030(SB)/8, $0x0000000000000010
    13  DATA LCDATA1<>+0x038(SB)/8, $0x00000902010c0800
        // Row +0x040: 0x7f byte mask.
    14  DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
    15  DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
    16  DATA LCDATA1<>+0x050(SB)/8, $0x7f7f7f7f7f7f7f7f
    17  DATA LCDATA1<>+0x058(SB)/8, $0x7f7f7f7f7f7f7f7f
    18  DATA LCDATA1<>+0x060(SB)/8, $0x7f7f7f7f7f7f7f7f
    19  DATA LCDATA1<>+0x068(SB)/8, $0x7f7f7f7f7f7f7f7f
    20  DATA LCDATA1<>+0x070(SB)/8, $0x7f7f7f7f7f7f7f7f
    21  DATA LCDATA1<>+0x078(SB)/8, $0x7f7f7f7f7f7f7f7f
        // Row +0x080: high-nibble lookup table.
    22  DATA LCDATA1<>+0x080(SB)/8, $0x0100010004120008
    23  DATA LCDATA1<>+0x088(SB)/8, $0x0000010203000000
    24  DATA LCDATA1<>+0x090(SB)/8, $0x0100010004120008
    25  DATA LCDATA1<>+0x098(SB)/8, $0x0000010203000000
    26  DATA LCDATA1<>+0x0a0(SB)/8, $0x0100010004120008
    27  DATA LCDATA1<>+0x0a8(SB)/8, $0x0000010203000000
    28  DATA LCDATA1<>+0x0b0(SB)/8, $0x0100010004120008
    29  DATA LCDATA1<>+0x0b8(SB)/8, $0x0000010203000000
        // Row +0x0c0: 0x07 byte mask (structurals bits).
    30  DATA LCDATA1<>+0x0c0(SB)/8, $0x0707070707070707
    31  DATA LCDATA1<>+0x0c8(SB)/8, $0x0707070707070707
    32  DATA LCDATA1<>+0x0d0(SB)/8, $0x0707070707070707
    33  DATA LCDATA1<>+0x0d8(SB)/8, $0x0707070707070707
    34  DATA LCDATA1<>+0x0e0(SB)/8, $0x0707070707070707
    35  DATA LCDATA1<>+0x0e8(SB)/8, $0x0707070707070707
    36  DATA LCDATA1<>+0x0f0(SB)/8, $0x0707070707070707
    37  DATA LCDATA1<>+0x0f8(SB)/8, $0x0707070707070707
        // Row +0x100: 0x18 byte mask (whitespace bits).
    38  DATA LCDATA1<>+0x100(SB)/8, $0x1818181818181818
    39  DATA LCDATA1<>+0x108(SB)/8, $0x1818181818181818
    40  DATA LCDATA1<>+0x110(SB)/8, $0x1818181818181818
    41  DATA LCDATA1<>+0x118(SB)/8, $0x1818181818181818
    42  DATA LCDATA1<>+0x120(SB)/8, $0x1818181818181818
    43  DATA LCDATA1<>+0x128(SB)/8, $0x1818181818181818
    44  DATA LCDATA1<>+0x130(SB)/8, $0x1818181818181818
    45  DATA LCDATA1<>+0x138(SB)/8, $0x1818181818181818
        // File-local symbol (<>), 320 bytes = 5 rows * 64 bytes.
        // Flag value 8 is RODATA in Go's textflag.h.
    46  GLOBL LCDATA1<>(SB), 8, $320
    47  
    // _find_whitespace_and_structurals is the stack-argument entry point for
    // the 256-bit (YMM) implementation. Frame spec $0-24: no locals, 24 bytes
    // of arguments.
    //
    // Args:  input+0(FP)        address of the input; 64 bytes are read from it
    //        whitespace+8(FP)   address that receives the 64-bit whitespace mask
    //        structurals+16(FP) address that receives the 64-bit structurals mask
    //
    // The work is done by ·__find_whitespace_and_structurals, which takes its
    // operands in registers: Y8/Y9 = input bytes, DX/CX = output addresses.
    48  TEXT ·_find_whitespace_and_structurals(SB), $0-24
    49  
    50  	MOVQ input+0(FP), DI
    51  	MOVQ whitespace+8(FP), DX
    52  	MOVQ structurals+16(FP), CX
    53  
    54  	VMOVDQU (DI), Y8     // Y8 = input bytes 0..31 (unaligned load)
    55  	VMOVDQU 0x20(DI), Y9 // Y9 = input bytes 32..63 (unaligned load)
    56  
    57  	CALL ·__find_whitespace_and_structurals(SB)
    58  
    59  	VZEROUPPER // clear the upper YMM halves before returning to the caller
    60  	RET
    61  
    // __find_whitespace_and_structurals classifies 64 input bytes that are
    // already held in registers and stores two 64-bit masks in which bit i
    // describes input byte i. It has no stack arguments (frame $0).
    //
    // In:    Y8 = input bytes 0..31, Y9 = input bytes 32..63
    //        CX = address that receives the structurals mask
    //        DX = address that receives the whitespace mask
    // Out:   (CX) = bit i set when (class[i] & 0x07) != 0
    //        (DX) = bit i set when (class[i] & 0x18) != 0
    // Clobb: AX, CX, SI, R8, Y0-Y5, flags
    //
    // class[i] is the AND of two 16-entry table lookups done with VPSHUFB:
    // one indexed by the low nibble of the byte, one by its high nibble.
    // VPSHUFB yields 0 for an index byte with bit 7 set, so in the low-nibble
    // lookup (indexed by the raw input) bytes >= 0x80 get class 0.
    62  TEXT ·__find_whitespace_and_structurals(SB), $0
    63  	LEAQ LCDATA1<>(SB), R8
    64  
    65  	VMOVDQA   Y8, Y0        // Y0 = input bytes 0..31 (working copy)
    66  	VMOVDQA   Y9, Y1        // Y1 = input bytes 32..63 (working copy)
    67  	VMOVDQA   (R8), Y2      // Y2 = low-nibble table (LCDATA1+0x000)
    68  	VPSHUFB   Y0, Y2, Y3    // Y3 = low-nibble lookup for bytes 0..31
    69  	VPSRLD    $4, Y0, Y0    // shift each dword right by 4: high nibble lands in bits 0..3 of each byte
    70  	VMOVDQA   0x40(R8), Y4  // Y4 = 0x7f in every byte (LCDATA1+0x040)
    71  	VPAND     Y4, Y0, Y0    // clear bit 7 (shifted in from the neighbouring byte) so VPSHUFB does not zero the result
    72  	VMOVDQA   0x80(R8), Y5  // Y5 = high-nibble table (LCDATA1+0x080)
    73  	VPSHUFB   Y0, Y5, Y0    // Y0 = high-nibble lookup for bytes 0..31
    74  	VPAND     Y3, Y0, Y0    // Y0 = class of bytes 0..31
    75  	VPSHUFB   Y1, Y2, Y2    // Y2 = low-nibble lookup for bytes 32..63
    76  	VPSRLD    $4, Y1, Y1    // same high-nibble extraction for bytes 32..63
    77  	VPAND     Y4, Y1, Y1    // clear bit 7 of every index byte
    78  	VPSHUFB   Y1, Y5, Y1    // Y1 = high-nibble lookup for bytes 32..63
    79  	VPAND     Y2, Y1, Y1    // Y1 = class of bytes 32..63
    80  	VMOVDQA   0xc0(R8), Y2  // Y2 = 0x07 in every byte (LCDATA1+0x0c0)
    81  	VPAND     Y2, Y0, Y3    // Y3 = class & 0x07, bytes 0..31
    82  	VPXOR     Y4, Y4, Y4    // Y4 = 0 (the 0x7f mask is no longer needed)
    83  	VPCMPEQB  Y4, Y3, Y3    // 0xff where (class & 0x07) == 0, bytes 0..31
    84  	VPAND     Y2, Y1, Y2    // Y2 = class & 0x07, bytes 32..63
    85  	VPCMPEQB  Y4, Y2, Y2    // 0xff where (class & 0x07) == 0, bytes 32..63
    86  	VPMOVMSKB Y3, AX        // AX = one bit per byte, bytes 0..31
    87  	VPMOVMSKB Y2, SI        // SI = one bit per byte, bytes 32..63
    88  	SHLQ      $32, SI       // move the upper-half bits to positions 32..63
    89  	ORQ       AX, SI        // SI = 64-bit "is zero" mask
    90  	NOTQ      SI            // invert: bit set where (class & 0x07) != 0
    91  	MOVQ      SI, (CX)      // store the structurals mask
    92  	VMOVDQA   0x100(R8), Y2 // Y2 = 0x18 in every byte (LCDATA1+0x100)
    93  	VPAND     Y2, Y0, Y0    // Y0 = class & 0x18, bytes 0..31
    94  	VPCMPEQB  Y4, Y0, Y0    // 0xff where (class & 0x18) == 0, bytes 0..31
    95  	VPAND     Y2, Y1, Y1    // Y1 = class & 0x18, bytes 32..63
    96  	VPCMPEQB  Y4, Y1, Y1    // 0xff where (class & 0x18) == 0, bytes 32..63
    97  	VPMOVMSKB Y0, AX        // AX = one bit per byte, bytes 0..31
    98  	VPMOVMSKB Y1, CX        // CX reused as scratch; the structurals address was consumed above
    99  	SHLQ      $32, CX       // move the upper-half bits to positions 32..63
   100  	ORQ       AX, CX        // CX = 64-bit "is zero" mask
   101  	NOTQ      CX            // invert: bit set where (class & 0x18) != 0
   102  	MOVQ      CX, (DX)      // store the whitespace mask
   103  	RET
   104  
   // _find_whitespace_and_structurals_avx512 is the stack-argument entry point
   // for the AVX-512 implementation. Frame spec $0-24: no locals, 24 bytes of
   // arguments/results.
   //
   // Only input+0(FP) is read; 64 bytes are loaded from the address it holds.
   // Unlike the YMM entry point, the two masks are written directly into the
   // slots whitespace+8(FP) and structurals+16(FP) rather than through pointers.
   // NOTE(review): this suggests the Go declaration has one pointer argument
   // and two uint64 results - confirm against the Go stub.
   //
   // K_WHITESPACE and K_STRUCTURALS are not defined in this file; presumably
   // they are mask-register names from common.h - confirm.
   105  TEXT ·_find_whitespace_and_structurals_avx512(SB), $0-24
   106  
   107  	MOVQ input+0(FP), DI
   108  
   109  	VMOVDQU32 (DI), Z8 // Z8 = input bytes 0..63 (unaligned load)
   110  
   111  	CALL ·__init_whitespace_and_structurals_avx512(SB) // load constants into Z20-Z25
   112  	CALL ·__find_whitespace_and_structurals_avx512(SB) // Z8 -> K_WHITESPACE, K_STRUCTURALS
   113  
   114  	VZEROUPPER // does not affect mask registers, so the results survive
   115  	KMOVQ K_WHITESPACE, whitespace+8(FP)
   116  	KMOVQ K_STRUCTURALS, structurals+16(FP)
   117  	RET
   118  
   // Register names for the constants used by the AVX-512 routines below.
   // ·__init_whitespace_and_structurals_avx512 fills them and
   // ·__find_whitespace_and_structurals_avx512 only reads them.
   //   ZERO_CONST    all zero
   //   WSAS_CONST_1  LCDATA1+0x000 (low-nibble lookup table)
   //   WSAS_CONST_2  LCDATA1+0x040 (0x7f byte mask)
   //   WSAS_CONST_3  LCDATA1+0x080 (high-nibble lookup table)
   //   WSAS_CONST_4  LCDATA1+0x0c0 (0x07 byte mask, structurals)
   //   WSAS_CONST_5  LCDATA1+0x100 (0x18 byte mask, whitespace)
   119  #define ZERO_CONST   Z20
   120  #define WSAS_CONST_1 Z21
   121  #define WSAS_CONST_2 Z22
   122  #define WSAS_CONST_3 Z23
   123  #define WSAS_CONST_4 Z24
   124  #define WSAS_CONST_5 Z25
   125  
   // __init_whitespace_and_structurals_avx512 loads the constants needed by
   // ·__find_whitespace_and_structurals_avx512 into their dedicated registers.
   // It has no stack arguments (frame $0).
   //
   // Out:   ZERO_CONST = 0, WSAS_CONST_1..5 = the five 64-byte rows of LCDATA1
   // Clobb: R8
   126  TEXT ·__init_whitespace_and_structurals_avx512(SB), $0
   127  	LEAQ      LCDATA1<>(SB), R8
   128  	VPXORD    ZERO_CONST, ZERO_CONST, ZERO_CONST // x XOR x = 0
   129  	VMOVDQU32 0x000(R8), WSAS_CONST_1            // low-nibble lookup table
   130  	VMOVDQU32 0x040(R8), WSAS_CONST_2            // 0x7f byte mask
   131  	VMOVDQU32 0x080(R8), WSAS_CONST_3            // high-nibble lookup table
   132  	VMOVDQU32 0x0c0(R8), WSAS_CONST_4            // 0x07 byte mask (structurals)
   133  	VMOVDQU32 0x100(R8), WSAS_CONST_5            // 0x18 byte mask (whitespace)
   134  	RET
   135  
   // __find_whitespace_and_structurals_avx512 classifies the 64 input bytes in
   // Z8 and leaves one mask bit per byte in two mask registers. It has no
   // stack arguments (frame $0) and touches no memory.
   //
   // In:    Z8 = input bytes 0..63
   //        ZERO_CONST, WSAS_CONST_1..5 as set up by
   //        ·__init_whitespace_and_structurals_avx512
   // Out:   K_STRUCTURALS = bit i set when (class[i] & WSAS_CONST_4 byte) != 0
   //        K_WHITESPACE  = bit i set when (class[i] & WSAS_CONST_5 byte) != 0
   // Clobb: Z0, Z3
   //
   // class[i] is the AND of a lookup indexed by the low nibble of byte i and a
   // lookup indexed by its high nibble, both done with VPSHUFB.
   136  TEXT ·__find_whitespace_and_structurals_avx512(SB), $0
   137  	VPSHUFB  Z8, WSAS_CONST_1, Z3              // Z3 = low-nibble lookup (index bytes with bit 7 set yield 0)
   138  	VPSRLD   $4, Z8, Z0                        // shift each dword right by 4: high nibble lands in bits 0..3 of each byte
   139  	VPANDD   WSAS_CONST_2, Z0, Z0              // clear bit 7 of every index byte before the second lookup
   140  	VPSHUFB  Z0, WSAS_CONST_3, Z0              // Z0 = high-nibble lookup
   141  	VPANDD   Z3, Z0, Z0                        // Z0 = class of every input byte
   142  	VPANDD   WSAS_CONST_4, Z0, Z3              // Z3 = class restricted to the structurals bits
   143  	VPCMPEQB ZERO_CONST, Z3, K_STRUCTURALS     // mask bit set where no structurals bit is present
   144  	KNOTQ    K_STRUCTURALS, K_STRUCTURALS      // invert: bit set where a structurals bit is present
   145  	VPANDD   WSAS_CONST_5, Z0, Z0              // Z0 = class restricted to the whitespace bits
   146  	VPCMPEQB ZERO_CONST, Z0, K_WHITESPACE      // mask bit set where no whitespace bit is present
   147  	KNOTQ    K_WHITESPACE, K_WHITESPACE        // invert: bit set where a whitespace bit is present
   148  	RET
   147  	KNOTQ    K_WHITESPACE, K_WHITESPACE
   148  	RET