github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/find_structural_bits_avx512_amd64.s (about)

     1  //+build !noasm !appengine gc
     2  
     3  #include "common.h"
     4  
// Single-block variant: classifies one 64-byte chunk at p1 and stores the
// structural-character bitmask into the `structurals` result slot.
//
// Arguments by FP offset (names as used below): p1+0 (input chunk),
// p3+8, prev_iter_inside_quote+16, error_mask+24,
// prev_iter_ends_pseudo_pred+40; result structurals+48.
// NOTE(review): offset 32 of the 56-byte frame is not referenced here --
// presumably reserved to mirror another variant; confirm against the Go
// declaration.
     5  TEXT ·_find_structural_bits_avx512(SB), $0-56
     6  
// Materialize the constants the helper stages below expect in registers.
     7  	CALL ·__init_odd_backslash_sequences_avx512(SB)
     8  	CALL ·__init_quote_mask_and_bits_avx512(SB)
     9  	CALL ·__init_whitespace_and_structurals_avx512(SB)
    10  	CALL ·__init_newline_delimiters_avx512(SB)
    11  
    12  	MOVQ p1+0(FP), DI
    13  	MOVQ p3+8(FP), DX
    14  
// NOTE(review): OR-ing an opmask register with itself leaves its value
// unchanged (KORQ sets no flags either); presumably this is meant to
// initialize K_ERRORMASK -- the slice variant loads it from memory
// instead.  Confirm intent.
    15  	KORQ K_ERRORMASK, K_ERRORMASK, K_ERRORMASK
    16  
    17  	VMOVDQU32 (DI), Z8 // Z8 = the 64 input bytes
    18  
    19  	CALL ·__find_odd_backslash_sequences_avx512(SB)
    20  
    21  	MOVQ AX, DX                            // odd_ends + 16
    22  	MOVQ prev_iter_inside_quote+16(FP), CX
    23  
    24  	CALL  ·__find_quote_mask_and_bits_avx512(SB)
    25  	PUSHQ AX                                     // MOVQ AX, quote_mask + 64
    26  
    27  	CALL ·__find_whitespace_and_structurals_avx512(SB)
    28  
    29  	POPQ DX                                    // DX = quote_mask
    30  	MOVQ prev_iter_ends_pseudo_pred+40(FP), R8 // R8 = &prev_iter_ends_pseudo_pred
    31  
    32  	CALL ·__finalize_structurals_avx512(SB)
    33  
    34  	VZEROUPPER // leave AVX upper state clean before returning to Go code
    35  	MOVQ  error_mask+24(FP), R9
    36  	KMOVQ K_ERRORMASK, (R9)       // *error_mask = accumulated error bits
    37  	MOVQ  AX, structurals+48(FP)  // return the bitmask produced (in AX) by finalize
    38  	RET
    39  
// MASK_WHITESPACE(MAX, Y) replaces the bytes of Y past the end of the
// message with ASCII spaces (0x20), so the tail lexes as plain whitespace.
//
// Inputs:   CX  = count of valid message bytes (len & 63)
//           Y13 = 32 bytes of 0x20, broadcast by the caller
//           MAX = 0x1f for the low 32-byte lane, 0x3f for the high lane
// Effect:   Y = (Y AND mask) OR (0x20.. AND NOT mask), where the mask is a
//           32-byte window loaded from MASKTABLE at offset (MAX - CX),
//           giving 0xFF for each valid byte and 0x00 for the padding tail.
// Clobbers: DX, BX, Y10, Y11, Y12
    40  #define MASK_WHITESPACE(MAX, Y) \
    41  	LEAQ     MASKTABLE<>(SB), DX \
    42  	MOVQ     $MAX, BX            \
    43  	SUBQ     CX, BX              \
    44  	VMOVDQU  (DX)(BX*1), Y10     \ // Load mask: 0xFF for valid bytes, 0x00 for the tail
    45  	VPCMPEQB Y11, Y11, Y11       \ // Set all bits
    46  	VPXOR    Y11, Y10, Y12       \ // Invert mask
    47  	VPAND    Y13, Y12, Y12       \ // Keep whitespace only in the tail positions
    48  	VPAND    Y10, Y, Y           \ // Keep message only in the valid positions
    49  	VPOR     Y12, Y, Y           // Combine together
    50  
// Incremental slice variant: scans buf[0:len] in 64-byte chunks, appending
// the positions of structural characters to the `indexes` buffer and
// returning in `processed` how many input bytes were consumed.
//
// Arguments by FP offset (names as used below): buf+0, len+8, p3+16,
// prev_iter_inside_quote+24, error_mask+32, prev_iter_ends_pseudo_pred+40,
// indexes+48, index+56, indexes_len+64, carried+72, position+80,
// ndjson+88; result processed+96.  All carry state is behind pointers so
// the scan can stop (e.g. on a full index buffer) and be resumed later.
//
// Main-loop register roles: AX = byte offset consumed so far,
// CX = len rounded down to a 64-byte multiple, Z8 = current chunk.
    51  TEXT ·_find_structural_bits_in_slice_avx512(SB), $0-104
    52  
// Materialize the constants the helper stages below expect in registers.
    53  	CALL ·__init_odd_backslash_sequences_avx512(SB)
    54  	CALL ·__init_quote_mask_and_bits_avx512(SB)
    55  	CALL ·__init_whitespace_and_structurals_avx512(SB)
    56  	CALL ·__init_newline_delimiters_avx512(SB)
    57  
    58  	MOVQ  error_mask+32(FP), R9
    59  	KMOVQ (R9), K_ERRORMASK // resume the running error mask from *error_mask
    60  
    61  	XORQ AX, AX
    62  	MOVQ len+8(FP), CX
    63  	ANDQ $0xffffffffffffffc0, CX // CX = len & ~63: bytes covered by whole chunks
    64  	CMPQ AX, CX
    65  	JEQ  check_partial_load      // fewer than 64 bytes total: only a tail exists
    66  
    67  loop:
    68  	MOVQ      buf+0(FP), DI
    69  	VMOVDQU32 (DI)(AX*1), Z8 // load the next full 64-byte chunk
    70  	ADDQ      $0x40, AX
    71  
    72  loop_after_load:
    73  	PUSHQ CX // the helper stages clobber CX/AX; save the loop state
    74  	PUSHQ AX
    75  
    76  	MOVQ p3+16(FP), DX
    77  	CALL ·__find_odd_backslash_sequences_avx512(SB)
    78  
    79  	MOVQ AX, DX                            // odd_ends + 16
    80  	MOVQ prev_iter_inside_quote+24(FP), CX
    81  
    82  	CALL  ·__find_quote_mask_and_bits_avx512(SB)
    83  	PUSHQ AX                                     // MOVQ AX, quote_mask + 64
    84  
    85  	CALL ·__find_whitespace_and_structurals_avx512(SB)
    86  
    87  	POPQ  DX                                    // DX = quote_mask
    88  	PUSHQ DX                                    // Save again for newline determination
    89  	MOVQ  prev_iter_ends_pseudo_pred+40(FP), R8 // R8 = &prev_iter_ends_pseudo_pred
    90  
    91  	CALL ·__finalize_structurals_avx512(SB)
    92  
    93  	POPQ DX                                    // DX = quote_mask
    94  	CMPQ ndjson+88(FP), $0
    95  	JZ   skip_ndjson_detection
    96  	CALL ·__find_newline_delimiters_avx512(SB)
    97  	ORQ  BX, AX // merge newline-delimiter bits (returned in BX) into the structural mask
    98  
    99  skip_ndjson_detection:
// Flatten the structural bitmask in AX into explicit positions appended to
// the indexes buffer; index/carried/position are read, updated by the
// helper, and stored back so the next call continues where we left off.
   100  	MOVQ indexes+48(FP), DI
   101  	MOVQ index+56(FP), SI; MOVQ (SI), BX       // BX = index
   102  	MOVQ carried+72(FP), R11; MOVQ (R11), DX   // DX = carried
   103  	MOVQ position+80(FP), R12; MOVQ (R12), R10 // R10 = position
   104  	CALL ·__flatten_bits_incremental(SB)
   105  	MOVQ BX, (SI)                              // *index = BX
   106  	MOVQ DX, (R11)                             // *carried = DX
   107  	MOVQ R10, (R12)                            // *position = R10
   108  
   109  	POPQ AX // restore scan offset
   110  	POPQ CX // restore rounded-down length
   111  
// Stop early once the index buffer has filled up; `processed` (AX) tells
// the caller how far we got so it can resume.
   112  	CMPQ BX, indexes_len+64(FP)
   113  	JGE  done
   114  
   115  	CMPQ AX, CX
   116  	JLT  loop
   117  
   118  	// Check if AX is not aligned on a 64-byte boundary, this signals the last (partial) iteration
   119  	MOVQ AX, BX
   120  	ANDQ $0x3f, BX
   121  	CMPQ BX, $0
   122  	JNE  done
   123  
   124  check_partial_load:
   125  	MOVQ len+8(FP), CX
   126  	ANDQ $0x3f, CX // CX = len % 64: number of tail bytes
   127  	CMPQ CX, $0
   128  	JNE  masking       // end of message is not aligned on 64-byte boundary, so mask the remaining bytes
   129  
   130  done:
   131  	VZEROUPPER // leave AVX upper state clean before returning to Go code
   132  	MOVQ  error_mask+32(FP), R9
   133  	KMOVQ K_ERRORMASK, (R9) // persist the error mask for the next call
   134  	MOVQ  AX, processed+96(FP)
   135  	RET
   136  
   137  masking:
   138  	// Do a partial load and mask out bytes after the end of the message with whitespace
   139  	VPBROADCASTQ WHITESPACE<>(SB), Y13 // Load padding whitespace constant
   140  
   141  	MOVQ    buf+0(FP), DI
   142  	VMOVDQU (DI)(AX*1), Y8 // Always load low 32-bytes
   143  	CMPQ    CX, $0x20
   144  	JGE     masking_high
   145  
   146  	// Perform masking on low 32-bytes
   147  	MASK_WHITESPACE(0x1f, Y8)
   148  	VMOVDQU Y13, Y9 // tail < 32 bytes: the whole high lane is padding spaces
   149  	JMP     masking_done
   150  
   151  masking_high:
   152  	// Perform masking on high 32-bytes
   153  	VMOVDQU 0x20(DI)(AX*1), Y9 // Load high 32-bytes
   154  	MASK_WHITESPACE(0x3f, Y9)
   155  
   156  masking_done:
   157  	ADDQ CX, AX // AX is now unaligned, which marks this as the final iteration (see check above)
   158  
   159  	// Merge Y9 into upper half of Z8
    160  	VPXORD  Z10, Z10, Z10
   161  	VALIGND $8, Z10, Z9, Z9 // shift Y9 up by 8 dwords: Z9 = [Y9 | 0]
   162  	VPORD   Z9, Z8, Z8      // Z8 = [Y9 | Y8], a full padded 64-byte chunk
   163  
   164  	JMP loop_after_load // Rejoin loop after regular loading
   165  
// MASKTABLE: 64-byte sliding window of byte masks.  Bytes 0..30 are 0xFF
// and bytes 31..63 are 0x00 (values are little-endian qwords), so the
// 32-byte unaligned load at offset (MAX - CX) in MASK_WHITESPACE yields a
// mask whose leading "valid message" bytes are set and whose tail is clear.
   166  DATA MASKTABLE<>+0x000(SB)/8, $0xffffffffffffffff
   167  DATA MASKTABLE<>+0x008(SB)/8, $0xffffffffffffffff
   168  DATA MASKTABLE<>+0x010(SB)/8, $0xffffffffffffffff
   169  DATA MASKTABLE<>+0x018(SB)/8, $0x00ffffffffffffff
   170  DATA MASKTABLE<>+0x020(SB)/8, $0x0000000000000000
   171  DATA MASKTABLE<>+0x028(SB)/8, $0x0000000000000000
   172  DATA MASKTABLE<>+0x030(SB)/8, $0x0000000000000000
   173  DATA MASKTABLE<>+0x038(SB)/8, $0x0000000000000000
   174  GLOBL MASKTABLE<>(SB), 8, $64 // flag 8 = RODATA
   175  
// WHITESPACE: one qword of ASCII spaces (0x20), broadcast to a full vector
// with VPBROADCASTQ to pad partial chunks.
   176  DATA WHITESPACE<>+0x000(SB)/8, $0x2020202020202020
   177  GLOBL WHITESPACE<>(SB), 8, $8 // flag 8 = RODATA