github.com/dgraph-io/simdjson-go@v0.3.0/find_structural_bits_amd64.s (about)

     1  //+build !noasm !appengine gc
     2  
     3  TEXT ·_find_structural_bits(SB), $0-72
        	// _find_structural_bits handles one 64-byte block: it loads the 64 bytes
        	// at p1 into Y8 (low half) and Y9 (high half), runs the four helper
        	// routines called below in sequence, and stores the AX value left by the
        	// last helper into the result slot structurals+64(FP).
        	//
        	// Frame $0-72: no locals, 72 bytes of arguments plus result:
        	//   p1+0, p3+8, prev_iter_inside_quote+16, quote_bits+24, error_mask+32,
        	//   whitespace+40, structurals_in+48, prev_iter_ends_pseudo_pred+56,
        	//   structurals+64 (result).
        	//
        	// NOTE(review): the helpers are defined outside this section. Which
        	// registers they read and clobber is inferred only from the set-up code
        	// here (DI, SI, DX, CX, R8, R9, Y8, Y9 in; AX out) - confirm against
        	// their definitions before changing any register assignment.
     4  
     5  	MOVQ p1+0(FP), DI // DI = p1, base address of the 64 bytes loaded below
     6  	MOVQ p3+8(FP), DX // DX = p3, set up for the first helper call
     7  
     8  	VMOVDQU (DI), Y8     // load low 32-bytes
     9  	VMOVDQU 0x20(DI), Y9 // load high 32-bytes
    10  
    11  	CALL ·__find_odd_backslash_sequences(SB)
    12  
    13  	MOVQ AX, DX                            // DX = AX left by the call above (odd_ends)
    14  	MOVQ prev_iter_inside_quote+16(FP), CX
    15  	MOVQ quote_bits+24(FP), R8
    16  	MOVQ error_mask+32(FP), R9
    17  
    18  	CALL  ·__find_quote_mask_and_bits(SB)
    19  	PUSHQ AX                              // save AX (quote_mask) on the stack; popped into DX below
    20  
    21  	MOVQ whitespace+40(FP), DX     // DX = whitespace (a pointer: dereferenced after the call)
    22  	MOVQ structurals_in+48(FP), CX // CX = structurals_in (a pointer: dereferenced after the call)
    23  
    24  	CALL ·__find_whitespace_and_structurals(SB)
    25  
    26  	MOVQ structurals_in+48(FP), DI; MOVQ (DI), DI // DI = structurals
    27  	MOVQ whitespace+40(FP), SI; MOVQ (SI), SI     // SI = whitespace
    28  	POPQ DX                                       // DX = quote_mask
    29  	MOVQ quote_bits+24(FP), CX; MOVQ (CX), CX     // CX = quote_bits
    30  	MOVQ prev_iter_ends_pseudo_pred+56(FP), R8    // R8 = &prev_iter_ends_pseudo_pred
    31  
    32  	CALL ·__finalize_structurals(SB)
    33  	MOVQ AX, structurals+64(FP) // result = AX left by __finalize_structurals
    34  
    35  	VZEROUPPER // clear the upper halves of the YMM registers before returning
    36  	RET
    37  // MASK_WHITESPACE(MAX, Y) keeps the leading bytes of Y and overwrites the
        // remaining bytes with the corresponding bytes of Y13.
        //
        // In:       CX  = byte count; the mask is loaded from MASKTABLE + (MAX - CX)
        //           Y13 = fill pattern (the masking path in this file broadcasts
        //                 WHITESPACE into it)
        // Out:      Y   = (Y AND mask) OR (Y13 AND NOT mask)
        // Clobbers: DX, BX, Y10, Y11, Y12
        //
        // MASKTABLE holds 31 bytes of 0xff followed by zero bytes, so mask byte i
        // is 0xff exactly when (MAX - CX) + i <= 30. With MAX = 0x1f the first CX
        // bytes of Y are kept; with MAX = 0x3f the first CX - 0x20 bytes are kept.
    38  #define MASK_WHITESPACE(MAX, Y) \
    39  	LEAQ     MASKTABLE<>(SB), DX \ // DX = &MASKTABLE
    40  	MOVQ     $MAX, BX            \ // BX = MAX
    41  	SUBQ     CX, BX              \ // BX = MAX - CX, byte offset into MASKTABLE
    42  	VMOVDQU  (DX)(BX*1), Y10     \ // Load mask
    43  	VPCMPEQB Y11, Y11, Y11       \ // Set all bits
    44  	VPXOR    Y11, Y10, Y12       \ // Invert mask
    45  	VPAND    Y13, Y12, Y12       \ // Mask whitespace
    46  	VPAND    Y10, Y, Y           \ // Mask message
    47  	VPOR     Y12, Y, Y           // Combine together
    48  
    49  TEXT ·_find_structural_bits_in_slice(SB), $0-128
        	// _find_structural_bits_in_slice walks buf in 64-byte blocks. Every block
        	// goes through the same four helper calls as _find_structural_bits; the
        	// resulting AX is optionally OR'ed with BX from __find_newline_delimiters
        	// (when ndjson != 0) and then handed to __flatten_bits_incremental, whose
        	// BX/DX/R10 values are written back through index/carried/position.
        	// A trailing partial block (len & 0x3f bytes) is processed once, with the
        	// bytes past the end of the message replaced by 0x20.
        	// The loop also stops as soon as *index (BX) >= indexes_len.
        	// Result: processed+120(FP) = AX, the byte offset reached in buf.
        	//
        	// Frame $0-128: no locals, 128 bytes of arguments plus result:
        	//   buf+0, len+8, p3+16, prev_iter_inside_quote+24, quote_bits+32,
        	//   error_mask+40, whitespace+48, structurals_in+56,
        	//   prev_iter_ends_pseudo_pred+64, indexes+72, index+80, indexes_len+88,
        	//   carried+96, position+104, ndjson+112, processed+120 (result).
        	//
        	// Register roles across iterations: AX = byte offset into buf,
        	// CX = loop limit. Both are pushed before and popped after the helper
        	// calls of each iteration.
        	//
        	// NOTE(review): the helpers are defined outside this section; their
        	// register inputs/outputs/clobbers are inferred from this call site only.
    50  	XORQ AX, AX                  // AX = 0: offset of the next block in buf
    51  	MOVQ len+8(FP), CX
    52  	ANDQ $0xffffffffffffffc0, CX // CX = len rounded down to a multiple of 64
    53  	CMPQ AX, CX
    54  	JEQ  check_partial_load      // CX == 0: there is no full 64-byte block
    55  
    56  loop:
    57  	MOVQ    buf+0(FP), DI
    58  	VMOVDQU (DI)(AX*1), Y8     // load low 32-bytes
    59  	VMOVDQU 0x20(DI)(AX*1), Y9 // load high 32-bytes
    60  	ADDQ    $0x40, AX          // AX now points just past the block held in Y8/Y9
    61  
    62  loop_after_load:
    63  	PUSHQ CX // save loop limit; restored by POPQ CX after flattening
    64  	PUSHQ AX // save offset; restored by POPQ AX after flattening
    65  
    66  	MOVQ p3+16(FP), DX
    67  	CALL ·__find_odd_backslash_sequences(SB)
    68  
    69  	MOVQ AX, DX                            // DX = AX left by the call above (odd_ends)
    70  	MOVQ prev_iter_inside_quote+24(FP), CX
    71  	MOVQ quote_bits+32(FP), R8
    72  	MOVQ error_mask+40(FP), R9
    73  
    74  	CALL  ·__find_quote_mask_and_bits(SB)
    75  	PUSHQ AX                              // save AX (quote_mask) on the stack; popped into DX below
    76  
    77  	MOVQ whitespace+48(FP), DX
    78  	MOVQ structurals_in+56(FP), CX
    79  
    80  	CALL ·__find_whitespace_and_structurals(SB)
    81  
    82  	MOVQ  structurals_in+56(FP), DI; MOVQ (DI), DI // DI = structurals
    83  	MOVQ  whitespace+48(FP), SI; MOVQ (SI), SI     // SI = whitespace
    84  	POPQ  DX                                       // DX = quote_mask
    85  	PUSHQ DX                                       // Save again for newline determination
    86  
    87  	MOVQ quote_bits+32(FP), CX; MOVQ (CX), CX  // CX = quote_bits
    88  	MOVQ prev_iter_ends_pseudo_pred+64(FP), R8 // R8 = &prev_iter_ends_pseudo_pred
    89  
    90  	CALL ·__finalize_structurals(SB)
    91  
    92  	POPQ DX                             // DX = quote_mask
    93  	CMPQ ndjson+112(FP), $0
    94  	JZ   skip_ndjson_detection          // ndjson == 0: skip the newline pass
    95  	CALL ·__find_newline_delimiters(SB)
    96  	ORQ  BX, AX                         // merge BX into AX (presumably the newline delimiter bits - confirm against the helper)
    97  
    98  skip_ndjson_detection:
   99  	MOVQ indexes+72(FP), DI
   100  	MOVQ index+80(FP), SI; MOVQ (SI), BX        // BX = index
   101  	MOVQ carried+96(FP), R11; MOVQ (R11), DX    // DX = carried
   102  	MOVQ position+104(FP), R12; MOVQ (R12), R10 // R10 = position
   103  	CALL ·__flatten_bits_incremental(SB)
   104  	MOVQ BX, (SI)                               // *index = BX
   105  	MOVQ DX, (R11)                              // *carried = DX
   106  	MOVQ R10, (R12)                             // *position = R10
   107  
   108  	POPQ AX // AX = byte offset saved at loop_after_load
   109  	POPQ CX // CX = loop limit saved at loop_after_load
   110  
   111  	CMPQ BX, indexes_len+88(FP)
   112  	JGE  done // stop once index (BX) has reached indexes_len (signed compare)
   113  
   114  	CMPQ AX, CX
   115  	JLT  loop // more full blocks remain; after the partial pass AX >= CX, so this falls through
   116  
   117  	// If AX is not aligned on a 64-byte boundary, the iteration just finished was the last (partial) one
   118  	MOVQ AX, BX
   119  	ANDQ $0x3f, BX
   120  	CMPQ BX, $0
   121  	JNE  done
   122  
   123  check_partial_load:
   124  	MOVQ len+8(FP), CX
   125  	ANDQ $0x3f, CX     // CX = number of trailing bytes beyond the last full block
   126  	CMPQ CX, $0
   127  	JNE  masking       // end of message is not aligned on 64-byte boundary, so mask the remaining bytes
   128  
   129  done:
   130  	MOVQ AX, processed+120(FP) // result = byte offset reached in buf
   131  	VZEROUPPER
   132  	RET
   133  
   134  masking:
   135  	// Do a partial load and mask out bytes after the end of the message with whitespace
        	// Here CX = len & 0x3f (1..63) and AX = offset of the partial block.
        	// NOTE(review): both VMOVDQU loads below read a full 32 bytes, so they
        	// read past buf+len whenever fewer bytes remain; the excess is masked
        	// afterwards, but the memory must still be readable - confirm callers
        	// guarantee this.
   136  	VPBROADCASTQ WHITESPACE<>(SB), Y13 // Load padding whitespace constant
   137  
   138  	MOVQ    buf+0(FP), DI
   139  	VMOVDQU (DI)(AX*1), Y8 // Always load low 32-bytes
   140  	CMPQ    CX, $0x20
   141  	JGE     masking_high   // 32 or more bytes remain: low half is complete
   142  
   143  	// Perform masking on low 32-bytes
   144  	MASK_WHITESPACE(0x1f, Y8)
   145  	VMOVDQU Y13, Y9        // high half lies entirely past the end: all whitespace
   146  	JMP     masking_done
   147  
   148  masking_high:
   149  	// Perform masking on high 32-bytes
   150  	VMOVDQU 0x20(DI)(AX*1), Y9 // Load high 32-bytes
   151  	MASK_WHITESPACE(0x3f, Y9)
   152  
   153  masking_done:
   154  	ADDQ CX, AX          // advance AX by the partial byte count (AX becomes len)
   155  	JMP  loop_after_load // Rejoin loop after regular loading
   156  // MASKTABLE is the 64-byte byte-mask table read by MASK_WHITESPACE.
        // Bytes 0x00-0x1e are 0xff and bytes 0x1f-0x3f are 0x00 (the quadword at
        // offset 0x018 is stored little-endian, so its most significant byte,
        // table byte 0x1f, is the first zero). A 32-byte load at offset k
        // (0 <= k <= 31) therefore yields 31 - k leading 0xff bytes followed by
        // zeros. GLOBL flag 8 is RODATA in Go's textflag.h.
   157  DATA MASKTABLE<>+0x000(SB)/8, $0xffffffffffffffff
   158  DATA MASKTABLE<>+0x008(SB)/8, $0xffffffffffffffff
   159  DATA MASKTABLE<>+0x010(SB)/8, $0xffffffffffffffff
   160  DATA MASKTABLE<>+0x018(SB)/8, $0x00ffffffffffffff
   161  DATA MASKTABLE<>+0x020(SB)/8, $0x0000000000000000
   162  DATA MASKTABLE<>+0x028(SB)/8, $0x0000000000000000
   163  DATA MASKTABLE<>+0x030(SB)/8, $0x0000000000000000
   164  DATA MASKTABLE<>+0x038(SB)/8, $0x0000000000000000
   165  GLOBL MASKTABLE<>(SB), 8, $64
   166  // WHITESPACE is eight bytes of 0x20 (ASCII space). The masking path
        // broadcasts it into Y13 with VPBROADCASTQ to pad bytes past the end of
        // the message. GLOBL flag 8 is RODATA in Go's textflag.h.
   167  DATA WHITESPACE<>+0x000(SB)/8, $0x2020202020202020
   168  GLOBL WHITESPACE<>(SB), 8, $8