// github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/find_structural_bits_amd64.s
//+build !noasm !appengine gc

//-----------------------------------------------------------------------------
// func _find_structural_bits(p1, p3, prev_iter_inside_quote, quote_bits,
//                            error_mask, whitespace, structurals_in,
//                            prev_iter_ends_pseudo_pred unsafe.Pointer) (structurals uint64)
//
// Processes a single 64-byte block of JSON: finds backslash escapes, quote
// regions, whitespace and structural characters, and returns the finalized
// structural bitmask in structurals+64(FP).
//
// All state arguments are pointers so quote/pseudo-predicate state carries
// across successive 64-byte blocks. The heavy lifting is done by the
// ·__find_* helpers (defined elsewhere in this package); their exact register
// contracts are not visible here — NOTE(review): the register setup below
// (DI/SI/DX/CX/R8/R9 before each CALL) mirrors their expected inputs, and AX
// is their result register; confirm against the helper definitions.
//-----------------------------------------------------------------------------
TEXT ·_find_structural_bits(SB), $0-72

	MOVQ p1+0(FP), DI             // DI = pointer to 64 bytes of input
	MOVQ p3+8(FP), DX             // DX = &prev_iter_ends_odd_backslash state

	VMOVDQU (DI), Y8              // Y8 = low 32 bytes of the block
	VMOVDQU 0x20(DI), Y9          // Y9 = high 32 bytes of the block

	CALL ·__find_odd_backslash_sequences(SB)

	MOVQ AX, DX                   // DX = odd_ends (helper result)
	MOVQ prev_iter_inside_quote+16(FP), CX
	MOVQ quote_bits+24(FP), R8
	MOVQ error_mask+32(FP), R9

	CALL  ·__find_quote_mask_and_bits(SB)
	PUSHQ AX                      // save quote_mask on the stack until finalize

	MOVQ whitespace+40(FP), DX
	MOVQ structurals_in+48(FP), CX

	CALL ·__find_whitespace_and_structurals(SB)

	// Load the values the previous helpers wrote through their out-pointers.
	MOVQ structurals_in+48(FP), DI; MOVQ (DI), DI // DI = structurals
	MOVQ whitespace+40(FP), SI; MOVQ (SI), SI     // SI = whitespace
	POPQ DX                                       // DX = quote_mask (saved above)
	MOVQ quote_bits+24(FP), CX; MOVQ (CX), CX     // CX = quote_bits
	MOVQ prev_iter_ends_pseudo_pred+56(FP), R8    // R8 = &prev_iter_ends_pseudo_pred

	CALL ·__finalize_structurals(SB)
	MOVQ AX, structurals+64(FP)   // return finalized structural bitmask

	VZEROUPPER                    // avoid AVX->SSE transition penalty on return to Go code
	RET

// MASK_WHITESPACE(MAX, Y) replaces the bytes of Y past the end of the message
// with whitespace (0x20), using MASKTABLE<> below. On entry CX holds the
// number of valid remaining bytes and Y13 holds broadcast 0x20; the table is
// indexed at (MAX - CX) so exactly the first CX bytes keep their mask bits.
// Clobbers DX, BX, Y10, Y11, Y12.
#define MASK_WHITESPACE(MAX, Y) \
	LEAQ     MASKTABLE<>(SB), DX  \
	MOVQ     $MAX, BX             \
	SUBQ     CX, BX               \
	VMOVDQU  (DX)(BX*1), Y10      \ // Load mask (1-bits cover valid bytes)
	VPCMPEQB Y11, Y11, Y11        \ // Set all bits
	VPXOR    Y11, Y10, Y12        \ // Invert mask (1-bits cover padding bytes)
	VPAND    Y13, Y12, Y12        \ // Whitespace for the padding positions
	VPAND    Y10, Y, Y            \ // Keep only the valid message bytes
	VPOR     Y12, Y, Y // Combine message bytes with whitespace padding

//-----------------------------------------------------------------------------
// func _find_structural_bits_in_slice(buf unsafe.Pointer, len uint64, p3,
//      prev_iter_inside_quote, quote_bits, error_mask, whitespace,
//      structurals_in, prev_iter_ends_pseudo_pred, indexes, index unsafe.Pointer,
//      indexes_len uint64, carried, position unsafe.Pointer,
//      ndjson uint64) (processed uint64)
//
// Loops _find_structural_bits over a whole buffer, 64 bytes per iteration,
// flattening the structural bitmask into an index buffer incrementally.
// A trailing partial block (< 64 bytes) is loaded once and padded with
// whitespace via MASK_WHITESPACE. Stops early when the index buffer fills
// (index >= indexes_len) and reports the number of bytes consumed in
// processed+120(FP).
//
// NOTE(review): offsets above are inferred from the named FP references in
// the body; confirm against the Go declaration.
//-----------------------------------------------------------------------------
TEXT ·_find_structural_bits_in_slice(SB), $0-128
	XORQ AX, AX                   // AX = current offset into buf
	MOVQ len+8(FP), CX
	ANDQ $0xffffffffffffffc0, CX  // CX = len rounded down to 64-byte multiple
	CMPQ AX, CX
	JEQ  check_partial_load       // buffer shorter than 64 bytes: maybe only a partial block

loop:
	MOVQ    buf+0(FP), DI
	VMOVDQU (DI)(AX*1), Y8        // load low 32 bytes of this block
	VMOVDQU 0x20(DI)(AX*1), Y9    // load high 32 bytes of this block
	ADDQ    $0x40, AX             // advance offset past this block

loop_after_load:
	PUSHQ CX                      // save aligned length across helper calls
	PUSHQ AX                      // save current offset across helper calls

	MOVQ p3+16(FP), DX
	CALL ·__find_odd_backslash_sequences(SB)

	MOVQ AX, DX                   // DX = odd_ends (helper result)
	MOVQ prev_iter_inside_quote+24(FP), CX
	MOVQ quote_bits+32(FP), R8
	MOVQ error_mask+40(FP), R9

	CALL  ·__find_quote_mask_and_bits(SB)
	PUSHQ AX                      // save quote_mask on the stack until finalize

	MOVQ whitespace+48(FP), DX
	MOVQ structurals_in+56(FP), CX

	CALL ·__find_whitespace_and_structurals(SB)

	// Load the values the previous helpers wrote through their out-pointers.
	MOVQ  structurals_in+56(FP), DI; MOVQ (DI), DI // DI = structurals
	MOVQ  whitespace+48(FP), SI; MOVQ (SI), SI     // SI = whitespace
	POPQ  DX                                       // DX = quote_mask
	PUSHQ DX                                       // Save again for newline determination

	MOVQ quote_bits+32(FP), CX; MOVQ (CX), CX      // CX = quote_bits
	MOVQ prev_iter_ends_pseudo_pred+64(FP), R8     // R8 = &prev_iter_ends_pseudo_pred

	CALL ·__finalize_structurals(SB)

	POPQ DX                       // DX = quote_mask (for newline detection)
	CMPQ ndjson+112(FP), $0
	JZ   skip_ndjson_detection    // not ndjson: newlines are plain whitespace
	CALL ·__find_newline_delimiters(SB)
	ORQ  BX, AX                   // merge newline delimiter bits into structurals

skip_ndjson_detection:
	// Flatten the structural bitmask (AX) into the indexes buffer.
	MOVQ indexes+72(FP), DI
	MOVQ index+80(FP), SI; MOVQ (SI), BX        // BX = index (write position)
	MOVQ carried+96(FP), R11; MOVQ (R11), DX    // DX = carried
	MOVQ position+104(FP), R12; MOVQ (R12), R10 // R10 = position
	CALL ·__flatten_bits_incremental(SB)
	MOVQ BX, (SI)                 // *index = BX
	MOVQ DX, (R11)                // *carried = DX
	MOVQ R10, (R12)               // *position = R10

	POPQ AX                       // restore current offset
	POPQ CX                       // restore aligned length

	CMPQ BX, indexes_len+88(FP)
	JGE  done                     // index buffer full: stop, report bytes processed

	CMPQ AX, CX
	JLT  loop                     // more full 64-byte blocks remain

	// Check if AX is not aligned on a 64-byte boundary, this signals the last (partial) iteration
	MOVQ AX, BX
	ANDQ $0x3f, BX
	CMPQ BX, $0
	JNE  done                     // partial block already handled: we are finished

check_partial_load:
	MOVQ len+8(FP), CX
	ANDQ $0x3f, CX                // CX = number of trailing bytes (< 64)
	CMPQ CX, $0
	JNE  masking                  // end of message is not aligned on 64-byte boundary, so mask the remaining bytes

done:
	MOVQ AX, processed+120(FP)    // return total bytes consumed
	VZEROUPPER                    // avoid AVX->SSE transition penalty on return to Go code
	RET

masking:
	// Do a partial load and mask out bytes after the end of the message with whitespace
	VPBROADCASTQ WHITESPACE<>(SB), Y13 // Y13 = 0x20 in every byte (padding constant)

	MOVQ    buf+0(FP), DI
	VMOVDQU (DI)(AX*1), Y8        // Always load low 32-bytes
	CMPQ    CX, $0x20
	JGE     masking_high          // >= 32 valid bytes: low half is fully valid

	// Perform masking on low 32-bytes; high half is pure padding
	MASK_WHITESPACE(0x1f, Y8)
	VMOVDQU Y13, Y9               // high 32 bytes = all whitespace
	JMP     masking_done

masking_high:
	// Perform masking on high 32-bytes
	VMOVDQU 0x20(DI)(AX*1), Y9    // Load high 32-bytes
	MASK_WHITESPACE(0x3f, Y9)

masking_done:
	ADDQ CX, AX                   // offset now covers the partial tail too
	JMP  loop_after_load          // Rejoin loop after regular loading

// Sliding byte-mask table: reading 32 bytes at offset (MAX - CX) yields a mask
// whose first CX bytes are 0xff (valid) and the rest 0x00 (padding).
DATA MASKTABLE<>+0x000(SB)/8, $0xffffffffffffffff
DATA MASKTABLE<>+0x008(SB)/8, $0xffffffffffffffff
DATA MASKTABLE<>+0x010(SB)/8, $0xffffffffffffffff
DATA MASKTABLE<>+0x018(SB)/8, $0x00ffffffffffffff
DATA MASKTABLE<>+0x020(SB)/8, $0x0000000000000000
DATA MASKTABLE<>+0x028(SB)/8, $0x0000000000000000
DATA MASKTABLE<>+0x030(SB)/8, $0x0000000000000000
DATA MASKTABLE<>+0x038(SB)/8, $0x0000000000000000
GLOBL MASKTABLE<>(SB), 8, $64 // 8 = RODATA

// Eight space characters (0x20), broadcast via VPBROADCASTQ for padding.
DATA WHITESPACE<>+0x000(SB)/8, $0x2020202020202020
GLOBL WHITESPACE<>(SB), 8, $8 // 8 = RODATA