github.com/dgraph-io/simdjson-go@v0.3.0/stage1_find_marks_amd64.go (about) 1 //+build !noasm 2 //+build !appengine 3 //+build gc 4 5 /* 6 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package simdjson 22 23 import ( 24 "sync/atomic" 25 26 "github.com/klauspost/cpuid/v2" 27 ) 28 29 var jsonMarkupTable = [256]bool{ 30 '{': true, 31 '}': true, 32 '[': true, 33 ']': true, 34 ',': true, 35 ':': true, 36 } 37 38 func jsonMarkup(b byte) bool { 39 return jsonMarkupTable[b] 40 } 41 42 func findStructuralIndices(buf []byte, pj *internalParsedJson) bool { 43 44 f := find_structural_bits_in_slice 45 if cpuid.CPU.Has(cpuid.AVX512F) { 46 f = find_structural_bits_in_slice_avx512 47 } 48 49 // persistent state across loop 50 // does the last iteration end with an odd-length sequence of backslashes? 51 // either 0 or 1, but a 64-bit value 52 prev_iter_ends_odd_backslash := uint64(0) 53 54 // does the previous iteration end inside a double-quote pair? 55 prev_iter_inside_quote := uint64(0) // either all zeros or all ones 56 57 // does the previous iteration end on something that is a predecessor of a 58 // pseudo-structural character - i.e. whitespace or a structural character 59 // effectively the very first char is considered to follow "whitespace" for the 60 // purposes of pseudo-structural character detection so we initialize to 1 61 prev_iter_ends_pseudo_pred := uint64(1) 62 63 error_mask := uint64(0) // for unescaped characters within strings (ASCII code points < 0x20) 64 65 indexTotal := 0 66 67 // empty bits that are carried over to the next call to flatten_bits_incremental 68 carried := uint64(0) 69 70 // absolute position into message buffer 71 position := ^uint64(0) 72 stripped_index := ^uint64(0) 73 74 for len(buf) > 0 { 75 76 index := indexChan{} 77 offset := atomic.AddUint64(&pj.buffersOffset, 1) 78 index.indexes = &pj.buffers[offset%indexSlots] 79 80 // In case last index during previous round was stripped back, put it back 81 if stripped_index != ^uint64(0) { 82 position += stripped_index 83 index.indexes[0] = uint32(stripped_index) 84 index.length = 1 85 stripped_index = ^uint64(0) 86 } 87 88 processed := f(buf[:len(buf) & ^63], &prev_iter_ends_odd_backslash, 89 &prev_iter_inside_quote, &error_mask, 90 &prev_iter_ends_pseudo_pred, 91 index.indexes, &index.length, &carried, &position, pj.ndjson) 92 93 // Check if we have at most a single iteration of 64 bytes left, tag on to previous invocation 94 if uint64(len(buf))-processed <= 64 { 95 // Process last 64 bytes in larger buffer (to safeguard against reading beyond the end of the buffer) 96 paddedBuf := [128]byte{} 97 copy(paddedBuf[:], buf[processed:]) 98 paddedBytes := uint64(len(buf)) - processed 99 processed += f(paddedBuf[:paddedBytes], &prev_iter_ends_odd_backslash, 100 &prev_iter_inside_quote, &error_mask, 101 &prev_iter_ends_pseudo_pred, 102 index.indexes, &index.length, &carried, &position, pj.ndjson) 103 } 104 105 if index.length == 0 { // No structural chars found, so error out 106 error_mask = ^uint64(0) 107 break 108 } 109 110 if uint64(len(buf)) == processed { // message processing completed? 111 // break out if either 112 // - is there an unmatched quote at the end 113 // - the ending structural char is not either a '}' (normal json) or a ']' (array style) 114 if prev_iter_inside_quote != 0 || 115 position >= uint64(len(buf)) || 116 !(buf[position] == '}' || buf[position] == ']') { 117 error_mask = ^uint64(0) 118 break 119 } 120 } else if !jsonMarkup(buf[position]) { 121 // There may be a dangling quote at the end of the index buffer 122 // Strip it from current index buffer and save for next round 123 stripped_index = uint64(index.indexes[index.length-1]) 124 position -= stripped_index 125 index.length -= 1 126 } 127 128 pj.indexChans <- index 129 indexTotal += index.length 130 131 buf = buf[processed:] 132 position -= processed 133 } 134 close(pj.indexChans) 135 136 // a valid JSON file cannot have zero structural indexes - we should have found something 137 return error_mask == 0 && indexTotal > 0 138 }