github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/stage1_find_marks_amd64_test.go (about) 1 //go:build !noasm && !appengine && gc 2 // +build !noasm,!appengine,gc 3 4 /* 5 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 package simdjson 21 22 import ( 23 "fmt" 24 "math/bits" 25 "strings" 26 "testing" 27 ) 28 29 func TestStage1FindMarks(t *testing.T) { 30 if !SupportedCPU() { 31 t.SkipNow() 32 } 33 testCases := []struct { 34 quoted string 35 structurals string 36 whitespace string 37 structurals_finalized string 38 }{ 39 { 40 // {"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor 41 "0111111000111111000000111111100000011111100111111111111111111111", // quoted 42 "1000000011000000010001000000001000100000001000000000000000000000", // structurals 43 "0000000000000000000000000000000000000000000000001000010000100000", // whitespace 44 "1100000011100000011001100000001100110000001100000000000000000000", // structurals_finalized 45 }, 46 } 47 48 prev_iter_ends_odd_backslash := uint64(0) 49 odd_ends := find_odd_backslash_sequences([]byte(demo_json), &prev_iter_ends_odd_backslash) 50 51 if odd_ends != 0 { 52 t.Errorf("TestStage1FindMarks: got: %d want: %d", odd_ends, 0) 53 } 54 55 // detect insides of quote pairs ("quote_mask") and also our quote_bits themselves 56 quote_bits := uint64(0) 57 prev_iter_inside_quote, error_mask := uint64(0), uint64(0) 58 quote_mask := find_quote_mask_and_bits([]byte(demo_json), odd_ends, &prev_iter_inside_quote, "e_bits, &error_mask) 59 quoted := fmt.Sprintf("%064b", bits.Reverse64(quote_mask)) 60 if quoted != testCases[0].quoted { 61 t.Errorf("TestStage1FindMarks: got: %s want: %s", quoted, testCases[0].quoted) 62 } 63 64 structurals_mask := uint64(0) 65 whitespace_mask := uint64(0) 66 find_whitespace_and_structurals([]byte(demo_json), &whitespace_mask, &structurals_mask) 67 68 structurals := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask)) 69 if structurals != testCases[0].structurals { 70 t.Errorf("TestStage1FindMarks: got: %s want: %s", structurals, testCases[0].structurals) 71 } 72 whitespace := fmt.Sprintf("%064b", bits.Reverse64(whitespace_mask)) 73 if whitespace != testCases[0].whitespace { 74 t.Errorf("TestStage1FindMarks: got: %s want: %s", whitespace, testCases[0].whitespace) 75 } 76 77 // fixup structurals to reflect quotes and add pseudo-structural characters 78 prev_iter_ends_pseudo_pred := uint64(0) 79 structurals_mask = finalize_structurals(structurals_mask, whitespace_mask, quote_mask, quote_bits, &prev_iter_ends_pseudo_pred) 80 81 structural_finalized := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask)) 82 if structural_finalized != testCases[0].structurals_finalized { 83 t.Errorf("TestStage1FindMarks: got: %s want: %s", structural_finalized, testCases[0].structurals_finalized) 84 } 85 } 86 87 func TestFindStructuralIndices(t *testing.T) { 88 if !SupportedCPU() { 89 t.SkipNow() 90 } 91 parsed := []string{ 92 `{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 93 ` "Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 94 ` :{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 95 ` {"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 96 ` "Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 97 ` :800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 98 ` 800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 99 ` ,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 100 ` "Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 101 ` :600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 102 ` 600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 103 ` ,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 104 ` "Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 105 ` :"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 106 ` "View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 107 ` ,"Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 108 ` "Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 109 ` :{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 110 ` {"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 111 ` "Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 112 ` :"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 113 ` "http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 114 ` ,"Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 115 ` "Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 116 ` :125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 117 ` 125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 118 ` ,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 119 ` "Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 120 ` :100},"Animated":false,"IDs":[116,943,234,38793]}}`, 121 ` 100},"Animated":false,"IDs":[116,943,234,38793]}}`, 122 ` },"Animated":false,"IDs":[116,943,234,38793]}}`, 123 ` ,"Animated":false,"IDs":[116,943,234,38793]}}`, 124 ` "Animated":false,"IDs":[116,943,234,38793]}}`, 125 ` :false,"IDs":[116,943,234,38793]}}`, 126 ` false,"IDs":[116,943,234,38793]}}`, 127 ` ,"IDs":[116,943,234,38793]}}`, 128 ` "IDs":[116,943,234,38793]}}`, 129 ` :[116,943,234,38793]}}`, 130 ` [116,943,234,38793]}}`, 131 ` 116,943,234,38793]}}`, 132 ` ,943,234,38793]}}`, 133 ` 943,234,38793]}}`, 134 ` ,234,38793]}}`, 135 ` 234,38793]}}`, 136 ` ,38793]}}`, 137 ` 38793]}}`, 138 ` ]}}`, 139 ` }}`, 140 ` }`, 141 } 142 143 pj := internalParsedJson{} 144 pj.indexChans = make(chan indexChan, 16) 145 146 // No need to spawn go-routine since the channel is large enough 147 pj.Message = []byte(demo_json) 148 pj.findStructuralIndices() 149 150 ipos, pos := 0, ^uint64(0) 151 for ic := range pj.indexChans { 152 if ic.index == -1 { 153 break 154 } 155 for j := 0; j < ic.length; j++ { 156 pos += uint64((*ic.indexes)[j]) 157 result := fmt.Sprintf("%s%s", strings.Repeat(" ", int(pos)), demo_json[pos:]) 158 // fmt.Printf("`%s`,\n", result) 159 if result != parsed[ipos] { 160 t.Errorf("TestFindStructuralBits: got: %s want: %s", result, parsed[ipos]) 161 } 162 ipos++ 163 } 164 } 165 } 166 167 func BenchmarkStage1(b *testing.B) { 168 if !SupportedCPU() { 169 b.SkipNow() 170 } 171 msg := loadCompressed(b, "twitter") 172 173 b.SetBytes(int64(len(msg))) 174 b.ReportAllocs() 175 b.ResetTimer() 176 177 pj := internalParsedJson{} 178 pj.Message = msg 179 for i := 0; i < b.N; i++ { 180 // Create new channel (large enough so we won't block) 181 pj.indexChans = make(chan indexChan, 128) 182 pj.findStructuralIndices() 183 } 184 }