github.com/dgraph-io/simdjson-go@v0.3.0/stage1_find_marks_amd64_test.go (about) 1 //+build !noasm 2 //+build !appengine 3 //+build gc 4 5 /* 6 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package simdjson 22 23 import ( 24 "fmt" 25 "math/bits" 26 "strings" 27 "testing" 28 ) 29 30 func TestStage1FindMarks(t *testing.T) { 31 32 testCases := []struct { 33 quoted string 34 structurals string 35 whitespace string 36 structurals_finalized string 37 }{ 38 { 39 // {"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor 40 "0111111000111111000000111111100000011111100111111111111111111111", // quoted 41 "1000000011000000010001000000001000100000001000000000000000000000", // structurals 42 "0000000000000000000000000000000000000000000000001000010000100000", // whitespace 43 "1100000011100000011001100000001100110000001100000000000000000000", // structurals_finalized 44 }, 45 } 46 47 prev_iter_ends_odd_backslash := uint64(0) 48 odd_ends := find_odd_backslash_sequences([]byte(demo_json), &prev_iter_ends_odd_backslash) 49 50 if odd_ends != 0 { 51 t.Errorf("TestStage1FindMarks: got: %d want: %d", odd_ends, 0) 52 } 53 54 // detect insides of quote pairs ("quote_mask") and also our quote_bits themselves 55 quote_bits := uint64(0) 56 prev_iter_inside_quote, error_mask := uint64(0), uint64(0) 57 quote_mask := find_quote_mask_and_bits([]byte(demo_json), odd_ends, &prev_iter_inside_quote, "e_bits, &error_mask) 58 quoted := fmt.Sprintf("%064b", bits.Reverse64(quote_mask)) 59 if quoted != testCases[0].quoted { 60 t.Errorf("TestStage1FindMarks: got: %s want: %s", quoted, testCases[0].quoted) 61 } 62 63 structurals_mask := uint64(0) 64 whitespace_mask := uint64(0) 65 find_whitespace_and_structurals([]byte(demo_json), &whitespace_mask, &structurals_mask) 66 67 structurals := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask)) 68 if structurals != testCases[0].structurals { 69 t.Errorf("TestStage1FindMarks: got: %s want: %s", structurals, testCases[0].structurals) 70 } 71 whitespace := fmt.Sprintf("%064b", bits.Reverse64(whitespace_mask)) 72 if whitespace != testCases[0].whitespace { 73 t.Errorf("TestStage1FindMarks: got: %s want: %s", whitespace, testCases[0].whitespace) 74 } 75 76 // fixup structurals to reflect quotes and add pseudo-structural characters 77 prev_iter_ends_pseudo_pred := uint64(0) 78 structurals_mask = finalize_structurals(structurals_mask, whitespace_mask, quote_mask, quote_bits, &prev_iter_ends_pseudo_pred) 79 80 structural_finalized := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask)) 81 if structural_finalized != testCases[0].structurals_finalized { 82 t.Errorf("TestStage1FindMarks: got: %s want: %s", structural_finalized, testCases[0].structurals_finalized) 83 } 84 } 85 86 func TestFindStructuralIndices(t *testing.T) { 87 88 parsed := []string{ 89 `{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 90 ` "Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 91 ` :{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 92 ` {"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 93 ` "Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 94 ` :800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 95 ` 800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 96 ` ,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 97 ` "Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 98 ` :600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 99 ` 600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 100 ` ,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 101 ` "Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 102 ` :"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 103 ` "View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 104 ` ,"Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 105 ` "Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 106 ` :{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 107 ` {"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 108 ` "Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 109 ` :"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 110 ` "http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 111 ` ,"Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 112 ` "Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 113 ` :125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 114 ` 125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 115 ` ,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 116 ` "Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`, 117 ` :100},"Animated":false,"IDs":[116,943,234,38793]}}`, 118 ` 100},"Animated":false,"IDs":[116,943,234,38793]}}`, 119 ` },"Animated":false,"IDs":[116,943,234,38793]}}`, 120 ` ,"Animated":false,"IDs":[116,943,234,38793]}}`, 121 ` "Animated":false,"IDs":[116,943,234,38793]}}`, 122 ` :false,"IDs":[116,943,234,38793]}}`, 123 ` false,"IDs":[116,943,234,38793]}}`, 124 ` ,"IDs":[116,943,234,38793]}}`, 125 ` "IDs":[116,943,234,38793]}}`, 126 ` :[116,943,234,38793]}}`, 127 ` [116,943,234,38793]}}`, 128 ` 116,943,234,38793]}}`, 129 ` ,943,234,38793]}}`, 130 ` 943,234,38793]}}`, 131 ` ,234,38793]}}`, 132 ` 234,38793]}}`, 133 ` ,38793]}}`, 134 ` 38793]}}`, 135 ` ]}}`, 136 ` }}`, 137 ` }`, 138 } 139 140 pj := internalParsedJson{} 141 pj.indexChans = make(chan indexChan, 16) 142 143 // No need to spawn go-routine since the channel is large enough 144 findStructuralIndices([]byte(demo_json), &pj) 145 146 ipos, pos := 0, ^uint64(0) 147 for ic := range pj.indexChans { 148 for j := 0; j < ic.length; j++ { 149 pos += uint64((*ic.indexes)[j]) 150 result := fmt.Sprintf("%s%s", strings.Repeat(" ", int(pos)), demo_json[pos:]) 151 // fmt.Printf("`%s`,\n", result) 152 if result != parsed[ipos] { 153 t.Errorf("TestFindStructuralBits: got: %s want: %s", result, parsed[ipos]) 154 } 155 ipos++ 156 } 157 } 158 } 159 160 func BenchmarkStage1(b *testing.B) { 161 msg := loadCompressed(b, "twitter") 162 163 b.SetBytes(int64(len(msg))) 164 b.ReportAllocs() 165 b.ResetTimer() 166 167 pj := internalParsedJson{} 168 169 for i := 0; i < b.N; i++ { 170 // Create new channel (large enough so we won't block) 171 pj.indexChans = make(chan indexChan, 128) 172 findStructuralIndices([]byte(msg), &pj) 173 } 174 }