github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/stage1_find_marks_amd64_test.go (about)

     1  //go:build !noasm && !appengine && gc
     2  // +build !noasm,!appengine,gc
     3  
     4  /*
     5   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     6   *
     7   * Licensed under the Apache License, Version 2.0 (the "License");
     8   * you may not use this file except in compliance with the License.
     9   * You may obtain a copy of the License at
    10   *
    11   *     http://www.apache.org/licenses/LICENSE-2.0
    12   *
    13   * Unless required by applicable law or agreed to in writing, software
    14   * distributed under the License is distributed on an "AS IS" BASIS,
    15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16   * See the License for the specific language governing permissions and
    17   * limitations under the License.
    18   */
    19  
    20  package simdjson
    21  
    22  import (
    23  	"fmt"
    24  	"math/bits"
    25  	"strings"
    26  	"testing"
    27  )
    28  
    29  func TestStage1FindMarks(t *testing.T) {
    30  	if !SupportedCPU() {
    31  		t.SkipNow()
    32  	}
    33  	testCases := []struct {
    34  		quoted                string
    35  		structurals           string
    36  		whitespace            string
    37  		structurals_finalized string
    38  	}{
    39  		{
    40  			// {"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor
    41  			"0111111000111111000000111111100000011111100111111111111111111111", // quoted
    42  			"1000000011000000010001000000001000100000001000000000000000000000", // structurals
    43  			"0000000000000000000000000000000000000000000000001000010000100000", // whitespace
    44  			"1100000011100000011001100000001100110000001100000000000000000000", // structurals_finalized
    45  		},
    46  	}
    47  
    48  	prev_iter_ends_odd_backslash := uint64(0)
    49  	odd_ends := find_odd_backslash_sequences([]byte(demo_json), &prev_iter_ends_odd_backslash)
    50  
    51  	if odd_ends != 0 {
    52  		t.Errorf("TestStage1FindMarks: got: %d want: %d", odd_ends, 0)
    53  	}
    54  
    55  	// detect insides of quote pairs ("quote_mask") and also our quote_bits themselves
    56  	quote_bits := uint64(0)
    57  	prev_iter_inside_quote, error_mask := uint64(0), uint64(0)
    58  	quote_mask := find_quote_mask_and_bits([]byte(demo_json), odd_ends, &prev_iter_inside_quote, &quote_bits, &error_mask)
    59  	quoted := fmt.Sprintf("%064b", bits.Reverse64(quote_mask))
    60  	if quoted != testCases[0].quoted {
    61  		t.Errorf("TestStage1FindMarks: got: %s want: %s", quoted, testCases[0].quoted)
    62  	}
    63  
    64  	structurals_mask := uint64(0)
    65  	whitespace_mask := uint64(0)
    66  	find_whitespace_and_structurals([]byte(demo_json), &whitespace_mask, &structurals_mask)
    67  
    68  	structurals := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask))
    69  	if structurals != testCases[0].structurals {
    70  		t.Errorf("TestStage1FindMarks: got: %s want: %s", structurals, testCases[0].structurals)
    71  	}
    72  	whitespace := fmt.Sprintf("%064b", bits.Reverse64(whitespace_mask))
    73  	if whitespace != testCases[0].whitespace {
    74  		t.Errorf("TestStage1FindMarks: got: %s want: %s", whitespace, testCases[0].whitespace)
    75  	}
    76  
    77  	// fixup structurals to reflect quotes and add pseudo-structural characters
    78  	prev_iter_ends_pseudo_pred := uint64(0)
    79  	structurals_mask = finalize_structurals(structurals_mask, whitespace_mask, quote_mask, quote_bits, &prev_iter_ends_pseudo_pred)
    80  
    81  	structural_finalized := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask))
    82  	if structural_finalized != testCases[0].structurals_finalized {
    83  		t.Errorf("TestStage1FindMarks: got: %s want: %s", structural_finalized, testCases[0].structurals_finalized)
    84  	}
    85  }
    86  
    87  func TestFindStructuralIndices(t *testing.T) {
    88  	if !SupportedCPU() {
    89  		t.SkipNow()
    90  	}
    91  	parsed := []string{
    92  		`{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    93  		` "Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    94  		`        :{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    95  		`         {"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    96  		`          "Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    97  		`                 :800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    98  		`                  800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    99  		`                     ,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   100  		`                      "Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   101  		`                              :600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   102  		`                               600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   103  		`                                  ,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   104  		`                                   "Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   105  		`                                          :"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   106  		`                                           "View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   107  		`                                                                 ,"Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   108  		`                                                                  "Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   109  		`                                                                             :{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   110  		`                                                                              {"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   111  		`                                                                               "Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   112  		`                                                                                    :"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   113  		`                                                                                     "http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   114  		`                                                                                                                             ,"Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   115  		`                                                                                                                              "Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   116  		`                                                                                                                                      :125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   117  		`                                                                                                                                       125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   118  		`                                                                                                                                          ,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   119  		`                                                                                                                                           "Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   120  		`                                                                                                                                                  :100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   121  		`                                                                                                                                                   100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   122  		`                                                                                                                                                      },"Animated":false,"IDs":[116,943,234,38793]}}`,
   123  		`                                                                                                                                                       ,"Animated":false,"IDs":[116,943,234,38793]}}`,
   124  		`                                                                                                                                                        "Animated":false,"IDs":[116,943,234,38793]}}`,
   125  		`                                                                                                                                                                  :false,"IDs":[116,943,234,38793]}}`,
   126  		`                                                                                                                                                                   false,"IDs":[116,943,234,38793]}}`,
   127  		`                                                                                                                                                                        ,"IDs":[116,943,234,38793]}}`,
   128  		`                                                                                                                                                                         "IDs":[116,943,234,38793]}}`,
   129  		`                                                                                                                                                                              :[116,943,234,38793]}}`,
   130  		`                                                                                                                                                                               [116,943,234,38793]}}`,
   131  		`                                                                                                                                                                                116,943,234,38793]}}`,
   132  		`                                                                                                                                                                                   ,943,234,38793]}}`,
   133  		`                                                                                                                                                                                    943,234,38793]}}`,
   134  		`                                                                                                                                                                                       ,234,38793]}}`,
   135  		`                                                                                                                                                                                        234,38793]}}`,
   136  		`                                                                                                                                                                                           ,38793]}}`,
   137  		`                                                                                                                                                                                            38793]}}`,
   138  		`                                                                                                                                                                                                 ]}}`,
   139  		`                                                                                                                                                                                                  }}`,
   140  		`                                                                                                                                                                                                   }`,
   141  	}
   142  
   143  	pj := internalParsedJson{}
   144  	pj.indexChans = make(chan indexChan, 16)
   145  
   146  	// No need to spawn go-routine since the channel is large enough
   147  	pj.Message = []byte(demo_json)
   148  	pj.findStructuralIndices()
   149  
   150  	ipos, pos := 0, ^uint64(0)
   151  	for ic := range pj.indexChans {
   152  		if ic.index == -1 {
   153  			break
   154  		}
   155  		for j := 0; j < ic.length; j++ {
   156  			pos += uint64((*ic.indexes)[j])
   157  			result := fmt.Sprintf("%s%s", strings.Repeat(" ", int(pos)), demo_json[pos:])
   158  			// fmt.Printf("`%s`,\n", result)
   159  			if result != parsed[ipos] {
   160  				t.Errorf("TestFindStructuralBits: got: %s want: %s", result, parsed[ipos])
   161  			}
   162  			ipos++
   163  		}
   164  	}
   165  }
   166  
   167  func BenchmarkStage1(b *testing.B) {
   168  	if !SupportedCPU() {
   169  		b.SkipNow()
   170  	}
   171  	msg := loadCompressed(b, "twitter")
   172  
   173  	b.SetBytes(int64(len(msg)))
   174  	b.ReportAllocs()
   175  	b.ResetTimer()
   176  
   177  	pj := internalParsedJson{}
   178  	pj.Message = msg
   179  	for i := 0; i < b.N; i++ {
   180  		// Create new channel (large enough so we won't block)
   181  		pj.indexChans = make(chan indexChan, 128)
   182  		pj.findStructuralIndices()
   183  	}
   184  }