github.com/dgraph-io/simdjson-go@v0.3.0/stage1_find_marks_amd64_test.go (about)

     1  //+build !noasm
     2  //+build !appengine
     3  //+build gc
     4  
     5  /*
     6   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     7   *
     8   * Licensed under the Apache License, Version 2.0 (the "License");
     9   * you may not use this file except in compliance with the License.
    10   * You may obtain a copy of the License at
    11   *
    12   *     http://www.apache.org/licenses/LICENSE-2.0
    13   *
    14   * Unless required by applicable law or agreed to in writing, software
    15   * distributed under the License is distributed on an "AS IS" BASIS,
    16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    17   * See the License for the specific language governing permissions and
    18   * limitations under the License.
    19   */
    20  
    21  package simdjson
    22  
    23  import (
    24  	"fmt"
    25  	"math/bits"
    26  	"strings"
    27  	"testing"
    28  )
    29  
    30  func TestStage1FindMarks(t *testing.T) {
    31  
    32  	testCases := []struct {
    33  		quoted                string
    34  		structurals           string
    35  		whitespace            string
    36  		structurals_finalized string
    37  	}{
    38  		{
    39  			// {"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor
    40  			"0111111000111111000000111111100000011111100111111111111111111111", // quoted
    41  			"1000000011000000010001000000001000100000001000000000000000000000", // structurals
    42  			"0000000000000000000000000000000000000000000000001000010000100000", // whitespace
    43  			"1100000011100000011001100000001100110000001100000000000000000000", // structurals_finalized
    44  		},
    45  	}
    46  
    47  	prev_iter_ends_odd_backslash := uint64(0)
    48  	odd_ends := find_odd_backslash_sequences([]byte(demo_json), &prev_iter_ends_odd_backslash)
    49  
    50  	if odd_ends != 0 {
    51  		t.Errorf("TestStage1FindMarks: got: %d want: %d", odd_ends, 0)
    52  	}
    53  
    54  	// detect insides of quote pairs ("quote_mask") and also our quote_bits themselves
    55  	quote_bits := uint64(0)
    56  	prev_iter_inside_quote, error_mask := uint64(0), uint64(0)
    57  	quote_mask := find_quote_mask_and_bits([]byte(demo_json), odd_ends, &prev_iter_inside_quote, &quote_bits, &error_mask)
    58  	quoted := fmt.Sprintf("%064b", bits.Reverse64(quote_mask))
    59  	if quoted != testCases[0].quoted {
    60  		t.Errorf("TestStage1FindMarks: got: %s want: %s", quoted, testCases[0].quoted)
    61  	}
    62  
    63  	structurals_mask := uint64(0)
    64  	whitespace_mask := uint64(0)
    65  	find_whitespace_and_structurals([]byte(demo_json), &whitespace_mask, &structurals_mask)
    66  
    67  	structurals := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask))
    68  	if structurals != testCases[0].structurals {
    69  		t.Errorf("TestStage1FindMarks: got: %s want: %s", structurals, testCases[0].structurals)
    70  	}
    71  	whitespace := fmt.Sprintf("%064b", bits.Reverse64(whitespace_mask))
    72  	if whitespace != testCases[0].whitespace {
    73  		t.Errorf("TestStage1FindMarks: got: %s want: %s", whitespace, testCases[0].whitespace)
    74  	}
    75  
    76  	// fixup structurals to reflect quotes and add pseudo-structural characters
    77  	prev_iter_ends_pseudo_pred := uint64(0)
    78  	structurals_mask = finalize_structurals(structurals_mask, whitespace_mask, quote_mask, quote_bits, &prev_iter_ends_pseudo_pred)
    79  
    80  	structural_finalized := fmt.Sprintf("%064b", bits.Reverse64(structurals_mask))
    81  	if structural_finalized != testCases[0].structurals_finalized {
    82  		t.Errorf("TestStage1FindMarks: got: %s want: %s", structural_finalized, testCases[0].structurals_finalized)
    83  	}
    84  }
    85  
    86  func TestFindStructuralIndices(t *testing.T) {
    87  
    88  	parsed := []string{
    89  		`{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    90  		` "Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    91  		`        :{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    92  		`         {"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    93  		`          "Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    94  		`                 :800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    95  		`                  800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    96  		`                     ,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    97  		`                      "Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    98  		`                              :600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
    99  		`                               600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   100  		`                                  ,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   101  		`                                   "Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   102  		`                                          :"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   103  		`                                           "View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   104  		`                                                                 ,"Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   105  		`                                                                  "Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   106  		`                                                                             :{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   107  		`                                                                              {"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   108  		`                                                                               "Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   109  		`                                                                                    :"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   110  		`                                                                                     "http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   111  		`                                                                                                                             ,"Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   112  		`                                                                                                                              "Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   113  		`                                                                                                                                      :125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   114  		`                                                                                                                                       125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   115  		`                                                                                                                                          ,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   116  		`                                                                                                                                           "Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   117  		`                                                                                                                                                  :100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   118  		`                                                                                                                                                   100},"Animated":false,"IDs":[116,943,234,38793]}}`,
   119  		`                                                                                                                                                      },"Animated":false,"IDs":[116,943,234,38793]}}`,
   120  		`                                                                                                                                                       ,"Animated":false,"IDs":[116,943,234,38793]}}`,
   121  		`                                                                                                                                                        "Animated":false,"IDs":[116,943,234,38793]}}`,
   122  		`                                                                                                                                                                  :false,"IDs":[116,943,234,38793]}}`,
   123  		`                                                                                                                                                                   false,"IDs":[116,943,234,38793]}}`,
   124  		`                                                                                                                                                                        ,"IDs":[116,943,234,38793]}}`,
   125  		`                                                                                                                                                                         "IDs":[116,943,234,38793]}}`,
   126  		`                                                                                                                                                                              :[116,943,234,38793]}}`,
   127  		`                                                                                                                                                                               [116,943,234,38793]}}`,
   128  		`                                                                                                                                                                                116,943,234,38793]}}`,
   129  		`                                                                                                                                                                                   ,943,234,38793]}}`,
   130  		`                                                                                                                                                                                    943,234,38793]}}`,
   131  		`                                                                                                                                                                                       ,234,38793]}}`,
   132  		`                                                                                                                                                                                        234,38793]}}`,
   133  		`                                                                                                                                                                                           ,38793]}}`,
   134  		`                                                                                                                                                                                            38793]}}`,
   135  		`                                                                                                                                                                                                 ]}}`,
   136  		`                                                                                                                                                                                                  }}`,
   137  		`                                                                                                                                                                                                   }`,
   138  	}
   139  
   140  	pj := internalParsedJson{}
   141  	pj.indexChans = make(chan indexChan, 16)
   142  
   143  	// No need to spawn go-routine since the channel is large enough
   144  	findStructuralIndices([]byte(demo_json), &pj)
   145  
   146  	ipos, pos := 0, ^uint64(0)
   147  	for ic := range pj.indexChans {
   148  		for j := 0; j < ic.length; j++ {
   149  			pos += uint64((*ic.indexes)[j])
   150  			result := fmt.Sprintf("%s%s", strings.Repeat(" ", int(pos)), demo_json[pos:])
   151  			// fmt.Printf("`%s`,\n", result)
   152  			if result != parsed[ipos] {
   153  				t.Errorf("TestFindStructuralBits: got: %s want: %s", result, parsed[ipos])
   154  			}
   155  			ipos++
   156  		}
   157  	}
   158  }
   159  
   160  func BenchmarkStage1(b *testing.B) {
   161  	msg := loadCompressed(b, "twitter")
   162  
   163  	b.SetBytes(int64(len(msg)))
   164  	b.ReportAllocs()
   165  	b.ResetTimer()
   166  
   167  	pj := internalParsedJson{}
   168  
   169  	for i := 0; i < b.N; i++ {
   170  		// Create new channel (large enough so we won't block)
   171  		pj.indexChans = make(chan indexChan, 128)
   172  		findStructuralIndices([]byte(msg), &pj)
   173  	}
   174  }