github.com/dgraph-io/simdjson-go@v0.3.0/parsed_json_test.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package simdjson
    18  
    19  import (
    20  	"encoding/binary"
    21  	"encoding/json"
    22  	"io/ioutil"
    23  	"path/filepath"
    24  	"testing"
    25  
    26  	"github.com/klauspost/compress/zstd"
    27  )
    28  
// demo_json is a small, compact (no insignificant whitespace) JSON document
// containing nested objects, strings, integers, a boolean and an array.
// TestPrintJson parses it and expects marshalling to reproduce it verbatim.
const demo_json = `{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`
    30  
// tester is the minimal failure-reporting surface required by loadCompressed.
// Both *testing.T and *testing.B provide this method, which lets the helper
// be shared between tests and benchmarks.
type tester interface {
	// Fatal reports args as a failure of the current test or benchmark.
	Fatal(args ...interface{})
}
    34  
    35  func loadCompressed(t tester, file string) (ref []byte) {
    36  	dec, err := zstd.NewReader(nil)
    37  	if err != nil {
    38  		t.Fatal(err)
    39  	}
    40  	ref, err = ioutil.ReadFile(filepath.Join("testdata", file+".json.zst"))
    41  	if err != nil {
    42  		t.Fatal(err)
    43  	}
    44  	ref, err = dec.DecodeAll(ref, nil)
    45  	if err != nil {
    46  		t.Fatal(err)
    47  	}
    48  
    49  	return ref
    50  }
    51  
    52  var testCases = []struct {
    53  	name  string
    54  	array bool
    55  }{
    56  	{
    57  		name: "apache_builds",
    58  	},
    59  	{
    60  		name: "canada",
    61  	},
    62  	{
    63  		name: "citm_catalog",
    64  	},
    65  	{
    66  		name:  "github_events",
    67  		array: true,
    68  	},
    69  	{
    70  		name: "gsoc-2018",
    71  	},
    72  	{
    73  		name: "instruments",
    74  	},
    75  	{
    76  		name:  "numbers",
    77  		array: true,
    78  	},
    79  	{
    80  		name: "marine_ik",
    81  	},
    82  	{
    83  		name: "mesh",
    84  	},
    85  	{
    86  		name: "mesh.pretty",
    87  	},
    88  	{
    89  		name: "twitterescaped",
    90  	},
    91  	{
    92  		name: "twitter",
    93  	},
    94  	{
    95  		name: "random",
    96  	},
    97  	{
    98  		name: "update-center",
    99  	},
   100  }
   101  
   102  func bytesToUint64(buf []byte) []uint64 {
   103  
   104  	tape := make([]uint64, len(buf)/8)
   105  	for i := range tape {
   106  		tape[i] = binary.LittleEndian.Uint64(buf[i*8:])
   107  	}
   108  	return tape
   109  }
   110  
   111  func testCTapeCtoGoTapeCompare(t *testing.T, ctape []uint64, csbuf []byte, pj internalParsedJson) {
   112  
   113  	gotape := pj.Tape
   114  
   115  	cindex, goindex := 0, 0
   116  	for goindex < len(gotape) {
   117  		if cindex == len(ctape) {
   118  			t.Errorf("TestCTapeCtoGoTapeCompare: unexpected, ctape at end, but gotape not yet")
   119  			break
   120  		}
   121  		cval, goval := ctape[cindex], gotape[goindex]
   122  
   123  		// Make sure the type is the same between the C and Go version
   124  		if cval>>56 != goval>>56 {
   125  			t.Errorf("TestCTapeCtoGoTapeCompare: got: %02x want: %02x", goval>>56, cval>>56)
   126  		}
   127  
   128  		ntype := Tag(goval >> 56)
   129  		switch ntype {
   130  		case TagRoot, TagObjectStart, TagObjectEnd, TagArrayStart, TagArrayEnd:
   131  			cindex++
   132  			goindex++
   133  
   134  		case TagString:
   135  			cpayload := cval & JSONVALUEMASK
   136  			cstrlen := binary.LittleEndian.Uint32(csbuf[cpayload : cpayload+4])
   137  			cstr := string(csbuf[cpayload+4 : cpayload+4+uint64(cstrlen)])
   138  			gostr, _ := pj.stringAt(goval&JSONVALUEMASK, gotape[goindex+1])
   139  			if cstr != gostr {
   140  				t.Errorf("TestCTapeCtoGoTapeCompare: got: %s want: %s", gostr, cstr)
   141  			}
   142  			cindex++
   143  			goindex += 2
   144  
   145  		case TagNull, TagBoolTrue, TagBoolFalse:
   146  			cindex++
   147  			goindex++
   148  
   149  		case TagInteger, TagFloat:
   150  			if ctape[cindex+1] != gotape[goindex+1] {
   151  				if ntype != TagFloat {
   152  					t.Errorf("TestCTapeCtoGoTapeCompare: got: %016x want: %016x", gotape[goindex+1], ctape[cindex+1])
   153  
   154  				}
   155  			}
   156  			cindex += 2
   157  			goindex += 2
   158  
   159  		default:
   160  			t.Errorf("TestCTapeCtoGoTapeCompare: unexpected token, got: %02x", ntype)
   161  		}
   162  	}
   163  
   164  	if cindex != len(ctape) {
   165  		t.Errorf("TestCTapeCtoGoTapeCompare: got: %d want: %d", cindex, len(ctape))
   166  	}
   167  }
   168  
   169  func BenchmarkIter_MarshalJSONBuffer(b *testing.B) {
   170  	if !SupportedCPU() {
   171  		b.SkipNow()
   172  	}
   173  	for _, tt := range testCases {
   174  		b.Run(tt.name, func(b *testing.B) {
   175  			ref := loadCompressed(b, tt.name)
   176  			pj, err := Parse(ref, nil)
   177  			if err != nil {
   178  				b.Fatal(err)
   179  			}
   180  			iter := pj.Iter()
   181  			cpy := iter
   182  			output, err := cpy.MarshalJSON()
   183  			if err != nil {
   184  				b.Fatal(err)
   185  			}
   186  			b.SetBytes(int64(len(output)))
   187  			b.ReportAllocs()
   188  			b.ResetTimer()
   189  			for i := 0; i < b.N; i++ {
   190  				cpy := iter
   191  				output, err = cpy.MarshalJSONBuffer(output[:0])
   192  				if err != nil {
   193  					b.Fatal(err)
   194  				}
   195  			}
   196  		})
   197  	}
   198  }
   199  
   200  func BenchmarkGoMarshalJSON(b *testing.B) {
   201  	for _, tt := range testCases {
   202  		b.Run(tt.name, func(b *testing.B) {
   203  			ref := loadCompressed(b, tt.name)
   204  			var m interface{}
   205  			m = map[string]interface{}{}
   206  			if tt.array {
   207  				m = []interface{}{}
   208  			}
   209  			err := json.Unmarshal(ref, &m)
   210  			if err != nil {
   211  				b.Fatal(err)
   212  			}
   213  			output, err := json.Marshal(m)
   214  			if err != nil {
   215  				b.Fatal(err)
   216  			}
   217  			b.SetBytes(int64(len(output)))
   218  			b.ReportAllocs()
   219  			b.ResetTimer()
   220  			for i := 0; i < b.N; i++ {
   221  				output, err = json.Marshal(m)
   222  				if err != nil {
   223  					b.Fatal(err)
   224  				}
   225  			}
   226  		})
   227  	}
   228  }
   229  
   230  func TestPrintJson(t *testing.T) {
   231  	if !SupportedCPU() {
   232  		t.SkipNow()
   233  	}
   234  	msg := []byte(demo_json)
   235  	expected := `{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}`
   236  
   237  	pj, err := Parse(msg, nil)
   238  
   239  	if err != nil {
   240  		t.Errorf("parseMessage failed\n")
   241  	}
   242  
   243  	iter := pj.Iter()
   244  	out, err := iter.MarshalJSON()
   245  	if err != nil {
   246  		t.Fatal(err)
   247  	}
   248  
   249  	if string(out) != expected {
   250  		t.Errorf("TestPrintJson: got: %s want: %s", out, expected)
   251  	}
   252  }