github.com/dgraph-io/simdjson-go@v0.3.0/parse_json_amd64.go (about)

     1  //+build !noasm
     2  //+build !appengine
     3  //+build gc
     4  
     5  /*
     6   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     7   *
     8   * Licensed under the Apache License, Version 2.0 (the "License");
     9   * you may not use this file except in compliance with the License.
    10   * You may obtain a copy of the License at
    11   *
    12   *     http://www.apache.org/licenses/LICENSE-2.0
    13   *
    14   * Unless required by applicable law or agreed to in writing, software
    15   * distributed under the License is distributed on an "AS IS" BASIS,
    16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    17   * See the License for the specific language governing permissions and
    18   * limitations under the License.
    19   */
    20  
    21  package simdjson
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"sync"
    27  )
    28  
    29  func (pj *internalParsedJson) initialize(size int) {
    30  	// Estimate the tape size to be about 15% of the length of the JSON message
    31  	avgTapeSize := size * 15 / 100
    32  	if cap(pj.Tape) < avgTapeSize {
    33  		pj.Tape = make([]uint64, 0, avgTapeSize)
    34  	}
    35  	pj.Tape = pj.Tape[:0]
    36  
    37  	stringsSize := size / 10
    38  	if stringsSize < 128 {
    39  		stringsSize = 128 // always allocate at least 128 for the string buffer
    40  	}
    41  	if cap(pj.Strings) < stringsSize {
    42  		pj.Strings = make([]byte, 0, stringsSize)
    43  	}
    44  	pj.Strings = pj.Strings[:0]
    45  	if cap(pj.containingScopeOffset) < maxdepth {
    46  		pj.containingScopeOffset = make([]uint64, 0, maxdepth)
    47  	}
    48  	pj.containingScopeOffset = pj.containingScopeOffset[:0]
    49  }
    50  
    51  func (pj *internalParsedJson) parseMessage(msg []byte) error {
    52  	return pj.parseMessageInternal(msg, false)
    53  }
    54  
    55  func (pj *internalParsedJson) parseMessageNdjson(msg []byte) error {
    56  	return pj.parseMessageInternal(msg, true)
    57  }
    58  
    59  func (pj *internalParsedJson) parseMessageInternal(msg []byte, ndjson bool) (err error) {
    60  
    61  	// Cache message so we can point directly to strings
    62  	// TODO: Find out why TestVerifyTape/instruments fails without bytes.TrimSpace
    63  	pj.Message = bytes.TrimSpace(msg)
    64  	pj.initialize(len(pj.Message))
    65  
    66  	if ndjson {
    67  		pj.ndjson = 1
    68  	} else {
    69  		pj.ndjson = 0
    70  	}
    71  
    72  	var wg sync.WaitGroup
    73  	wg.Add(2)
    74  
    75  	// Make the capacity of the channel smaller than the number of slots.
    76  	// This way the sender will automatically block until the consumer
    77  	// has finished the slot it is working on.
    78  	pj.indexChans = make(chan indexChan, indexSlots-2)
    79  	pj.buffersOffset = ^uint64(0)
    80  
    81  	var errStage1 error
    82  	go func() {
    83  		if !findStructuralIndices(pj.Message, pj) {
    84  			errStage1 = errors.New("Failed to find all structural indices for stage 1")
    85  		}
    86  		wg.Done()
    87  	}()
    88  	go func() {
    89  		var parsed bool
    90  		parsed, err = unifiedMachine(pj.Message, pj)
    91  		if err != nil {
    92  			// drain the channel until empty
    93  			for range pj.indexChans {
    94  			}
    95  			wg.Done()
    96  			return
    97  		}
    98  		if !parsed {
    99  			err = errors.New("Bad parsing while executing stage 2")
   100  			// drain the channel until empty
   101  			for range pj.indexChans {
   102  			}
   103  		}
   104  		wg.Done()
   105  	}()
   106  
   107  	wg.Wait()
   108  
   109  	if errStage1 != nil {
   110  		return errStage1
   111  	}
   112  	return
   113  }