github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/json/parser.go (about)

     1  // Copyright 2022 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package json
    12  
    13  import (
    14  	"bytes"
    15  	"encoding/json"
    16  	"io"
    17  	"reflect"
    18  	"strings"
    19  	"unsafe"
    20  
    21  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/json/tokenizer"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
    25  // parseUsingFastParser parses string as JSON using fast json parser.
    26  func parseUsingFastParser(s string, cfg parseConfig) (JSON, error) {
    27  	input, err := unsafeGetBytes(s)
    28  	if err != nil {
    29  		return nil, err
    30  	}
    31  
    32  	p := fastJSONParser{
    33  		parseConfig: cfg,
    34  		decoder:     tokenizer.MakeDecoder(input),
    35  		state:       (*fastJSONParser).parseTopValue,
    36  	}
    37  	defer p.decoder.Release()
    38  
    39  	j, err := p.parse()
    40  	if err != nil {
    41  		if errors.Is(err, io.ErrUnexpectedEOF) && p.decoder.More() {
    42  			// JSON scanner returns nil token if it encounters an invalid input
    43  			// character.  In such cases, decoder returns io.ErrUnexpectedEOF error.
    44  			// However, we know it's not an EOF because decoder has more data.  So,
    45  			// produce a bit nicer error message.
    46  			return nil, jsonDecodeError(decodeErrorContext(errInvalidInputToken, s, p.decoder.Pos()))
    47  		}
    48  		return nil, jsonDecodeError(decodeErrorContext(err, s, p.decoder.Pos()))
    49  	}
    50  
    51  	if j == nil {
    52  		return nil, errors.AssertionFailedf("expected parsed JSON value, got nil")
    53  	}
    54  
    55  	if p.decoder.More() {
    56  		return nil, jsonDecodeError(decodeErrorContext(errTrailingCharacters, s, p.decoder.Pos()+1))
    57  	}
    58  
    59  	return j, nil
    60  }
    61  
// fastJSONParser builds JSON given input string. This implementation uses low level
// API provided by fork of github.com/pkg/json package to implement direct
// string to tree.JSON conversion, while trying to be as close to the
// encoding/json implementation as possible.
//
// The parser is a token-driven state machine: parse() reads one token at a
// time and hands it to the current state function, which either produces a
// finished value or updates the builder stacks and selects the next state.
type fastJSONParser struct {
	// parseConfig carries the caller-supplied parsing options (for example
	// the unordered flag handed to each ObjectBuilder).
	parseConfig
	// decoder is the tokenizer that yields successive JSON tokens.
	decoder tokenizer.Decoder

	// state is the method expression for the next
	// state in the state machine.
	state func(*fastJSONParser, []byte) (JSON, error)

	// State machine stack information.
	// kind is the types of objects stored in stack
	// len(kind) == len(arr) + len(obj)
	// Each entry in kind says whether the container at that nesting level
	// lives on the arr stack or on the obj stack.
	kind []kind
	arr  []ArrayBuilder  // array builder stack
	obj  []ObjectBuilder // object builder stack
}
    81  
    82  // parse runs the parse loop -- reading next token from the
    83  // stream, and decoding it based on the state machine.
    84  func (p *fastJSONParser) parse() (JSON, error) {
    85  	for {
    86  		tok, err := p.decoder.NextToken()
    87  		if err != nil {
    88  			return nil, err
    89  		}
    90  
    91  		if len(tok) < 1 {
    92  			return nil, io.ErrUnexpectedEOF
    93  		}
    94  
    95  		j, err := p.state(p, tok)
    96  		if err != nil {
    97  			return nil, err
    98  		}
    99  		if j != nil && len(p.kind) == 0 {
   100  			return j, nil
   101  		}
   102  	}
   103  }
   104  
   105  // parseTopValue processes top level JSON value.
   106  func (p *fastJSONParser) parseTopValue(tok []byte) (JSON, error) {
   107  	switch tok[0] {
   108  	case tokenizer.ArrayStart:
   109  		p.pushArray()
   110  		return nil, nil
   111  	case tokenizer.ObjectStart:
   112  		p.pushObject()
   113  		return nil, nil
   114  	case tokenizer.Null:
   115  		return NullJSONValue, nil
   116  	case tokenizer.String:
   117  		return jsonString(tok[1 : len(tok)-1]), nil
   118  	case tokenizer.True:
   119  		return TrueJSONValue, nil
   120  	case tokenizer.False:
   121  		return FalseJSONValue, nil
   122  	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   123  		return FromNumber(json.Number(tok))
   124  	default:
   125  		return nil, errors.Newf("unexpected token %q", tok)
   126  	}
   127  }
   128  
   129  // parseArrayValue processes JSON value inside array.
   130  func (p *fastJSONParser) parseArrayValue(tok []byte) (JSON, error) {
   131  	switch tok[0] {
   132  	case tokenizer.ArrayEnd:
   133  		return p.buildArray()
   134  	case tokenizer.ArrayStart:
   135  		p.pushArray()
   136  	case tokenizer.ObjectStart:
   137  		p.pushObject()
   138  	case tokenizer.Null:
   139  		p.addArrayValue(NullJSONValue)
   140  	case tokenizer.String:
   141  		p.addArrayValue(jsonString(tok[1 : len(tok)-1]))
   142  	case tokenizer.True:
   143  		p.addArrayValue(TrueJSONValue)
   144  	case tokenizer.False:
   145  		p.addArrayValue(FalseJSONValue)
   146  	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   147  		n, err := FromNumber(json.Number(tok))
   148  		if err != nil {
   149  			return n, err
   150  		}
   151  		p.addArrayValue(n)
   152  	default:
   153  		return nil, errors.Newf("unexpected array token %q", tok)
   154  	}
   155  	return nil, nil
   156  }
   157  
   158  // parseObjectKey processes object key.
   159  func (p *fastJSONParser) parseObjectKey(tok []byte) (JSON, error) {
   160  	switch tok[0] {
   161  	case tokenizer.ObjectEnd:
   162  		return p.buildObject()
   163  	case tokenizer.String:
   164  		p.addObjectKey(string(tok[1 : len(tok)-1]))
   165  		return nil, nil
   166  	default:
   167  		return nil, errors.Newf("expected to read object key (string), found %q", tok)
   168  	}
   169  }
   170  
   171  // parseObjectValue processes object value.
   172  func (p *fastJSONParser) parseObjectValue(tok []byte) (JSON, error) {
   173  	switch tok[0] {
   174  	case tokenizer.ArrayStart:
   175  		p.pushArray()
   176  	case tokenizer.ObjectStart:
   177  		p.pushObject()
   178  	case tokenizer.Null:
   179  		p.setObjectValue(NullJSONValue)
   180  	case tokenizer.String:
   181  		p.setObjectValue(jsonString(tok[1 : len(tok)-1]))
   182  	case tokenizer.True:
   183  		p.setObjectValue(TrueJSONValue)
   184  	case tokenizer.False:
   185  		p.setObjectValue(FalseJSONValue)
   186  	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   187  		n, err := FromNumber(json.Number(tok))
   188  		if err != nil {
   189  			return n, err
   190  		}
   191  		p.setObjectValue(n)
   192  	default:
   193  		return nil, errors.Newf("unexpected object token %q", tok)
   194  	}
   195  	return nil, nil
   196  }
   197  
// kind tags an entry of the parser's container stack, recording whether
// that nesting level is an array or an object.
type kind bool

const (
	// kindArray marks a stack level whose builder lives on the array stack.
	kindArray  kind = false
	// kindObject marks a stack level whose builder lives on the object stack.
	kindObject kind = true
)

// errUnexpectedState is returned when a container end token arrives but the
// top of the stack is empty or holds the other kind of container.
var errUnexpectedState = errors.New("unexpected state machine state")
   206  
   207  // pushArray adds array builder and transitions state to read array values.
   208  func (p *fastJSONParser) pushArray() {
   209  	p.arr = append(p.arr, ArrayBuilder{})
   210  	p.kind = append(p.kind, kindArray)
   211  	p.state = (*fastJSONParser).parseArrayValue
   212  }
   213  
   214  // addArrayValue adds value to top array.
   215  func (p *fastJSONParser) addArrayValue(j JSON) {
   216  	p.arr[len(p.arr)-1].Add(j)
   217  }
   218  
   219  // buildArray builds top array value, and adjusts stack appropriately.
   220  func (p *fastJSONParser) buildArray() (JSON, error) {
   221  	if len(p.kind) == 0 || p.kind[len(p.kind)-1] != kindArray {
   222  		return nil, errUnexpectedState
   223  	}
   224  	j := p.arr[len(p.arr)-1].Build()
   225  	p.pop()
   226  	return p.stackReturn(j)
   227  }
   228  
   229  // pushObject adds object builder and transitions state to read object.
   230  func (p *fastJSONParser) pushObject() {
   231  	p.obj = append(p.obj, ObjectBuilder{unordered: p.unordered})
   232  	p.kind = append(p.kind, kindObject)
   233  	p.state = (*fastJSONParser).parseObjectKey
   234  }
   235  
   236  // addObjectKey adds key to object builder and transitions state to read object
   237  // value.
   238  func (p *fastJSONParser) addObjectKey(k string) {
   239  	p.obj[len(p.obj)-1].Add(k, nil)
   240  	p.state = (*fastJSONParser).parseObjectValue
   241  }
   242  
   243  // setObjectValue sets the value for the previously added object key,
   244  // and transitions state to read the next object key.
   245  func (p *fastJSONParser) setObjectValue(v JSON) {
   246  	pairs := p.obj[len(p.obj)-1].pairs
   247  	pairs[len(pairs)-1].v = v
   248  	p.state = (*fastJSONParser).parseObjectKey
   249  }
   250  
   251  // buildObject builds top JSON object.
   252  func (p *fastJSONParser) buildObject() (JSON, error) {
   253  	if len(p.kind) == 0 || p.kind[len(p.kind)-1] != kindObject {
   254  		return nil, errUnexpectedState
   255  	}
   256  	j := p.obj[len(p.obj)-1].Build()
   257  	p.pop()
   258  	return p.stackReturn(j)
   259  }
   260  
   261  // pop stack.
   262  func (p *fastJSONParser) pop() {
   263  	top := len(p.kind) - 1
   264  	if p.kind[top] == kindArray {
   265  		p.arr = p.arr[:len(p.arr)-1]
   266  	} else {
   267  		p.obj = p.obj[:len(p.obj)-1]
   268  	}
   269  	p.kind = p.kind[:top]
   270  }
   271  
   272  // stackReturn returns json object to the top of the stack
   273  // and transitions state machine to the next state.
   274  func (p *fastJSONParser) stackReturn(j JSON) (JSON, error) {
   275  	// If stack is now empty, we're done -- return JSON.
   276  	if len(p.kind) == 0 {
   277  		return j, nil
   278  	}
   279  
   280  	// Add json to array or object; arrange for next state transition.
   281  	if p.kind[len(p.kind)-1] == kindArray {
   282  		p.addArrayValue(j)
   283  		p.state = (*fastJSONParser).parseArrayValue
   284  	} else {
   285  		p.setObjectValue(j)
   286  		p.state = (*fastJSONParser).parseObjectKey
   287  	}
   288  	return nil, nil
   289  }
   290  
// errInvalidInputToken replaces io.ErrUnexpectedEOF in parseUsingFastParser
// when parsing stops early even though the decoder still has input left.
var errInvalidInputToken = errors.New("invalid JSON token")
   292  
   293  // decodeErrorContext returns input context for an error encountered during decoding.
   294  // There is quite a bit of code here, but debugging faulty JSON is hard, so
   295  // take extra care to produce nice error message, with good context information.
   296  func decodeErrorContext(err error, s string, pos int) error {
   297  	if len(s) == 0 {
   298  		return errors.Wrap(err, "while decoding empty string")
   299  	}
   300  
   301  	const contextSize = 16
   302  	ctxStart := pos - contextSize
   303  	if ctxStart < 0 {
   304  		ctxStart = 0
   305  	}
   306  	ctxEnd := pos + contextSize
   307  	if ctxEnd > len(s) {
   308  		ctxEnd = len(s)
   309  	}
   310  
   311  	var leftPad, rightPad string
   312  	if pos > ctxStart {
   313  		leftPad = strings.Repeat(".", pos-ctxStart)
   314  	}
   315  	if ctxEnd > pos {
   316  		rightPad = strings.Repeat(".", ctxEnd-pos-1)
   317  	}
   318  
   319  	return errors.Wrapf(err,
   320  		"while decoding %d bytes at offset %d:\n"+
   321  			"...|%s|...\n"+
   322  			"...|%s^%s|...",
   323  		len(s), pos,
   324  		s[ctxStart:ctxEnd],
   325  		leftPad, rightPad,
   326  	)
   327  }
   328  
   329  // unsafeGetBytes returns []byte in the underlying string,
   330  // without incurring copy.
   331  // This unsafe mechanism is safe to use here because, ultimately, every
   332  // JSON object produced from those bytes will copy those bytes anyway
   333  // (i.e. jsonString([]byte)).
   334  // See https://groups.google.com/g/golang-nuts/c/Zsfk-VMd_fU/m/O1ru4fO-BgAJ
   335  func unsafeGetBytes(s string) ([]byte, error) {
   336  	const maxStrLen = 1 << 30 // Really, can't see us supporting input JSONs that big.
   337  	if len(s) > maxStrLen {
   338  		return nil, bytes.ErrTooLarge
   339  	}
   340  	if len(s) == 0 {
   341  		return nil, nil
   342  	}
   343  	//lint:ignore SA1019 StringHeader is deprecated, but no clear replacement
   344  	p := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&s)).Data)
   345  	return (*[maxStrLen]byte)(p)[:len(s):len(s)], nil
   346  }