github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/json/tokenizer/decoder.go (about)

     1  // Copyright 2022 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // This is a fork of pkg/json package.
    12  
    13  // Copyright (c) 2020, Dave Cheney <dave@cheney.net>
    14  // All rights reserved.
    15  //
    16  // Redistribution and use in source and binary forms, with or without
    17  // modification, are permitted provided that the following conditions are met:
    18  //
    19  //   - Redistributions of source code must retain the above copyright notice, this
    20  //     list of conditions and the following disclaimer.
    21  //
    22  //   - Redistributions in binary form must reproduce the above copyright notice,
    23  //     this list of conditions and the following disclaimer in the documentation
    24  //     and/or other materials provided with the distribution.
    25  //
    26  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    27  // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    28  // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    29  // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    30  // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    31  // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    32  // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    33  // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    34  // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    35  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    36  
    37  package tokenizer
    38  
    39  import (
    40  	"fmt"
    41  	"io"
    42  )
    43  
// A Decoder decodes JSON values from an input stream.
//
// It is a small state machine: NextToken calls the function stored in state,
// and each state function reads one token from scanner and selects the state
// to be used by the following call.
type Decoder struct {
	// scanner supplies the raw tokens consumed by the state functions.
	scanner Scanner
	// state is the function NextToken invokes to produce the next token.
	state   func(*Decoder) ([]byte, error)

	// mustHaveValue is set by stateObjectComma and stateArrayComma while
	// they read the token that follows a comma. While it is set, a closing
	// '}' or ']' is reported as an error: a comma *must* be followed by
	// either a string object key or an array value, not by the end of the
	// enclosing object/array.
	mustHaveValue bool
	// stack (embedded) holds one entry per container that is currently
	// open: true is pushed for '{' and false for '['.
	stack
}
    58  
    59  // MakeDecoder returns decoder for the input data
    60  func MakeDecoder(data []byte) Decoder {
    61  	return Decoder{
    62  		scanner: Scanner{data: data},
    63  		state:   (*Decoder).stateValue,
    64  	}
    65  }
    66  
    67  // Pos returns current input position.
    68  func (d *Decoder) Pos() int {
    69  	return d.scanner.offset
    70  }
    71  
    72  // More returns true if there is more non-whitespace tokens available.
    73  func (d *Decoder) More() bool {
    74  	return d.scanner.More()
    75  }
    76  
    77  // Release releases acquired resources.
    78  func (d *Decoder) Release() {
    79  	d.scanner.Release()
    80  }
    81  
    82  type stack []bool
    83  
    84  func (s *stack) push(v bool) {
    85  	*s = append(*s, v)
    86  }
    87  
    88  func (s *stack) pop() bool {
    89  	*s = (*s)[:len(*s)-1]
    90  	if len(*s) == 0 {
    91  		return false
    92  	}
    93  	return (*s)[len(*s)-1]
    94  }
    95  
    96  func (s *stack) len() int { return len(*s) }
    97  
    98  // NextToken returns a []byte referencing the next logical token in the stream.
    99  // The []byte is valid until Token is called again.
   100  // At the end of the input stream, Token returns nil, io.EOF.
   101  //
   102  // Token guarantees that the delimiters [ ] { } it returns are properly nested
   103  // and matched: if Token encounters an unexpected delimiter in the input, it
   104  // will return an error.
   105  //
   106  // A valid token begins with one of the following:
   107  //
   108  //	{ Object start
   109  //	[ Array start
   110  //	} Object end
   111  //	] Array End
   112  //	t JSON true
   113  //	f JSON false
   114  //	n JSON null
   115  //	" A string, possibly containing backslash escaped entites.
   116  //	-, 0-9 A number
   117  //
   118  // Commas and colons are elided.
   119  func (d *Decoder) NextToken() ([]byte, error) {
   120  	return d.state(d)
   121  }
   122  
   123  func (d *Decoder) stateObjectString() ([]byte, error) {
   124  	tok := d.scanner.Next()
   125  	if len(tok) < 1 {
   126  		return nil, io.ErrUnexpectedEOF
   127  	}
   128  	switch tok[0] {
   129  	case '}':
   130  		if d.mustHaveValue {
   131  			d.scanner.offset -= len(tok) + 1 // Rewind to point to comma.
   132  			return nil, fmt.Errorf("stateObjectString: missing string key")
   133  		}
   134  
   135  		inObj := d.pop()
   136  		switch {
   137  		case d.len() == 0:
   138  			d.state = (*Decoder).stateEnd
   139  		case inObj:
   140  			d.state = (*Decoder).stateObjectComma
   141  		case !inObj:
   142  			d.state = (*Decoder).stateArrayComma
   143  		}
   144  		return tok, nil
   145  	case '"':
   146  		d.state = (*Decoder).stateObjectColon
   147  		return tok, nil
   148  	default:
   149  		return nil, fmt.Errorf("stateObjectString: missing string key")
   150  	}
   151  }
   152  
   153  func (d *Decoder) stateObjectColon() ([]byte, error) {
   154  	tok := d.scanner.Next()
   155  	if len(tok) < 1 {
   156  		return nil, io.ErrUnexpectedEOF
   157  	}
   158  	switch tok[0] {
   159  	case Colon:
   160  		d.state = (*Decoder).stateObjectValue
   161  		return d.NextToken()
   162  	default:
   163  		return tok, fmt.Errorf("stateObjectColon: expecting colon")
   164  	}
   165  }
   166  
   167  func (d *Decoder) stateObjectValue() ([]byte, error) {
   168  	tok := d.scanner.Next()
   169  	if len(tok) < 1 {
   170  		return nil, io.ErrUnexpectedEOF
   171  	}
   172  	switch tok[0] {
   173  	case '{':
   174  		d.state = (*Decoder).stateObjectString
   175  		d.push(true)
   176  		return tok, nil
   177  	case '[':
   178  		d.state = (*Decoder).stateArrayValue
   179  		d.push(false)
   180  		return tok, nil
   181  	default:
   182  		d.state = (*Decoder).stateObjectComma
   183  		return tok, nil
   184  	}
   185  }
   186  
   187  func (d *Decoder) stateObjectComma() (_ []byte, err error) {
   188  	tok := d.scanner.Next()
   189  	if len(tok) < 1 {
   190  		return nil, io.ErrUnexpectedEOF
   191  	}
   192  	switch tok[0] {
   193  	case '}':
   194  		inObj := d.pop()
   195  		switch {
   196  		case d.len() == 0:
   197  			d.state = (*Decoder).stateEnd
   198  		case inObj:
   199  			d.state = (*Decoder).stateObjectComma
   200  		case !inObj:
   201  			d.state = (*Decoder).stateArrayComma
   202  		}
   203  		return tok, nil
   204  	case Comma:
   205  		d.mustHaveValue = true
   206  		tok, err = d.stateObjectString()
   207  		d.mustHaveValue = false
   208  		return tok, err
   209  	default:
   210  		return tok, fmt.Errorf("stateObjectComma: expecting comma")
   211  	}
   212  }
   213  
   214  func (d *Decoder) stateArrayValue() ([]byte, error) {
   215  	tok := d.scanner.Next()
   216  	if len(tok) < 1 {
   217  		return nil, io.ErrUnexpectedEOF
   218  	}
   219  	switch tok[0] {
   220  	case '{':
   221  		d.state = (*Decoder).stateObjectString
   222  		d.push(true)
   223  		return tok, nil
   224  	case '[':
   225  		d.state = (*Decoder).stateArrayValue
   226  		d.push(false)
   227  		return tok, nil
   228  	case ']':
   229  		if d.mustHaveValue {
   230  			d.scanner.offset -= len(tok) + 1 // Rewind to point to comma.
   231  			return nil, fmt.Errorf("stateArrayValue: unexpected comma")
   232  		}
   233  		inObj := d.pop()
   234  		switch {
   235  		case d.len() == 0:
   236  			d.state = (*Decoder).stateEnd
   237  		case inObj:
   238  			d.state = (*Decoder).stateObjectComma
   239  		case !inObj:
   240  			d.state = (*Decoder).stateArrayComma
   241  		}
   242  		return tok, nil
   243  	case Comma:
   244  		return nil, fmt.Errorf("stateArrayValue: unexpected comma")
   245  	default:
   246  		d.state = (*Decoder).stateArrayComma
   247  		return tok, nil
   248  	}
   249  }
   250  
   251  func (d *Decoder) stateArrayComma() (_ []byte, err error) {
   252  	tok := d.scanner.Next()
   253  	if len(tok) < 1 {
   254  		return nil, io.ErrUnexpectedEOF
   255  	}
   256  	switch tok[0] {
   257  	case ']':
   258  		inObj := d.pop()
   259  		switch {
   260  		case d.len() == 0:
   261  			d.state = (*Decoder).stateEnd
   262  		case inObj:
   263  			d.state = (*Decoder).stateObjectComma
   264  		case !inObj:
   265  			d.state = (*Decoder).stateArrayComma
   266  		}
   267  		return tok, nil
   268  	case Comma:
   269  		d.mustHaveValue = true
   270  		tok, err = d.stateArrayValue()
   271  		d.mustHaveValue = false
   272  		return tok, err
   273  	default:
   274  		return nil, fmt.Errorf("stateArrayComma: expected comma, %v", d.stack)
   275  	}
   276  }
   277  
   278  func (d *Decoder) stateValue() ([]byte, error) {
   279  	tok := d.scanner.Next()
   280  	if len(tok) < 1 {
   281  		return nil, io.ErrUnexpectedEOF
   282  	}
   283  	switch tok[0] {
   284  	case '{':
   285  		d.state = (*Decoder).stateObjectString
   286  		d.push(true)
   287  		return tok, nil
   288  	case '[':
   289  		d.state = (*Decoder).stateArrayValue
   290  		d.push(false)
   291  		return tok, nil
   292  	case ',':
   293  		return nil, fmt.Errorf("stateValue: unexpected comma")
   294  	default:
   295  		d.state = (*Decoder).stateEnd
   296  		return tok, nil
   297  	}
   298  }
   299  
   300  func (d *Decoder) stateEnd() ([]byte, error) { return nil, io.EOF }