github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/json/tokenizer/decoder.go (about) 1 // Copyright 2022 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // This is a fork of pkg/json package. 12 13 // Copyright (c) 2020, Dave Cheney <dave@cheney.net> 14 // All rights reserved. 15 // 16 // Redistribution and use in source and binary forms, with or without 17 // modification, are permitted provided that the following conditions are met: 18 // 19 // - Redistributions of source code must retain the above copyright notice, this 20 // list of conditions and the following disclaimer. 21 // 22 // - Redistributions in binary form must reproduce the above copyright notice, 23 // this list of conditions and the following disclaimer in the documentation 24 // and/or other materials provided with the distribution. 25 // 26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 29 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 30 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 32 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 33 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 37 package tokenizer 38 39 import ( 40 "fmt" 41 "io" 42 ) 43 44 // A Decoder decodes JSON values from an input stream. 45 type Decoder struct { 46 scanner Scanner 47 state func(*Decoder) ([]byte, error) 48 49 // mustHaveValue is set when decoder processes 50 // array or object -- as indicated by stack state. 51 // In those cases, when we see a comma, there *must* 52 // be either an array value or a string object key following 53 // it; and if array/object terminates without seeing 54 // this value, return an error. 55 mustHaveValue bool 56 stack 57 } 58 59 // MakeDecoder returns decoder for the input data 60 func MakeDecoder(data []byte) Decoder { 61 return Decoder{ 62 scanner: Scanner{data: data}, 63 state: (*Decoder).stateValue, 64 } 65 } 66 67 // Pos returns current input position. 68 func (d *Decoder) Pos() int { 69 return d.scanner.offset 70 } 71 72 // More returns true if there is more non-whitespace tokens available. 73 func (d *Decoder) More() bool { 74 return d.scanner.More() 75 } 76 77 // Release releases acquired resources. 78 func (d *Decoder) Release() { 79 d.scanner.Release() 80 } 81 82 type stack []bool 83 84 func (s *stack) push(v bool) { 85 *s = append(*s, v) 86 } 87 88 func (s *stack) pop() bool { 89 *s = (*s)[:len(*s)-1] 90 if len(*s) == 0 { 91 return false 92 } 93 return (*s)[len(*s)-1] 94 } 95 96 func (s *stack) len() int { return len(*s) } 97 98 // NextToken returns a []byte referencing the next logical token in the stream. 99 // The []byte is valid until Token is called again. 100 // At the end of the input stream, Token returns nil, io.EOF. 101 // 102 // Token guarantees that the delimiters [ ] { } it returns are properly nested 103 // and matched: if Token encounters an unexpected delimiter in the input, it 104 // will return an error. 105 // 106 // A valid token begins with one of the following: 107 // 108 // { Object start 109 // [ Array start 110 // } Object end 111 // ] Array End 112 // t JSON true 113 // f JSON false 114 // n JSON null 115 // " A string, possibly containing backslash escaped entites. 116 // -, 0-9 A number 117 // 118 // Commas and colons are elided. 119 func (d *Decoder) NextToken() ([]byte, error) { 120 return d.state(d) 121 } 122 123 func (d *Decoder) stateObjectString() ([]byte, error) { 124 tok := d.scanner.Next() 125 if len(tok) < 1 { 126 return nil, io.ErrUnexpectedEOF 127 } 128 switch tok[0] { 129 case '}': 130 if d.mustHaveValue { 131 d.scanner.offset -= len(tok) + 1 // Rewind to point to comma. 132 return nil, fmt.Errorf("stateObjectString: missing string key") 133 } 134 135 inObj := d.pop() 136 switch { 137 case d.len() == 0: 138 d.state = (*Decoder).stateEnd 139 case inObj: 140 d.state = (*Decoder).stateObjectComma 141 case !inObj: 142 d.state = (*Decoder).stateArrayComma 143 } 144 return tok, nil 145 case '"': 146 d.state = (*Decoder).stateObjectColon 147 return tok, nil 148 default: 149 return nil, fmt.Errorf("stateObjectString: missing string key") 150 } 151 } 152 153 func (d *Decoder) stateObjectColon() ([]byte, error) { 154 tok := d.scanner.Next() 155 if len(tok) < 1 { 156 return nil, io.ErrUnexpectedEOF 157 } 158 switch tok[0] { 159 case Colon: 160 d.state = (*Decoder).stateObjectValue 161 return d.NextToken() 162 default: 163 return tok, fmt.Errorf("stateObjectColon: expecting colon") 164 } 165 } 166 167 func (d *Decoder) stateObjectValue() ([]byte, error) { 168 tok := d.scanner.Next() 169 if len(tok) < 1 { 170 return nil, io.ErrUnexpectedEOF 171 } 172 switch tok[0] { 173 case '{': 174 d.state = (*Decoder).stateObjectString 175 d.push(true) 176 return tok, nil 177 case '[': 178 d.state = (*Decoder).stateArrayValue 179 d.push(false) 180 return tok, nil 181 default: 182 d.state = (*Decoder).stateObjectComma 183 return tok, nil 184 } 185 } 186 187 func (d *Decoder) stateObjectComma() (_ []byte, err error) { 188 tok := d.scanner.Next() 189 if len(tok) < 1 { 190 return nil, io.ErrUnexpectedEOF 191 } 192 switch tok[0] { 193 case '}': 194 inObj := d.pop() 195 switch { 196 case d.len() == 0: 197 d.state = (*Decoder).stateEnd 198 case inObj: 199 d.state = (*Decoder).stateObjectComma 200 case !inObj: 201 d.state = (*Decoder).stateArrayComma 202 } 203 return tok, nil 204 case Comma: 205 d.mustHaveValue = true 206 tok, err = d.stateObjectString() 207 d.mustHaveValue = false 208 return tok, err 209 default: 210 return tok, fmt.Errorf("stateObjectComma: expecting comma") 211 } 212 } 213 214 func (d *Decoder) stateArrayValue() ([]byte, error) { 215 tok := d.scanner.Next() 216 if len(tok) < 1 { 217 return nil, io.ErrUnexpectedEOF 218 } 219 switch tok[0] { 220 case '{': 221 d.state = (*Decoder).stateObjectString 222 d.push(true) 223 return tok, nil 224 case '[': 225 d.state = (*Decoder).stateArrayValue 226 d.push(false) 227 return tok, nil 228 case ']': 229 if d.mustHaveValue { 230 d.scanner.offset -= len(tok) + 1 // Rewind to point to comma. 231 return nil, fmt.Errorf("stateArrayValue: unexpected comma") 232 } 233 inObj := d.pop() 234 switch { 235 case d.len() == 0: 236 d.state = (*Decoder).stateEnd 237 case inObj: 238 d.state = (*Decoder).stateObjectComma 239 case !inObj: 240 d.state = (*Decoder).stateArrayComma 241 } 242 return tok, nil 243 case Comma: 244 return nil, fmt.Errorf("stateArrayValue: unexpected comma") 245 default: 246 d.state = (*Decoder).stateArrayComma 247 return tok, nil 248 } 249 } 250 251 func (d *Decoder) stateArrayComma() (_ []byte, err error) { 252 tok := d.scanner.Next() 253 if len(tok) < 1 { 254 return nil, io.ErrUnexpectedEOF 255 } 256 switch tok[0] { 257 case ']': 258 inObj := d.pop() 259 switch { 260 case d.len() == 0: 261 d.state = (*Decoder).stateEnd 262 case inObj: 263 d.state = (*Decoder).stateObjectComma 264 case !inObj: 265 d.state = (*Decoder).stateArrayComma 266 } 267 return tok, nil 268 case Comma: 269 d.mustHaveValue = true 270 tok, err = d.stateArrayValue() 271 d.mustHaveValue = false 272 return tok, err 273 default: 274 return nil, fmt.Errorf("stateArrayComma: expected comma, %v", d.stack) 275 } 276 } 277 278 func (d *Decoder) stateValue() ([]byte, error) { 279 tok := d.scanner.Next() 280 if len(tok) < 1 { 281 return nil, io.ErrUnexpectedEOF 282 } 283 switch tok[0] { 284 case '{': 285 d.state = (*Decoder).stateObjectString 286 d.push(true) 287 return tok, nil 288 case '[': 289 d.state = (*Decoder).stateArrayValue 290 d.push(false) 291 return tok, nil 292 case ',': 293 return nil, fmt.Errorf("stateValue: unexpected comma") 294 default: 295 d.state = (*Decoder).stateEnd 296 return tok, nil 297 } 298 } 299 300 func (d *Decoder) stateEnd() ([]byte, error) { return nil, io.EOF }