// Copyright (C) MongoDB, Inc. 2017-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsonrw

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math"
	"strconv"
	"unicode"
	"unicode/utf16"
)

// jsonTokenType identifies the kind of token produced by a jsonScanner.
type jsonTokenType byte

const (
	jttBeginObject jsonTokenType = iota // '{'
	jttEndObject                        // '}'
	jttBeginArray                       // '['
	jttEndArray                         // ']'
	jttColon                            // ':'
	jttComma                            // ','
	jttInt32                            // integer that fits in an int32
	jttInt64                            // integer that only fits in an int64
	jttDouble                           // number scanned as a float64
	jttString                           // double-quoted string
	jttBool                             // the literals true and false
	jttNull                             // the literal null
	jttEOF                              // end of input
)

// jsonToken is a single token returned by the scanner.
type jsonToken struct {
	// t is the kind of token.
	t jsonTokenType
	// v is the token's value: a byte for structural characters, a string,
	// bool, int32, int64 or float64 for values, and nil for null and EOF.
	v interface{}
	// p is the scanner's buffer position (js.pos - 1) at which the token started.
	p int
}

// jsonScanner tokenizes JSON read incrementally from an io.Reader.
type jsonScanner struct {
	// r is the source of the JSON input.
	r io.Reader
	// buf holds the chunk of input most recently read from r.
	buf []byte
	// pos is the index in buf of the next unread byte.
	pos int
	// lastReadErr is the error returned by the last read from r; it is
	// reported once the bytes already in buf have been consumed.
	lastReadErr error
}

// nextToken returns the next JSON token if one exists. A token is a character
// of the JSON grammar, a number, a string, or a literal.
53 func (js *jsonScanner) nextToken() (*jsonToken, error) { 54 c, err := js.readNextByte() 55 56 // keep reading until a non-space is encountered (break on read error or EOF) 57 for isWhiteSpace(c) && err == nil { 58 c, err = js.readNextByte() 59 } 60 61 if err == io.EOF { 62 return &jsonToken{t: jttEOF}, nil 63 } else if err != nil { 64 return nil, err 65 } 66 67 // switch on the character 68 switch c { 69 case '{': 70 return &jsonToken{t: jttBeginObject, v: byte('{'), p: js.pos - 1}, nil 71 case '}': 72 return &jsonToken{t: jttEndObject, v: byte('}'), p: js.pos - 1}, nil 73 case '[': 74 return &jsonToken{t: jttBeginArray, v: byte('['), p: js.pos - 1}, nil 75 case ']': 76 return &jsonToken{t: jttEndArray, v: byte(']'), p: js.pos - 1}, nil 77 case ':': 78 return &jsonToken{t: jttColon, v: byte(':'), p: js.pos - 1}, nil 79 case ',': 80 return &jsonToken{t: jttComma, v: byte(','), p: js.pos - 1}, nil 81 case '"': // RFC-8259 only allows for double quotes (") not single (') 82 return js.scanString() 83 default: 84 // check if it's a number 85 if c == '-' || isDigit(c) { 86 return js.scanNumber(c) 87 } else if c == 't' || c == 'f' || c == 'n' { 88 // maybe a literal 89 return js.scanLiteral(c) 90 } else { 91 return nil, fmt.Errorf("invalid JSON input. Position: %d. Character: %c", js.pos-1, c) 92 } 93 } 94 } 95 96 // readNextByte attempts to read the next byte from the buffer. 
If the buffer 97 // has been exhausted, this function calls readIntoBuf, thus refilling the 98 // buffer and resetting the read position to 0 99 func (js *jsonScanner) readNextByte() (byte, error) { 100 if js.pos >= len(js.buf) { 101 err := js.readIntoBuf() 102 103 if err != nil { 104 return 0, err 105 } 106 } 107 108 b := js.buf[js.pos] 109 js.pos++ 110 111 return b, nil 112 } 113 114 // readNNextBytes reads n bytes into dst, starting at offset 115 func (js *jsonScanner) readNNextBytes(dst []byte, n, offset int) error { 116 var err error 117 118 for i := 0; i < n; i++ { 119 dst[i+offset], err = js.readNextByte() 120 if err != nil { 121 return err 122 } 123 } 124 125 return nil 126 } 127 128 // readIntoBuf reads up to 512 bytes from the scanner's io.Reader into the buffer 129 func (js *jsonScanner) readIntoBuf() error { 130 if js.lastReadErr != nil { 131 js.buf = js.buf[:0] 132 js.pos = 0 133 return js.lastReadErr 134 } 135 136 if cap(js.buf) == 0 { 137 js.buf = make([]byte, 0, 512) 138 } 139 140 n, err := js.r.Read(js.buf[:cap(js.buf)]) 141 if err != nil { 142 js.lastReadErr = err 143 if n > 0 { 144 err = nil 145 } 146 } 147 js.buf = js.buf[:n] 148 js.pos = 0 149 150 return err 151 } 152 153 func isWhiteSpace(c byte) bool { 154 return c == ' ' || c == '\t' || c == '\r' || c == '\n' 155 } 156 157 func isDigit(c byte) bool { 158 return unicode.IsDigit(rune(c)) 159 } 160 161 func isValueTerminator(c byte) bool { 162 return c == ',' || c == '}' || c == ']' || isWhiteSpace(c) 163 } 164 165 // getu4 decodes the 4-byte hex sequence from the beginning of s, returning the hex value as a rune, 166 // or it returns -1. Note that the "\u" from the unicode escape sequence should not be present. 
167 // It is copied and lightly modified from the Go JSON decode function at 168 // https://github.com/golang/go/blob/1b0a0316802b8048d69da49dc23c5a5ab08e8ae8/src/encoding/json/decode.go#L1169-L1188 169 func getu4(s []byte) rune { 170 if len(s) < 4 { 171 return -1 172 } 173 var r rune 174 for _, c := range s[:4] { 175 switch { 176 case '0' <= c && c <= '9': 177 c = c - '0' 178 case 'a' <= c && c <= 'f': 179 c = c - 'a' + 10 180 case 'A' <= c && c <= 'F': 181 c = c - 'A' + 10 182 default: 183 return -1 184 } 185 r = r*16 + rune(c) 186 } 187 return r 188 } 189 190 // scanString reads from an opening '"' to a closing '"' and handles escaped characters 191 func (js *jsonScanner) scanString() (*jsonToken, error) { 192 var b bytes.Buffer 193 var c byte 194 var err error 195 196 p := js.pos - 1 197 198 for { 199 c, err = js.readNextByte() 200 if err != nil { 201 if err == io.EOF { 202 return nil, errors.New("end of input in JSON string") 203 } 204 return nil, err 205 } 206 207 evalNextChar: 208 switch c { 209 case '\\': 210 c, err = js.readNextByte() 211 if err != nil { 212 if err == io.EOF { 213 return nil, errors.New("end of input in JSON string") 214 } 215 return nil, err 216 } 217 218 evalNextEscapeChar: 219 switch c { 220 case '"', '\\', '/': 221 b.WriteByte(c) 222 case 'b': 223 b.WriteByte('\b') 224 case 'f': 225 b.WriteByte('\f') 226 case 'n': 227 b.WriteByte('\n') 228 case 'r': 229 b.WriteByte('\r') 230 case 't': 231 b.WriteByte('\t') 232 case 'u': 233 us := make([]byte, 4) 234 err = js.readNNextBytes(us, 4, 0) 235 if err != nil { 236 return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us) 237 } 238 239 rn := getu4(us) 240 241 // If the rune we just decoded is the high or low value of a possible surrogate pair, 242 // try to decode the next sequence as the low value of a surrogate pair. We're 243 // expecting the next sequence to be another Unicode escape sequence (e.g. 
"\uDD1E"), 244 // but need to handle cases where the input is not a valid surrogate pair. 245 // For more context on unicode surrogate pairs, see: 246 // https://www.christianfscott.com/rust-chars-vs-go-runes/ 247 // https://www.unicode.org/glossary/#high_surrogate_code_point 248 if utf16.IsSurrogate(rn) { 249 c, err = js.readNextByte() 250 if err != nil { 251 if err == io.EOF { 252 return nil, errors.New("end of input in JSON string") 253 } 254 return nil, err 255 } 256 257 // If the next value isn't the beginning of a backslash escape sequence, write 258 // the Unicode replacement character for the surrogate value and goto the 259 // beginning of the next char eval block. 260 if c != '\\' { 261 b.WriteRune(unicode.ReplacementChar) 262 goto evalNextChar 263 } 264 265 c, err = js.readNextByte() 266 if err != nil { 267 if err == io.EOF { 268 return nil, errors.New("end of input in JSON string") 269 } 270 return nil, err 271 } 272 273 // If the next value isn't the beginning of a unicode escape sequence, write the 274 // Unicode replacement character for the surrogate value and goto the beginning 275 // of the next escape char eval block. 276 if c != 'u' { 277 b.WriteRune(unicode.ReplacementChar) 278 goto evalNextEscapeChar 279 } 280 281 err = js.readNNextBytes(us, 4, 0) 282 if err != nil { 283 return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us) 284 } 285 286 rn2 := getu4(us) 287 288 // Try to decode the pair of runes as a utf16 surrogate pair. If that fails, write 289 // the Unicode replacement character for the surrogate value and the 2nd decoded rune. 
290 if rnPair := utf16.DecodeRune(rn, rn2); rnPair != unicode.ReplacementChar { 291 b.WriteRune(rnPair) 292 } else { 293 b.WriteRune(unicode.ReplacementChar) 294 b.WriteRune(rn2) 295 } 296 297 break 298 } 299 300 b.WriteRune(rn) 301 default: 302 return nil, fmt.Errorf("invalid escape sequence in JSON string '\\%c'", c) 303 } 304 case '"': 305 return &jsonToken{t: jttString, v: b.String(), p: p}, nil 306 default: 307 b.WriteByte(c) 308 } 309 } 310 } 311 312 // scanLiteral reads an unquoted sequence of characters and determines if it is one of 313 // three valid JSON literals (true, false, null); if so, it returns the appropriate 314 // jsonToken; otherwise, it returns an error 315 func (js *jsonScanner) scanLiteral(first byte) (*jsonToken, error) { 316 p := js.pos - 1 317 318 lit := make([]byte, 4) 319 lit[0] = first 320 321 err := js.readNNextBytes(lit, 3, 1) 322 if err != nil { 323 return nil, err 324 } 325 326 c5, err := js.readNextByte() 327 328 if bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || err == io.EOF) { 329 js.pos = int(math.Max(0, float64(js.pos-1))) 330 return &jsonToken{t: jttBool, v: true, p: p}, nil 331 } else if bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || err == io.EOF) { 332 js.pos = int(math.Max(0, float64(js.pos-1))) 333 return &jsonToken{t: jttNull, v: nil, p: p}, nil 334 } else if bytes.Equal([]byte("fals"), lit) { 335 if c5 == 'e' { 336 c5, err = js.readNextByte() 337 338 if isValueTerminator(c5) || err == io.EOF { 339 js.pos = int(math.Max(0, float64(js.pos-1))) 340 return &jsonToken{t: jttBool, v: false, p: p}, nil 341 } 342 } 343 } 344 345 return nil, fmt.Errorf("invalid JSON literal. 
Position: %d, literal: %s", p, lit) 346 } 347 348 type numberScanState byte 349 350 const ( 351 nssSawLeadingMinus numberScanState = iota 352 nssSawLeadingZero 353 nssSawIntegerDigits 354 nssSawDecimalPoint 355 nssSawFractionDigits 356 nssSawExponentLetter 357 nssSawExponentSign 358 nssSawExponentDigits 359 nssDone 360 nssInvalid 361 ) 362 363 // scanNumber reads a JSON number (according to RFC-8259) 364 func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) { 365 var b bytes.Buffer 366 var s numberScanState 367 var c byte 368 var err error 369 370 t := jttInt64 // assume it's an int64 until the type can be determined 371 start := js.pos - 1 372 373 b.WriteByte(first) 374 375 switch first { 376 case '-': 377 s = nssSawLeadingMinus 378 case '0': 379 s = nssSawLeadingZero 380 default: 381 s = nssSawIntegerDigits 382 } 383 384 for { 385 c, err = js.readNextByte() 386 387 if err != nil && err != io.EOF { 388 return nil, err 389 } 390 391 switch s { 392 case nssSawLeadingMinus: 393 switch c { 394 case '0': 395 s = nssSawLeadingZero 396 b.WriteByte(c) 397 default: 398 if isDigit(c) { 399 s = nssSawIntegerDigits 400 b.WriteByte(c) 401 } else { 402 s = nssInvalid 403 } 404 } 405 case nssSawLeadingZero: 406 switch c { 407 case '.': 408 s = nssSawDecimalPoint 409 b.WriteByte(c) 410 case 'e', 'E': 411 s = nssSawExponentLetter 412 b.WriteByte(c) 413 case '}', ']', ',': 414 s = nssDone 415 default: 416 if isWhiteSpace(c) || err == io.EOF { 417 s = nssDone 418 } else { 419 s = nssInvalid 420 } 421 } 422 case nssSawIntegerDigits: 423 switch c { 424 case '.': 425 s = nssSawDecimalPoint 426 b.WriteByte(c) 427 case 'e', 'E': 428 s = nssSawExponentLetter 429 b.WriteByte(c) 430 case '}', ']', ',': 431 s = nssDone 432 default: 433 if isWhiteSpace(c) || err == io.EOF { 434 s = nssDone 435 } else if isDigit(c) { 436 s = nssSawIntegerDigits 437 b.WriteByte(c) 438 } else { 439 s = nssInvalid 440 } 441 } 442 case nssSawDecimalPoint: 443 t = jttDouble 444 if isDigit(c) { 445 s = 
nssSawFractionDigits 446 b.WriteByte(c) 447 } else { 448 s = nssInvalid 449 } 450 case nssSawFractionDigits: 451 switch c { 452 case 'e', 'E': 453 s = nssSawExponentLetter 454 b.WriteByte(c) 455 case '}', ']', ',': 456 s = nssDone 457 default: 458 if isWhiteSpace(c) || err == io.EOF { 459 s = nssDone 460 } else if isDigit(c) { 461 s = nssSawFractionDigits 462 b.WriteByte(c) 463 } else { 464 s = nssInvalid 465 } 466 } 467 case nssSawExponentLetter: 468 t = jttDouble 469 switch c { 470 case '+', '-': 471 s = nssSawExponentSign 472 b.WriteByte(c) 473 default: 474 if isDigit(c) { 475 s = nssSawExponentDigits 476 b.WriteByte(c) 477 } else { 478 s = nssInvalid 479 } 480 } 481 case nssSawExponentSign: 482 if isDigit(c) { 483 s = nssSawExponentDigits 484 b.WriteByte(c) 485 } else { 486 s = nssInvalid 487 } 488 case nssSawExponentDigits: 489 switch c { 490 case '}', ']', ',': 491 s = nssDone 492 default: 493 if isWhiteSpace(c) || err == io.EOF { 494 s = nssDone 495 } else if isDigit(c) { 496 s = nssSawExponentDigits 497 b.WriteByte(c) 498 } else { 499 s = nssInvalid 500 } 501 } 502 } 503 504 switch s { 505 case nssInvalid: 506 return nil, fmt.Errorf("invalid JSON number. Position: %d", start) 507 case nssDone: 508 js.pos = int(math.Max(0, float64(js.pos-1))) 509 if t != jttDouble { 510 v, err := strconv.ParseInt(b.String(), 10, 64) 511 if err == nil { 512 if v < math.MinInt32 || v > math.MaxInt32 { 513 return &jsonToken{t: jttInt64, v: v, p: start}, nil 514 } 515 516 return &jsonToken{t: jttInt32, v: int32(v), p: start}, nil 517 } 518 } 519 520 v, err := strconv.ParseFloat(b.String(), 64) 521 if err != nil { 522 return nil, err 523 } 524 525 return &jsonToken{t: jttDouble, v: v, p: start}, nil 526 } 527 } 528 }