// Source listing: git.lukeshu.com/go/lowmemjson@v0.3.9-0.20230723050957-72f6d13f6fb2/decode_scan.go

// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package lowmemjson

import (
	"io"
	"unicode/utf8"

	"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
)

// runeTypeScanner reads runes from an io.RuneScanner, passes each one
// through a jsonparse.Parser, and returns the rune together with the
// RuneType the parser assigned to it. It keeps track of the input
// offset and can undo the most recent read.
type runeTypeScanner struct {
	// everything that is not "initialized by constructor" starts
	// out as the zero value.

	// inner is the underlying source of runes.
	inner io.RuneScanner // initialized by constructor

	// parser classifies every rune (and EOF) read from inner.
	parser jsonparse.Parser // initialized by constructor
	// offset is the running total of the sizes of the runes consumed
	// from inner; it is rewound by rSize when a rune is unread.
	offset int64

	// rTypeOK is set once a read has populated rType; clearing it
	// makes the scanner forget a sticky error/EOF.
	rTypeOK bool
	// repeat is set when the last rune has been unread and must be
	// replayed by the next ReadRuneType call.
	repeat bool

	// The r* fields cache the result of the most recent read.
	rRune rune
	rSize int
	// rIsRune is false when rRune holds the raw value of a byte that
	// was not valid UTF-8 rather than a decoded rune.
	rIsRune bool
	rType   jsonparse.RuneType
	rErr    error
}

// ReadRuneType reads the next rune from the input, passes it through
// the parser, and returns the rune, its size in bytes, the RuneType
// the parser assigned to it, and any error. Runes that the parser
// classifies as RuneTypeSpace are skipped. Once an error or EOF has
// been returned, the same result is returned by every later call
// until it is forgotten by PopReadBarrier (EOF only) or Reset.
//
// A byte that is not valid UTF-8 is reported as utf8.RuneError.
//
// The returned error is a *DecodeReadError, a *DecodeSyntaxError, or nil.
// An EOF condition is represented as one of:
//
//	end of value:                   (_, 0, RuneTypeEOF, nil)
//	end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.ErrUnexpectedEOF})
//	end of file at start of value:  (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.EOF})
func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) {
	switch {
	case sc.rTypeOK && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF):
		// do nothing: errors and EOF are sticky, so the cached
		// result is simply returned again below.
	case sc.repeat:
		// The previous rune was unread: consume it from inner again
		// and return the cached result without calling the parser a
		// second time.
		sc.offset += int64(sc.rSize)
		_, _, _ = sc.inner.ReadRune()
	case sc.parser.IsAtBarrier():
		// The parser reports that it is at a read barrier; return a
		// zero-size EOF without reading from inner.
		sc.rTypeOK = true
		sc.rType = jsonparse.RuneTypeEOF
		sc.rRune = 0
		sc.rSize = 0
		sc.rErr = nil
	default:
		sc.rTypeOK = true
	again: // re-entered to skip over runes classified as RuneTypeSpace
		var err error
		sc.rRune, sc.rSize, err = sc.inner.ReadRune()
		sc.offset += int64(sc.rSize)
		switch err {
		case nil:
			sc.rIsRune = true
			// A RuneError of size 1 signals a byte that is not valid
			// UTF-8 (a genuine encoded U+FFFD is 3 bytes long). If
			// inner can also be read byte-wise, recover the raw byte
			// value so the parser can be told it is not a real rune.
			if sc.rRune == utf8.RuneError && sc.rSize == 1 {
				if bs, ok := sc.inner.(io.ByteScanner); ok {
					_ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos
					b, _ := bs.ReadByte()
					_ = bs.UnreadByte()
					// Re-read as a rune so that inner's most recent
					// operation is a ReadRune again (presumably so a
					// later inner.UnreadRune is valid -- confirm).
					_, _, _ = sc.inner.ReadRune()
					sc.rRune = rune(b)
					sc.rIsRune = false
				}
			}
			sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
			if err != nil {
				sc.rErr = &DecodeSyntaxError{
					// offset has already been advanced past the rune,
					// so step back to the rune's first byte.
					Offset: sc.offset - int64(sc.rSize),
					Err:    err,
				}
			} else {
				sc.rErr = nil
			}
			switch sc.rType {
			case jsonparse.RuneTypeSpace:
				goto again
			case jsonparse.RuneTypeEOF:
				// The parser answered EOF for this rune: put the rune
				// back, undo the offset advance, and report a
				// zero-size result.
				sc.offset -= int64(sc.rSize)
				sc.rRune = 0
				sc.rSize = 0
				_ = sc.inner.UnreadRune()
			}
		case io.EOF:
			// End of input: the parser decides whether that is a clean
			// end of value or a syntax error.
			sc.rType, err = sc.parser.HandleEOF()
			if err != nil {
				sc.rErr = &DecodeSyntaxError{
					Offset: sc.offset,
					Err:    err,
				}
			} else {
				sc.rErr = nil
			}
		default:
			// Any other error from inner is reported as a read error.
			// NOTE(review): 0 is presumably the RuneType that makes
			// the result sticky (RuneTypeError?) -- confirm against
			// jsonparse.
			sc.rType = 0
			sc.rErr = &DecodeReadError{
				Offset: sc.offset,
				Err:    err,
			}
		}
	}
	sc.repeat = false
	// When rRune holds a raw invalid byte, callers still see
	// utf8.RuneError rather than the byte value.
	if sc.rSize > 0 && !sc.rIsRune {
		return utf8.RuneError, sc.rSize, sc.rType, sc.rErr
	}
	return sc.rRune, sc.rSize, sc.rType, sc.rErr
}

// UnreadRune undoes a call to .ReadRuneType().
//
// If the last call to .ReadRuneType() has already been unread, or if
// that call returned a rune with size 0, then ErrInvalidUnreadRune is
// returned. Otherwise, nil is returned.
119 func (sc *runeTypeScanner) UnreadRune() error { 120 if sc.repeat || sc.rSize == 0 { 121 return ErrInvalidUnreadRune 122 } 123 sc.repeat = true 124 sc.offset -= int64(sc.rSize) 125 _ = sc.inner.UnreadRune() 126 return nil 127 } 128 129 func (sc *runeTypeScanner) InputOffset() int64 { 130 return sc.offset 131 } 132 133 func (sc *runeTypeScanner) PushReadBarrier() { 134 sc.parser.PushReadBarrier() 135 } 136 137 func (sc *runeTypeScanner) PopReadBarrier() { 138 sc.parser.PopBarrier() 139 switch { 140 case sc.repeat: 141 // re-figure the rType and rErr 142 var err error 143 sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune) 144 if err != nil { 145 sc.rErr = &DecodeSyntaxError{ 146 Offset: sc.offset - int64(sc.rSize), 147 Err: err, 148 } 149 } else { 150 sc.rErr = nil 151 } 152 case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF: 153 sc.rTypeOK = false // forget the sticky EOF 154 } 155 } 156 157 func (sc *runeTypeScanner) Reset() { 158 sc.parser.Reset() 159 sc.rTypeOK = false // forget any sticky errors/EOF 160 sc.repeat = false // feed the rune (if any) through the parser again 161 }