github.com/searKing/golang/go@v1.2.117/bufio/pair.go (about) 1 // Copyright 2022 The searKing Author. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bufio 6 7 import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "io" 12 "strconv" 13 14 "github.com/searKing/golang/go/container/stack" 15 slices_ "github.com/searKing/golang/go/exp/slices" 16 ) 17 18 var ( 19 ErrMismatchTokenPair = errors.New("mismatch token pair") 20 ErrInvalidStartToken = errors.New("invalid start token") 21 ) 22 23 type DelimiterPair struct { 24 start byte 25 end byte 26 } 27 28 // A PairScanner reads and decodes Pair wrapped values from an input stream, like a json、xml. 29 type PairScanner struct { 30 r io.Reader 31 discardLeading bool // discard any char until we meet a start delimiter at list 32 buf []byte 33 scanp int // start of unread data in buf 34 scanned int64 // amount of data already scanned 35 err error 36 } 37 38 // NewPairScanner returns a new scanner that reads from r. 39 // 40 // The scanner introduces its own buffering and may 41 // read data from r beyond the paired values requested. 42 func NewPairScanner(r io.Reader) *PairScanner { 43 return &PairScanner{r: r} 44 } 45 46 func (s *PairScanner) SetDiscardLeading(discard bool) *PairScanner { 47 s.discardLeading = discard 48 return s 49 } 50 51 func (s *PairScanner) ScanDelimiters(delimiters string) (line []byte, err error) { 52 var pairs []DelimiterPair 53 var isPair bool 54 var lastDelimiter byte 55 56 for _, delimiter := range []byte(delimiters) { 57 if !isPair { 58 lastDelimiter = delimiter 59 isPair = true 60 continue 61 } 62 pairs = append(pairs, DelimiterPair{ 63 lastDelimiter, delimiter, 64 }) 65 isPair = false 66 } 67 68 return s.Scan(pairs) 69 70 } 71 72 // Scan reads the next value complete wrapped by pair delimiters from its 73 // input and stores it in the value pointed to by v. 74 func (s *PairScanner) Scan(pairs []DelimiterPair) (line []byte, err error) { 75 if s.err != nil { 76 return nil, s.err 77 } 78 79 // Read whole value into buffer. 80 n, err := s.readValue(pairs) 81 if err != nil { 82 return nil, err 83 } 84 line = s.buf[s.scanp : s.scanp+n] 85 s.scanp += n 86 87 return line, nil 88 } 89 90 // Buffered returns a reader of the data remaining in the PairScanner's 91 // buffer. The reader is valid until the next call to Decode. 92 func (s *PairScanner) Buffered() io.Reader { 93 return bytes.NewReader(s.buf[s.scanp:]) 94 } 95 96 // readValue reads a JSON value into dec.buf. 97 // It returns the length of the encoding. 98 func (s *PairScanner) readValue(pairs []DelimiterPair) (int, error) { 99 var delimiters stack.Stack 100 scanp := s.scanp 101 var err error 102 Input: 103 // help the compiler see that scanp is never negative, so it can remove 104 // some bounds checks below. 105 for scanp >= 0 { 106 107 // Look in the buffer for a new value. 108 for i, c := range s.buf[scanp:] { 109 delimiterPair, ok := findMatchedTokenPair(c, pairs) 110 if !ok { 111 if delimiters.Len() == 0 { 112 // no delimiter have been seen yet 113 // discard any char until we meet a start delimiter at list 114 if !s.discardLeading { 115 return 0, s.tokenError(c, ErrInvalidStartToken) 116 } 117 s.scanp++ 118 } 119 // read next char 120 continue 121 } 122 if c == delimiterPair.start { 123 delimiters.Push(c) 124 continue 125 } // c == delimiterPair.end 126 // no delimiter have been seen yet 127 if delimiters.Len() == 0 { 128 // discard any char until we meet a start delimiter at list 129 if !s.discardLeading { 130 return 0, s.tokenError(c, ErrInvalidStartToken) 131 } 132 s.scanp++ 133 continue 134 } 135 136 lastDelimiter := delimiters.Peek().Value.(byte) 137 if lastDelimiter != delimiterPair.start { 138 return 0, s.tokenError(c, ErrMismatchTokenPair) 139 } 140 delimiters.Pop() 141 // a perfect object is get, just return 142 if delimiters.Len() == 0 { 143 scanp += i + 1 144 break Input 145 } 146 147 } 148 scanp = len(s.buf) 149 150 // Did the last read have an error? 151 // Delayed until now to allow buffer scan. 152 if err != nil { 153 if err == io.EOF { 154 if len(s.buf) > 0 { 155 err = io.ErrUnexpectedEOF 156 } 157 } 158 s.err = err 159 return 0, err 160 } 161 162 n := scanp - s.scanp 163 err = s.refill() 164 scanp = s.scanp + n 165 } 166 return scanp - s.scanp, nil 167 } 168 169 func (s *PairScanner) refill() error { 170 // Make room to read more into the buffer. 171 // First slide down data already consumed. 172 if s.scanp > 0 { 173 s.scanned += int64(s.scanp) 174 n := copy(s.buf, s.buf[s.scanp:]) 175 s.buf = s.buf[:n] 176 s.scanp = 0 177 } 178 179 // Grow buffer if not large enough. 180 const minRead = 512 181 if cap(s.buf)-len(s.buf) < minRead { 182 newBuf := make([]byte, len(s.buf), 2*cap(s.buf)+minRead) 183 copy(newBuf, s.buf) 184 s.buf = newBuf 185 } 186 187 // Read. Delay error for next iteration (after scan). 188 n, err := s.r.Read(s.buf[len(s.buf):cap(s.buf)]) 189 s.buf = s.buf[0 : len(s.buf)+n] 190 191 return err 192 } 193 194 func (s *PairScanner) tokenError(c byte, err error) error { 195 return fmt.Errorf("invalid character %s at %d: %w", quoteChar(c), s.InputOffset(), err) 196 } 197 198 // More reports whether there is another element in the 199 // current array or object being parsed. 200 func (s *PairScanner) More() bool { 201 c, err := s.peek() 202 return err == nil && c != ']' && c != '}' 203 } 204 205 func (s *PairScanner) peek() (byte, error) { 206 var err error 207 for { 208 if s.scanp < len(s.buf) { 209 c := s.buf[s.scanp] 210 return c, nil 211 } 212 // buffer has been scanned, now report any error 213 if err != nil { 214 return 0, err 215 } 216 err = s.refill() 217 } 218 } 219 220 // InputOffset returns the input stream byte offset of the current scanner position. 221 // The offset gives the location of the end of the most recently returned token 222 // and the beginning of the next token. 223 func (s *PairScanner) InputOffset() int64 { 224 return s.scanned + int64(s.scanp) 225 } 226 227 func findMatchedTokenPair(c byte, pairs []DelimiterPair) (tokenPair DelimiterPair, has bool) { 228 return slices_.FirstFunc(pairs, func(pair DelimiterPair) bool { 229 return c == pair.start || c == pair.end 230 }) 231 } 232 233 // quoteChar formats c as a quoted character literal. 234 func quoteChar(c byte) string { 235 // special cases - different from quoted strings 236 if c == '\'' { 237 return `'\''` 238 } 239 if c == '"' { 240 return `'"'` 241 } 242 243 // use quoted string with different quotation marks 244 s := strconv.Quote(string(c)) 245 return "'" + s[1:len(s)-1] + "'" 246 }