github.com/searKing/golang/go@v1.2.74/bufio/pair.go (about) 1 // Copyright 2022 The searKing Author. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bufio 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 "unicode" 12 13 "github.com/searKing/golang/go/container/slice" 14 "github.com/searKing/golang/go/container/stack" 15 ) 16 17 var ( 18 ErrMismatchTokenPair = errors.New("mismatch token pair") 19 ErrInvalidStartToken = errors.New("invalid start token") 20 ) 21 22 type DelimiterPair struct { 23 start byte 24 end byte 25 } 26 27 // A PairScanner reads and decodes Pair wrapped values from an input stream, like a json、xml. 28 type PairScanner struct { 29 r io.Reader 30 discardLeading bool // discard any char until we meet a start delimeter at list 31 buf []byte 32 scanp int // start of unread data in buf 33 scanned int64 // amount of data already scanned 34 err error 35 } 36 37 // NewPairScanner returns a new scanner that reads from r. 38 // 39 // The scanner introduces its own buffering and may 40 // read data from r beyond the JSON values requested. 41 func NewPairScanner(r io.Reader) *PairScanner { 42 return &PairScanner{r: r} 43 } 44 45 func (pairScanner *PairScanner) SetDiscardLeading(discard bool) *PairScanner { 46 pairScanner.discardLeading = discard 47 return pairScanner 48 } 49 50 func (pairScanner *PairScanner) ScanDelimiters(delimiters string) (line []byte, err error) { 51 var pairs []DelimiterPair 52 var isPair bool 53 var lastDelimiter byte 54 55 for _, delimiter := range []byte(delimiters) { 56 if !isPair { 57 lastDelimiter = delimiter 58 isPair = true 59 continue 60 } 61 pairs = append(pairs, DelimiterPair{ 62 lastDelimiter, delimiter, 63 }) 64 isPair = false 65 } 66 67 return pairScanner.Scan(pairs) 68 69 } 70 71 // Scan reads the next value complete wrapped by pair delimiters from its 72 // input and stores it in the value pointed to by v. 73 func (pairScanner *PairScanner) Scan(pairs []DelimiterPair) (line []byte, err error) { 74 if pairScanner.err != nil { 75 return nil, pairScanner.err 76 } 77 78 // Read whole value into buffer. 79 n, err := pairScanner.readValue(pairs) 80 if err != nil { 81 return nil, err 82 } 83 line = pairScanner.buf[pairScanner.scanp : pairScanner.scanp+n] 84 pairScanner.scanp += n 85 86 return line, nil 87 } 88 89 // Buffered returns a reader of the data remaining in the PairScanner's 90 // buffer. The reader is valid until the next call to Decode. 91 func (pairScanner *PairScanner) Buffered() io.Reader { 92 return bytes.NewReader(pairScanner.buf[pairScanner.scanp:]) 93 } 94 95 // readValue reads a JSON value into dec.buf. 96 // It returns the length of the encoding. 97 func (pairScanner *PairScanner) readValue(pairs []DelimiterPair) (int, error) { 98 var delimiters stack.Stack 99 scanp := pairScanner.scanp 100 var err error 101 Input: 102 for { 103 // Look in the buffer for a new value. 104 for i, c := range pairScanner.buf[scanp:] { 105 delimiterPair, ok := findMatchedTokenPair(c, pairs) 106 if !ok && delimiters.Len() == 0 { 107 // no delimiter have been seen yet 108 // discard any char until we meet a start delimeter at list 109 if pairScanner.discardLeading { 110 pairScanner.scanp += 1 111 continue 112 } 113 continue 114 } 115 if !ok { 116 // read next char 117 continue 118 } 119 if c == delimiterPair.start { 120 delimiters.Push(c) 121 } else { //c == delimiterPair.end 122 // no delimiter have been seen yet 123 if delimiters.Len() == 0 { 124 // discard any char until we meet a start delimeter at list 125 pairScanner.scanp += 1 126 if pairScanner.discardLeading { 127 continue 128 } 129 return 0, ErrInvalidStartToken 130 } 131 132 lastDelimiter := delimiters.Peek().Value.(byte) 133 if lastDelimiter != delimiterPair.start { 134 return 0, ErrMismatchTokenPair 135 } 136 delimiters.Pop() 137 // a perfect object is get, just return 138 if delimiters.Len() == 0 { 139 scanp += i + 1 140 break Input 141 } 142 } 143 } 144 scanp = len(pairScanner.buf) 145 146 // Did the last read have an error? 147 // Delayed until now to allow buffer scan. 148 if err != nil { 149 if err == io.EOF { 150 if nonSpace(pairScanner.buf) { 151 err = io.ErrUnexpectedEOF 152 } 153 } 154 pairScanner.err = err 155 return 0, err 156 } 157 158 n := scanp - pairScanner.scanp 159 err = pairScanner.refill() 160 scanp = pairScanner.scanp + n 161 } 162 return scanp - pairScanner.scanp, nil 163 } 164 165 func (pairScanner *PairScanner) refill() error { 166 // Make room to read more into the buffer. 167 // First slide down data already consumed. 168 if pairScanner.scanp > 0 { 169 pairScanner.scanned += int64(pairScanner.scanp) 170 n := copy(pairScanner.buf, pairScanner.buf[pairScanner.scanp:]) 171 pairScanner.buf = pairScanner.buf[:n] 172 pairScanner.scanp = 0 173 } 174 175 // Grow buffer if not large enough. 176 const minRead = 512 177 if cap(pairScanner.buf)-len(pairScanner.buf) < minRead { 178 newBuf := make([]byte, len(pairScanner.buf), 2*cap(pairScanner.buf)+minRead) 179 copy(newBuf, pairScanner.buf) 180 pairScanner.buf = newBuf 181 } 182 183 // Read. Delay error for next iteration (after scan). 184 n, err := pairScanner.r.Read(pairScanner.buf[len(pairScanner.buf):cap(pairScanner.buf)]) 185 pairScanner.buf = pairScanner.buf[0 : len(pairScanner.buf)+n] 186 187 return err 188 } 189 190 func findMatchedTokenPair(c byte, pairs []DelimiterPair) (tokenPair DelimiterPair, has bool) { 191 opt := slice.NewStream().WithSlice(pairs).FindFirst(func(e interface{}) bool { 192 pair := e.(DelimiterPair) 193 if pair.start == c || pair.end == c { 194 return true 195 } 196 return false 197 }) 198 if !opt.IsPresent() { 199 return tokenPair, false 200 } 201 return opt.Get().(DelimiterPair), true 202 } 203 204 func nonSpace(b []byte) bool { 205 for _, c := range b { 206 if !unicode.IsSpace(rune(c)) { 207 return true 208 } 209 } 210 return false 211 }