github.com/searKing/golang/go@v1.2.74/bufio/pair.go (about)

     1  // Copyright 2022 The searKing Author. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bufio
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  	"unicode"
    12  
    13  	"github.com/searKing/golang/go/container/slice"
    14  	"github.com/searKing/golang/go/container/stack"
    15  )
    16  
var (
	// ErrMismatchTokenPair is returned when an end delimiter is met that does
	// not close the most recently opened start delimiter.
	ErrMismatchTokenPair = errors.New("mismatch token pair")
	// ErrInvalidStartToken is returned when an end delimiter is met while no
	// start delimiter is open and leading data is not being discarded.
	ErrInvalidStartToken = errors.New("invalid start token")
)
    21  
// DelimiterPair couples a start delimiter byte with the end delimiter byte
// that closes it, for example '{' and '}'.
type DelimiterPair struct {
	start byte // byte that opens a wrapped value
	end   byte // byte that closes a value opened by start
}
    26  
// A PairScanner reads values wrapped in delimiter pairs from an input
// stream, such as the braces of a JSON object or the brackets of an array.
type PairScanner struct {
	// r is the underlying reader that refill pulls more input from.
	r              io.Reader
	discardLeading bool // discard every byte seen before the first start delimiter
	// buf holds input read from r; the slices returned by Scan point into it.
	buf            []byte
	scanp          int   // start of unread data in buf
	scanned        int64 // number of consumed bytes that refill has dropped from the front of buf
	// err is the read error recorded by readValue; once set, Scan keeps returning it.
	err            error
}
    36  
    37  // NewPairScanner returns a new scanner that reads from r.
    38  //
    39  // The scanner introduces its own buffering and may
    40  // read data from r beyond the JSON values requested.
    41  func NewPairScanner(r io.Reader) *PairScanner {
    42  	return &PairScanner{r: r}
    43  }
    44  
    45  func (pairScanner *PairScanner) SetDiscardLeading(discard bool) *PairScanner {
    46  	pairScanner.discardLeading = discard
    47  	return pairScanner
    48  }
    49  
    50  func (pairScanner *PairScanner) ScanDelimiters(delimiters string) (line []byte, err error) {
    51  	var pairs []DelimiterPair
    52  	var isPair bool
    53  	var lastDelimiter byte
    54  
    55  	for _, delimiter := range []byte(delimiters) {
    56  		if !isPair {
    57  			lastDelimiter = delimiter
    58  			isPair = true
    59  			continue
    60  		}
    61  		pairs = append(pairs, DelimiterPair{
    62  			lastDelimiter, delimiter,
    63  		})
    64  		isPair = false
    65  	}
    66  
    67  	return pairScanner.Scan(pairs)
    68  
    69  }
    70  
    71  // Scan reads the next value complete wrapped by pair delimiters from its
    72  // input and stores it in the value pointed to by v.
    73  func (pairScanner *PairScanner) Scan(pairs []DelimiterPair) (line []byte, err error) {
    74  	if pairScanner.err != nil {
    75  		return nil, pairScanner.err
    76  	}
    77  
    78  	// Read whole value into buffer.
    79  	n, err := pairScanner.readValue(pairs)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	line = pairScanner.buf[pairScanner.scanp : pairScanner.scanp+n]
    84  	pairScanner.scanp += n
    85  
    86  	return line, nil
    87  }
    88  
    89  // Buffered returns a reader of the data remaining in the PairScanner's
    90  // buffer. The reader is valid until the next call to Decode.
    91  func (pairScanner *PairScanner) Buffered() io.Reader {
    92  	return bytes.NewReader(pairScanner.buf[pairScanner.scanp:])
    93  }
    94  
    95  // readValue reads a JSON value into dec.buf.
    96  // It returns the length of the encoding.
    97  func (pairScanner *PairScanner) readValue(pairs []DelimiterPair) (int, error) {
    98  	var delimiters stack.Stack
    99  	scanp := pairScanner.scanp
   100  	var err error
   101  Input:
   102  	for {
   103  		// Look in the buffer for a new value.
   104  		for i, c := range pairScanner.buf[scanp:] {
   105  			delimiterPair, ok := findMatchedTokenPair(c, pairs)
   106  			if !ok && delimiters.Len() == 0 {
   107  				// no delimiter have been seen yet
   108  				// discard any char until we meet a start delimeter at list
   109  				if pairScanner.discardLeading {
   110  					pairScanner.scanp += 1
   111  					continue
   112  				}
   113  				continue
   114  			}
   115  			if !ok {
   116  				// read next char
   117  				continue
   118  			}
   119  			if c == delimiterPair.start {
   120  				delimiters.Push(c)
   121  			} else { //c == delimiterPair.end
   122  				// no delimiter have been seen yet
   123  				if delimiters.Len() == 0 {
   124  					// discard any char until we meet a start delimeter at list
   125  					pairScanner.scanp += 1
   126  					if pairScanner.discardLeading {
   127  						continue
   128  					}
   129  					return 0, ErrInvalidStartToken
   130  				}
   131  
   132  				lastDelimiter := delimiters.Peek().Value.(byte)
   133  				if lastDelimiter != delimiterPair.start {
   134  					return 0, ErrMismatchTokenPair
   135  				}
   136  				delimiters.Pop()
   137  				// a perfect object is get, just return
   138  				if delimiters.Len() == 0 {
   139  					scanp += i + 1
   140  					break Input
   141  				}
   142  			}
   143  		}
   144  		scanp = len(pairScanner.buf)
   145  
   146  		// Did the last read have an error?
   147  		// Delayed until now to allow buffer scan.
   148  		if err != nil {
   149  			if err == io.EOF {
   150  				if nonSpace(pairScanner.buf) {
   151  					err = io.ErrUnexpectedEOF
   152  				}
   153  			}
   154  			pairScanner.err = err
   155  			return 0, err
   156  		}
   157  
   158  		n := scanp - pairScanner.scanp
   159  		err = pairScanner.refill()
   160  		scanp = pairScanner.scanp + n
   161  	}
   162  	return scanp - pairScanner.scanp, nil
   163  }
   164  
   165  func (pairScanner *PairScanner) refill() error {
   166  	// Make room to read more into the buffer.
   167  	// First slide down data already consumed.
   168  	if pairScanner.scanp > 0 {
   169  		pairScanner.scanned += int64(pairScanner.scanp)
   170  		n := copy(pairScanner.buf, pairScanner.buf[pairScanner.scanp:])
   171  		pairScanner.buf = pairScanner.buf[:n]
   172  		pairScanner.scanp = 0
   173  	}
   174  
   175  	// Grow buffer if not large enough.
   176  	const minRead = 512
   177  	if cap(pairScanner.buf)-len(pairScanner.buf) < minRead {
   178  		newBuf := make([]byte, len(pairScanner.buf), 2*cap(pairScanner.buf)+minRead)
   179  		copy(newBuf, pairScanner.buf)
   180  		pairScanner.buf = newBuf
   181  	}
   182  
   183  	// Read. Delay error for next iteration (after scan).
   184  	n, err := pairScanner.r.Read(pairScanner.buf[len(pairScanner.buf):cap(pairScanner.buf)])
   185  	pairScanner.buf = pairScanner.buf[0 : len(pairScanner.buf)+n]
   186  
   187  	return err
   188  }
   189  
   190  func findMatchedTokenPair(c byte, pairs []DelimiterPair) (tokenPair DelimiterPair, has bool) {
   191  	opt := slice.NewStream().WithSlice(pairs).FindFirst(func(e interface{}) bool {
   192  		pair := e.(DelimiterPair)
   193  		if pair.start == c || pair.end == c {
   194  			return true
   195  		}
   196  		return false
   197  	})
   198  	if !opt.IsPresent() {
   199  		return tokenPair, false
   200  	}
   201  	return opt.Get().(DelimiterPair), true
   202  }
   203  
   204  func nonSpace(b []byte) bool {
   205  	for _, c := range b {
   206  		if !unicode.IsSpace(rune(c)) {
   207  			return true
   208  		}
   209  	}
   210  	return false
   211  }