github.com/searKing/golang/go@v1.2.117/bufio/pair.go (about)

     1  // Copyright 2022 The searKing Author. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bufio
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"strconv"
    13  
    14  	"github.com/searKing/golang/go/container/stack"
    15  	slices_ "github.com/searKing/golang/go/exp/slices"
    16  )
    17  
        // Sentinel errors wrapped (via %w) by the scanner's token errors;
        // match them with errors.Is.
    18  var (
    19  	ErrMismatchTokenPair = errors.New("mismatch token pair") // an end delimiter does not pair with the innermost open start delimiter
    20  	ErrInvalidStartToken = errors.New("invalid start token") // a byte appeared before any start delimiter while leading bytes are not discarded
    21  )
    22  
    23  type DelimiterPair struct {
    24  	start byte
    25  	end   byte
    26  }
    27  
    28  // A PairScanner reads values wrapped in matching start/end delimiter pairs
        // from an input stream, in the way JSON or XML values are wrapped.
    29  type PairScanner struct {
    30  	r              io.Reader // source that refill reads from
    31  	discardLeading bool // discard any byte met before the first start delimiter
    32  	buf            []byte // buffered input; bytes before scanp are already consumed
    33  	scanp          int   // start of unread data in buf
    34  	scanned        int64 // count of consumed bytes that refill has already slid out of buf
    35  	err            error // read error recorded by readValue; once set, Scan returns it immediately
    36  }
    37  
    38  // NewPairScanner returns a new scanner that reads from r.
    39  //
    40  // The scanner introduces its own buffering and may
    41  // read data from r beyond the paired values requested.
    42  func NewPairScanner(r io.Reader) *PairScanner {
    43  	return &PairScanner{r: r}
    44  }
    45  
    46  func (s *PairScanner) SetDiscardLeading(discard bool) *PairScanner {
    47  	s.discardLeading = discard
    48  	return s
    49  }
    50  
    51  func (s *PairScanner) ScanDelimiters(delimiters string) (line []byte, err error) {
    52  	var pairs []DelimiterPair
    53  	var isPair bool
    54  	var lastDelimiter byte
    55  
    56  	for _, delimiter := range []byte(delimiters) {
    57  		if !isPair {
    58  			lastDelimiter = delimiter
    59  			isPair = true
    60  			continue
    61  		}
    62  		pairs = append(pairs, DelimiterPair{
    63  			lastDelimiter, delimiter,
    64  		})
    65  		isPair = false
    66  	}
    67  
    68  	return s.Scan(pairs)
    69  
    70  }
    71  
    72  // Scan reads the next value complete wrapped by pair delimiters from its
    73  // input and stores it in the value pointed to by v.
    74  func (s *PairScanner) Scan(pairs []DelimiterPair) (line []byte, err error) {
    75  	if s.err != nil {
    76  		return nil, s.err
    77  	}
    78  
    79  	// Read whole value into buffer.
    80  	n, err := s.readValue(pairs)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	line = s.buf[s.scanp : s.scanp+n]
    85  	s.scanp += n
    86  
    87  	return line, nil
    88  }
    89  
    90  // Buffered returns a reader of the data remaining in the PairScanner's
    91  // buffer. The reader is valid until the next call to Decode.
    92  func (s *PairScanner) Buffered() io.Reader {
    93  	return bytes.NewReader(s.buf[s.scanp:])
    94  }
    95  
    96  // readValue reads a JSON value into dec.buf.
    97  // It returns the length of the encoding.
    98  func (s *PairScanner) readValue(pairs []DelimiterPair) (int, error) {
    99  	var delimiters stack.Stack
   100  	scanp := s.scanp
   101  	var err error
   102  Input:
   103  	// help the compiler see that scanp is never negative, so it can remove
   104  	// some bounds checks below.
   105  	for scanp >= 0 {
   106  
   107  		// Look in the buffer for a new value.
   108  		for i, c := range s.buf[scanp:] {
   109  			delimiterPair, ok := findMatchedTokenPair(c, pairs)
   110  			if !ok {
   111  				if delimiters.Len() == 0 {
   112  					// no delimiter have been seen yet
   113  					// discard any char until we meet a start delimiter at list
   114  					if !s.discardLeading {
   115  						return 0, s.tokenError(c, ErrInvalidStartToken)
   116  					}
   117  					s.scanp++
   118  				}
   119  				// read next char
   120  				continue
   121  			}
   122  			if c == delimiterPair.start {
   123  				delimiters.Push(c)
   124  				continue
   125  			} // c == delimiterPair.end
   126  			// no delimiter have been seen yet
   127  			if delimiters.Len() == 0 {
   128  				// discard any char until we meet a start delimiter at list
   129  				if !s.discardLeading {
   130  					return 0, s.tokenError(c, ErrInvalidStartToken)
   131  				}
   132  				s.scanp++
   133  				continue
   134  			}
   135  
   136  			lastDelimiter := delimiters.Peek().Value.(byte)
   137  			if lastDelimiter != delimiterPair.start {
   138  				return 0, s.tokenError(c, ErrMismatchTokenPair)
   139  			}
   140  			delimiters.Pop()
   141  			// a perfect object is get, just return
   142  			if delimiters.Len() == 0 {
   143  				scanp += i + 1
   144  				break Input
   145  			}
   146  
   147  		}
   148  		scanp = len(s.buf)
   149  
   150  		// Did the last read have an error?
   151  		// Delayed until now to allow buffer scan.
   152  		if err != nil {
   153  			if err == io.EOF {
   154  				if len(s.buf) > 0 {
   155  					err = io.ErrUnexpectedEOF
   156  				}
   157  			}
   158  			s.err = err
   159  			return 0, err
   160  		}
   161  
   162  		n := scanp - s.scanp
   163  		err = s.refill()
   164  		scanp = s.scanp + n
   165  	}
   166  	return scanp - s.scanp, nil
   167  }
   168  
   169  func (s *PairScanner) refill() error {
   170  	// Make room to read more into the buffer.
   171  	// First slide down data already consumed.
   172  	if s.scanp > 0 {
   173  		s.scanned += int64(s.scanp)
   174  		n := copy(s.buf, s.buf[s.scanp:])
   175  		s.buf = s.buf[:n]
   176  		s.scanp = 0
   177  	}
   178  
   179  	// Grow buffer if not large enough.
   180  	const minRead = 512
   181  	if cap(s.buf)-len(s.buf) < minRead {
   182  		newBuf := make([]byte, len(s.buf), 2*cap(s.buf)+minRead)
   183  		copy(newBuf, s.buf)
   184  		s.buf = newBuf
   185  	}
   186  
   187  	// Read. Delay error for next iteration (after scan).
   188  	n, err := s.r.Read(s.buf[len(s.buf):cap(s.buf)])
   189  	s.buf = s.buf[0 : len(s.buf)+n]
   190  
   191  	return err
   192  }
   193  
   194  func (s *PairScanner) tokenError(c byte, err error) error {
   195  	return fmt.Errorf("invalid character %s at %d: %w", quoteChar(c), s.InputOffset(), err)
   196  }
   197  
   198  // More reports whether there is another element in the
   199  // current array or object being parsed.
   200  func (s *PairScanner) More() bool {
   201  	c, err := s.peek()
   202  	return err == nil && c != ']' && c != '}'
   203  }
   204  
   205  func (s *PairScanner) peek() (byte, error) {
   206  	var err error
   207  	for {
   208  		if s.scanp < len(s.buf) {
   209  			c := s.buf[s.scanp]
   210  			return c, nil
   211  		}
   212  		// buffer has been scanned, now report any error
   213  		if err != nil {
   214  			return 0, err
   215  		}
   216  		err = s.refill()
   217  	}
   218  }
   219  
   220  // InputOffset returns the input stream byte offset of the current scanner position.
   221  // The offset gives the location of the end of the most recently returned token
   222  // and the beginning of the next token.
   223  func (s *PairScanner) InputOffset() int64 {
   224  	return s.scanned + int64(s.scanp)
   225  }
   226  
   227  func findMatchedTokenPair(c byte, pairs []DelimiterPair) (tokenPair DelimiterPair, has bool) {
   228  	return slices_.FirstFunc(pairs, func(pair DelimiterPair) bool {
   229  		return c == pair.start || c == pair.end
   230  	})
   231  }
   232  
   233  // quoteChar formats c as a quoted character literal.
   234  func quoteChar(c byte) string {
   235  	// special cases - different from quoted strings
   236  	if c == '\'' {
   237  		return `'\''`
   238  	}
   239  	if c == '"' {
   240  		return `'"'`
   241  	}
   242  
   243  	// use quoted string with different quotation marks
   244  	s := strconv.Quote(string(c))
   245  	return "'" + s[1:len(s)-1] + "'"
   246  }