github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/pubsub/query/syntax/scanner.go (about)

     1  package syntax
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"time"
    10  	"unicode"
    11  )
    12  
    13  // Token is the type of a lexical token in the query grammar.
    14  type Token byte
    15  
    16  const (
    17  	TInvalid  = iota // invalid or unknown token
    18  	TTag             // field tag: x.y
    19  	TString          // string value: 'foo bar'
    20  	TNumber          // number: 0, 15.5, 100
    21  	TTime            // timestamp: TIME yyyy-mm-ddThh:mm:ss([-+]hh:mm|Z)
    22  	TDate            // datestamp: DATE yyyy-mm-dd
    23  	TAnd             // operator: AND
    24  	TContains        // operator: CONTAINS
    25  	TExists          // operator: EXISTS
    26  	TEq              // operator: =
    27  	TLt              // operator: <
    28  	TLeq             // operator: <=
    29  	TGt              // operator: >
    30  	TGeq             // operator: >=
    31  
    32  	// Do not reorder these values without updating the scanner code.
    33  )
    34  
    35  var tString = [...]string{
    36  	TInvalid:  "invalid token",
    37  	TTag:      "tag",
    38  	TString:   "string",
    39  	TNumber:   "number",
    40  	TTime:     "timestamp",
    41  	TDate:     "datestamp",
    42  	TAnd:      "AND operator",
    43  	TContains: "CONTAINS operator",
    44  	TExists:   "EXISTS operator",
    45  	TEq:       "= operator",
    46  	TLt:       "< operator",
    47  	TLeq:      "<= operator",
    48  	TGt:       "> operator",
    49  	TGeq:      ">= operator",
    50  }
    51  
    52  func (t Token) String() string {
    53  	v := int(t)
    54  	if v > len(tString) {
    55  		return "unknown token type"
    56  	}
    57  	return tString[v]
    58  }
    59  
    60  const (
    61  	// TimeFormat is the format string used for timestamp values.
    62  	TimeFormat = time.RFC3339
    63  
    64  	// DateFormat is the format string used for datestamp values.
    65  	DateFormat = "2006-01-02"
    66  )
    67  
    68  // Scanner reads lexical tokens of the query language from an input stream.
    69  // Each call to Next advances the scanner to the next token, or reports an
    70  // error.
    71  type Scanner struct {
    72  	r   *bufio.Reader
    73  	buf bytes.Buffer
    74  	tok Token
    75  	err error
    76  
    77  	pos, last, end int
    78  }
    79  
    80  // NewScanner constructs a new scanner that reads from r.
    81  func NewScanner(r io.Reader) *Scanner { return &Scanner{r: bufio.NewReader(r)} }
    82  
    83  // Next advances s to the next token in the input, or reports an error.  At the
    84  // end of input, Next returns io.EOF.
    85  func (s *Scanner) Next() error {
    86  	s.buf.Reset()
    87  	s.pos = s.end
    88  	s.tok = TInvalid
    89  	s.err = nil
    90  
    91  	for {
    92  		ch, err := s.rune()
    93  		if err != nil {
    94  			return s.fail(err)
    95  		}
    96  		if unicode.IsSpace(ch) {
    97  			s.pos = s.end
    98  			continue // skip whitespace
    99  		}
   100  		if '0' <= ch && ch <= '9' {
   101  			return s.scanNumber(ch)
   102  		} else if isTagRune(ch) {
   103  			return s.scanTagLike(ch)
   104  		}
   105  		switch ch {
   106  		case '\'':
   107  			return s.scanString(ch)
   108  		case '<', '>', '=':
   109  			return s.scanCompare(ch)
   110  		default:
   111  			return s.invalid(ch)
   112  		}
   113  	}
   114  }
   115  
   116  // Token returns the type of the current input token.
   117  func (s *Scanner) Token() Token { return s.tok }
   118  
   119  // Text returns the text of the current input token.
   120  func (s *Scanner) Text() string { return s.buf.String() }
   121  
   122  // Pos returns the start offset of the current token in the input.
   123  func (s *Scanner) Pos() int { return s.pos }
   124  
   125  // Err returns the last error reported by Next, if any.
   126  func (s *Scanner) Err() error { return s.err }
   127  
   128  // scanNumber scans for numbers with optional fractional parts.
   129  // Examples: 0, 1, 3.14
   130  func (s *Scanner) scanNumber(first rune) error {
   131  	s.buf.WriteRune(first)
   132  	if err := s.scanWhile(isDigit); err != nil {
   133  		return err
   134  	}
   135  
   136  	ch, err := s.rune()
   137  	if err != nil && err != io.EOF {
   138  		return err
   139  	}
   140  	if ch == '.' {
   141  		s.buf.WriteRune(ch)
   142  		if err := s.scanWhile(isDigit); err != nil {
   143  			return err
   144  		}
   145  	} else {
   146  		s.unrune()
   147  	}
   148  	s.tok = TNumber
   149  	return nil
   150  }
   151  
   152  func (s *Scanner) scanString(first rune) error {
   153  	// discard opening quote
   154  	for {
   155  		ch, err := s.rune()
   156  		if err != nil {
   157  			return s.fail(err)
   158  		} else if ch == first {
   159  			// discard closing quote
   160  			s.tok = TString
   161  			return nil
   162  		}
   163  		s.buf.WriteRune(ch)
   164  	}
   165  }
   166  
   167  func (s *Scanner) scanCompare(first rune) error {
   168  	s.buf.WriteRune(first)
   169  	switch first {
   170  	case '=':
   171  		s.tok = TEq
   172  		return nil
   173  	case '<':
   174  		s.tok = TLt
   175  	case '>':
   176  		s.tok = TGt
   177  	default:
   178  		return s.invalid(first)
   179  	}
   180  
   181  	ch, err := s.rune()
   182  	if err == io.EOF {
   183  		return nil // the assigned token is correct
   184  	} else if err != nil {
   185  		return s.fail(err)
   186  	}
   187  	if ch == '=' {
   188  		s.buf.WriteRune(ch)
   189  		s.tok++ // depends on token order
   190  		return nil
   191  	}
   192  	s.unrune()
   193  	return nil
   194  }
   195  
   196  func (s *Scanner) scanTagLike(first rune) error {
   197  	s.buf.WriteRune(first)
   198  	var hasSpace bool
   199  	for {
   200  		ch, err := s.rune()
   201  		if err == io.EOF {
   202  			break
   203  		} else if err != nil {
   204  			return s.fail(err)
   205  		}
   206  		if !isTagRune(ch) {
   207  			hasSpace = ch == ' ' // to check for TIME, DATE
   208  			break
   209  		}
   210  		s.buf.WriteRune(ch)
   211  	}
   212  
   213  	text := s.buf.String()
   214  	switch text {
   215  	case "TIME":
   216  		if hasSpace {
   217  			return s.scanTimestamp()
   218  		}
   219  		s.tok = TTag
   220  	case "DATE":
   221  		if hasSpace {
   222  			return s.scanDatestamp()
   223  		}
   224  		s.tok = TTag
   225  	case "AND":
   226  		s.tok = TAnd
   227  	case "EXISTS":
   228  		s.tok = TExists
   229  	case "CONTAINS":
   230  		s.tok = TContains
   231  	default:
   232  		s.tok = TTag
   233  	}
   234  	s.unrune()
   235  	return nil
   236  }
   237  
   238  func (s *Scanner) scanTimestamp() error {
   239  	s.buf.Reset() // discard "TIME" label
   240  	if err := s.scanWhile(isTimeRune); err != nil {
   241  		return err
   242  	}
   243  	if ts, err := time.Parse(TimeFormat, s.buf.String()); err != nil {
   244  		return s.fail(fmt.Errorf("invalid TIME value: %w", err))
   245  	} else if y := ts.Year(); y < 1900 || y > 2999 {
   246  		return s.fail(fmt.Errorf("timestamp year %d out of range", ts.Year()))
   247  	}
   248  	s.tok = TTime
   249  	return nil
   250  }
   251  
   252  func (s *Scanner) scanDatestamp() error {
   253  	s.buf.Reset() // discard "DATE" label
   254  	if err := s.scanWhile(isDateRune); err != nil {
   255  		return err
   256  	}
   257  	if ts, err := time.Parse(DateFormat, s.buf.String()); err != nil {
   258  		return s.fail(fmt.Errorf("invalid DATE value: %w", err))
   259  	} else if y := ts.Year(); y < 1900 || y > 2999 {
   260  		return s.fail(fmt.Errorf("datestamp year %d out of range", ts.Year()))
   261  	}
   262  	s.tok = TDate
   263  	return nil
   264  }
   265  
   266  func (s *Scanner) scanWhile(ok func(rune) bool) error {
   267  	for {
   268  		ch, err := s.rune()
   269  		if err == io.EOF {
   270  			return nil
   271  		} else if err != nil {
   272  			return s.fail(err)
   273  		} else if !ok(ch) {
   274  			s.unrune()
   275  			return nil
   276  		}
   277  		s.buf.WriteRune(ch)
   278  	}
   279  }
   280  
   281  func (s *Scanner) rune() (rune, error) {
   282  	ch, nb, err := s.r.ReadRune()
   283  	s.last = nb
   284  	s.end += nb
   285  	return ch, err
   286  }
   287  
   288  func (s *Scanner) unrune() {
   289  	_ = s.r.UnreadRune()
   290  	s.end -= s.last
   291  }
   292  
   293  func (s *Scanner) fail(err error) error {
   294  	s.err = err
   295  	return err
   296  }
   297  
   298  func (s *Scanner) invalid(ch rune) error {
   299  	return s.fail(fmt.Errorf("invalid input %c at offset %d", ch, s.end))
   300  }
   301  
   302  func isDigit(r rune) bool { return '0' <= r && r <= '9' }
   303  
   304  func isTagRune(r rune) bool {
   305  	return r == '.' || r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   306  }
   307  
   308  func isTimeRune(r rune) bool {
   309  	return strings.ContainsRune("-T:+Z", r) || isDigit(r)
   310  }
   311  
   312  func isDateRune(r rune) bool { return isDigit(r) || r == '-' }