github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/pubsub/query/syntax/scanner.go (about) 1 package syntax 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "strings" 9 "time" 10 "unicode" 11 ) 12 13 // Token is the type of a lexical token in the query grammar. 14 type Token byte 15 16 const ( 17 TInvalid = iota // invalid or unknown token 18 TTag // field tag: x.y 19 TString // string value: 'foo bar' 20 TNumber // number: 0, 15.5, 100 21 TTime // timestamp: TIME yyyy-mm-ddThh:mm:ss([-+]hh:mm|Z) 22 TDate // datestamp: DATE yyyy-mm-dd 23 TAnd // operator: AND 24 TContains // operator: CONTAINS 25 TExists // operator: EXISTS 26 TEq // operator: = 27 TLt // operator: < 28 TLeq // operator: <= 29 TGt // operator: > 30 TGeq // operator: >= 31 32 // Do not reorder these values without updating the scanner code. 33 ) 34 35 var tString = [...]string{ 36 TInvalid: "invalid token", 37 TTag: "tag", 38 TString: "string", 39 TNumber: "number", 40 TTime: "timestamp", 41 TDate: "datestamp", 42 TAnd: "AND operator", 43 TContains: "CONTAINS operator", 44 TExists: "EXISTS operator", 45 TEq: "= operator", 46 TLt: "< operator", 47 TLeq: "<= operator", 48 TGt: "> operator", 49 TGeq: ">= operator", 50 } 51 52 func (t Token) String() string { 53 v := int(t) 54 if v > len(tString) { 55 return "unknown token type" 56 } 57 return tString[v] 58 } 59 60 const ( 61 // TimeFormat is the format string used for timestamp values. 62 TimeFormat = time.RFC3339 63 64 // DateFormat is the format string used for datestamp values. 65 DateFormat = "2006-01-02" 66 ) 67 68 // Scanner reads lexical tokens of the query language from an input stream. 69 // Each call to Next advances the scanner to the next token, or reports an 70 // error. 71 type Scanner struct { 72 r *bufio.Reader 73 buf bytes.Buffer 74 tok Token 75 err error 76 77 pos, last, end int 78 } 79 80 // NewScanner constructs a new scanner that reads from r. 81 func NewScanner(r io.Reader) *Scanner { return &Scanner{r: bufio.NewReader(r)} } 82 83 // Next advances s to the next token in the input, or reports an error. At the 84 // end of input, Next returns io.EOF. 85 func (s *Scanner) Next() error { 86 s.buf.Reset() 87 s.pos = s.end 88 s.tok = TInvalid 89 s.err = nil 90 91 for { 92 ch, err := s.rune() 93 if err != nil { 94 return s.fail(err) 95 } 96 if unicode.IsSpace(ch) { 97 s.pos = s.end 98 continue // skip whitespace 99 } 100 if '0' <= ch && ch <= '9' { 101 return s.scanNumber(ch) 102 } else if isTagRune(ch) { 103 return s.scanTagLike(ch) 104 } 105 switch ch { 106 case '\'': 107 return s.scanString(ch) 108 case '<', '>', '=': 109 return s.scanCompare(ch) 110 default: 111 return s.invalid(ch) 112 } 113 } 114 } 115 116 // Token returns the type of the current input token. 117 func (s *Scanner) Token() Token { return s.tok } 118 119 // Text returns the text of the current input token. 120 func (s *Scanner) Text() string { return s.buf.String() } 121 122 // Pos returns the start offset of the current token in the input. 123 func (s *Scanner) Pos() int { return s.pos } 124 125 // Err returns the last error reported by Next, if any. 126 func (s *Scanner) Err() error { return s.err } 127 128 // scanNumber scans for numbers with optional fractional parts. 129 // Examples: 0, 1, 3.14 130 func (s *Scanner) scanNumber(first rune) error { 131 s.buf.WriteRune(first) 132 if err := s.scanWhile(isDigit); err != nil { 133 return err 134 } 135 136 ch, err := s.rune() 137 if err != nil && err != io.EOF { 138 return err 139 } 140 if ch == '.' { 141 s.buf.WriteRune(ch) 142 if err := s.scanWhile(isDigit); err != nil { 143 return err 144 } 145 } else { 146 s.unrune() 147 } 148 s.tok = TNumber 149 return nil 150 } 151 152 func (s *Scanner) scanString(first rune) error { 153 // discard opening quote 154 for { 155 ch, err := s.rune() 156 if err != nil { 157 return s.fail(err) 158 } else if ch == first { 159 // discard closing quote 160 s.tok = TString 161 return nil 162 } 163 s.buf.WriteRune(ch) 164 } 165 } 166 167 func (s *Scanner) scanCompare(first rune) error { 168 s.buf.WriteRune(first) 169 switch first { 170 case '=': 171 s.tok = TEq 172 return nil 173 case '<': 174 s.tok = TLt 175 case '>': 176 s.tok = TGt 177 default: 178 return s.invalid(first) 179 } 180 181 ch, err := s.rune() 182 if err == io.EOF { 183 return nil // the assigned token is correct 184 } else if err != nil { 185 return s.fail(err) 186 } 187 if ch == '=' { 188 s.buf.WriteRune(ch) 189 s.tok++ // depends on token order 190 return nil 191 } 192 s.unrune() 193 return nil 194 } 195 196 func (s *Scanner) scanTagLike(first rune) error { 197 s.buf.WriteRune(first) 198 var hasSpace bool 199 for { 200 ch, err := s.rune() 201 if err == io.EOF { 202 break 203 } else if err != nil { 204 return s.fail(err) 205 } 206 if !isTagRune(ch) { 207 hasSpace = ch == ' ' // to check for TIME, DATE 208 break 209 } 210 s.buf.WriteRune(ch) 211 } 212 213 text := s.buf.String() 214 switch text { 215 case "TIME": 216 if hasSpace { 217 return s.scanTimestamp() 218 } 219 s.tok = TTag 220 case "DATE": 221 if hasSpace { 222 return s.scanDatestamp() 223 } 224 s.tok = TTag 225 case "AND": 226 s.tok = TAnd 227 case "EXISTS": 228 s.tok = TExists 229 case "CONTAINS": 230 s.tok = TContains 231 default: 232 s.tok = TTag 233 } 234 s.unrune() 235 return nil 236 } 237 238 func (s *Scanner) scanTimestamp() error { 239 s.buf.Reset() // discard "TIME" label 240 if err := s.scanWhile(isTimeRune); err != nil { 241 return err 242 } 243 if ts, err := time.Parse(TimeFormat, s.buf.String()); err != nil { 244 return s.fail(fmt.Errorf("invalid TIME value: %w", err)) 245 } else if y := ts.Year(); y < 1900 || y > 2999 { 246 return s.fail(fmt.Errorf("timestamp year %d out of range", ts.Year())) 247 } 248 s.tok = TTime 249 return nil 250 } 251 252 func (s *Scanner) scanDatestamp() error { 253 s.buf.Reset() // discard "DATE" label 254 if err := s.scanWhile(isDateRune); err != nil { 255 return err 256 } 257 if ts, err := time.Parse(DateFormat, s.buf.String()); err != nil { 258 return s.fail(fmt.Errorf("invalid DATE value: %w", err)) 259 } else if y := ts.Year(); y < 1900 || y > 2999 { 260 return s.fail(fmt.Errorf("datestamp year %d out of range", ts.Year())) 261 } 262 s.tok = TDate 263 return nil 264 } 265 266 func (s *Scanner) scanWhile(ok func(rune) bool) error { 267 for { 268 ch, err := s.rune() 269 if err == io.EOF { 270 return nil 271 } else if err != nil { 272 return s.fail(err) 273 } else if !ok(ch) { 274 s.unrune() 275 return nil 276 } 277 s.buf.WriteRune(ch) 278 } 279 } 280 281 func (s *Scanner) rune() (rune, error) { 282 ch, nb, err := s.r.ReadRune() 283 s.last = nb 284 s.end += nb 285 return ch, err 286 } 287 288 func (s *Scanner) unrune() { 289 _ = s.r.UnreadRune() 290 s.end -= s.last 291 } 292 293 func (s *Scanner) fail(err error) error { 294 s.err = err 295 return err 296 } 297 298 func (s *Scanner) invalid(ch rune) error { 299 return s.fail(fmt.Errorf("invalid input %c at offset %d", ch, s.end)) 300 } 301 302 func isDigit(r rune) bool { return '0' <= r && r <= '9' } 303 304 func isTagRune(r rune) bool { 305 return r == '.' || r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 306 } 307 308 func isTimeRune(r rune) bool { 309 return strings.ContainsRune("-T:+Z", r) || isDigit(r) 310 } 311 312 func isDateRune(r rune) bool { return isDigit(r) || r == '-' }