github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/syntax/lex.go

package syntax

import (
	"strings"
	"text/scanner"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/dustin/go-humanize"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/util/strutil"

	"github.com/grafana/loki/pkg/logqlmodel"
)

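// tokens maps the fixed LogQL symbols, operators and keywords to their parser
// token types.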
var tokens = map[string]int{
	",":            COMMA,
	".":            DOT,
	"{":            OPEN_BRACE,
	"}":            CLOSE_BRACE,
	"=":            EQ,
	OpTypeNEQ:      NEQ,
	"=~":           RE,
	"!~":           NRE,
	"|=":           PIPE_EXACT,
	"|~":           PIPE_MATCH,
	OpPipe:         PIPE,
	OpUnwrap:       UNWRAP,
	"(":            OPEN_PARENTHESIS,
	")":            CLOSE_PARENTHESIS,
	"by":           BY,
	"without":      WITHOUT,
	"bool":         BOOL,
	"[":            OPEN_BRACKET,
	"]":            CLOSE_BRACKET,
	OpLabelReplace: LABEL_REPLACE,
	OpOffset:       OFFSET,
	OpOn:           ON,
	OpIgnoring:     IGNORING,
	OpGroupLeft:    GROUP_LEFT,
	OpGroupRight:   GROUP_RIGHT,

	// binops
	OpTypeOr:     OR,
	OpTypeAnd:    AND,
	OpTypeUnless: UNLESS,
	OpTypeAdd:    ADD,
	OpTypeSub:    SUB,
	OpTypeMul:    MUL,
	OpTypeDiv:    DIV,
	OpTypeMod:    MOD,
	OpTypePow:    POW,
	// comparison binops
	OpTypeCmpEQ: CMP_EQ,
	OpTypeGT:    GT,
	OpTypeGTE:   GTE,
	OpTypeLT:    LT,
	OpTypeLTE:   LTE,

	// parsers
	OpParserTypeJSON:    JSON,
	OpParserTypeRegexp:  REGEXP,
	OpParserTypeLogfmt:  LOGFMT,
	OpParserTypeUnpack:  UNPACK,
	OpParserTypePattern: PATTERN,

	// fmt
	OpFmtLabel: LABEL_FMT,
	OpFmtLine:  LINE_FMT,

	// filter functions
	OpFilterIP: IP,
}

// functionTokens are tokens that need to be suffixed with parentheses
// (see isFunction) to be lexed as functions rather than identifiers.
var functionTokens = map[string]int{
	// range vec ops
	OpRangeTypeRate:        RATE,
	OpRangeTypeRateCounter: RATE_COUNTER,
	OpRangeTypeCount:       COUNT_OVER_TIME,
	OpRangeTypeBytesRate:   BYTES_RATE,
	OpRangeTypeBytes:       BYTES_OVER_TIME,
	OpRangeTypeAvg:         AVG_OVER_TIME,
	OpRangeTypeSum:         SUM_OVER_TIME,
	OpRangeTypeMin:         MIN_OVER_TIME,
	OpRangeTypeMax:         MAX_OVER_TIME,
	OpRangeTypeStdvar:      STDVAR_OVER_TIME,
	OpRangeTypeStddev:      STDDEV_OVER_TIME,
	OpRangeTypeQuantile:    QUANTILE_OVER_TIME,
	OpRangeTypeFirst:       FIRST_OVER_TIME,
	OpRangeTypeLast:        LAST_OVER_TIME,
	OpRangeTypeAbsent:      ABSENT_OVER_TIME,

	// vec ops
	OpTypeSum:      SUM,
	OpTypeAvg:      AVG,
	OpTypeMax:      MAX,
	OpTypeMin:      MIN,
	OpTypeCount:    COUNT,
	OpTypeStddev:   STDDEV,
	OpTypeStdvar:   STDVAR,
	OpTypeBottomK:  BOTTOMK,
	OpTypeTopK:     TOPK,
	OpLabelReplace: LABEL_REPLACE,

	// conversion Op
	OpConvBytes:           BYTES_CONV,
	OpConvDuration:        DURATION_CONV,
	OpConvDurationSeconds: DURATION_SECONDS_CONV,

	// filterOp
	OpFilterIP: IP,
}

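// lexer tokenizes a LogQL expression. It embeds a text/scanner.Scanner and
// collects every parse error it encounters along the way.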
type lexer struct {
	scanner.Scanner
	errs    []logqlmodel.ParseError
	builder strings.Builder
}

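// Lex returns the next token type and stores its value (string, duration or
// bytes) into lval. It returns 0 when the end of the input is reached.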
func (l *lexer) Lex(lval *exprSymType) int {
	r := l.Scan()

	switch r {
	case '#':
		// Scan until a newline or EOF is encountered
		for next := l.Peek(); !(next == '\n' || next == scanner.EOF); next = l.Next() {
		}

		return l.Lex(lval)

	case scanner.EOF:
		return 0

	case scanner.Int, scanner.Float:
		numberText := l.TokenText()

		duration, ok := tryScanDuration(numberText, &l.Scanner)
		if ok {
			lval.duration = duration
			return DURATION
		}

		bytes, ok := tryScanBytes(numberText, &l.Scanner)
		if ok {
			lval.bytes = bytes
			return BYTES
		}

		lval.str = numberText
		return NUMBER

	case scanner.String, scanner.RawString:
		var err error
		tokenText := l.TokenText()
		if !utf8.ValidString(tokenText) {
			l.Error("invalid UTF-8 rune")
			return 0
		}
		lval.str, err = strutil.Unquote(tokenText)
		if err != nil {
			l.Error(err.Error())
			return 0
		}
		return STRING
	}

	// scan the duration of a range selector, e.g. [5m], up to the closing ']'
	if r == '[' {
		l.builder.Reset()
		for r := l.Next(); r != scanner.EOF; r = l.Next() {
			if r == ']' {
				i, err := model.ParseDuration(l.builder.String())
				if err != nil {
					l.Error(err.Error())
					return 0
				}
				lval.duration = time.Duration(i)
				return RANGE
			}
			_, _ = l.builder.WriteRune(r)
		}
		l.Error("missing closing ']' in duration")
		return 0
	}

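	// Operators and functions are matched with a one-rune lookahead: first try
	// the current token text extended by the peeked rune (e.g. "|" + "=" for "|="),
	// then fall back to the token text alone.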
	tokenText := l.TokenText()
	tokenNext := tokenText + string(l.Peek())
	if tok, ok := functionTokens[tokenNext]; ok {
		// copy the scanner and advance the copy past the full token before checking the suffix
		sc := l.Scanner
		sc.Next()
		if isFunction(sc) {
			l.Next()
			return tok
		}
	}

	if tok, ok := functionTokens[tokenText]; ok {
		if !isFunction(l.Scanner) {
			lval.str = tokenText
			return IDENTIFIER
		}
		return tok
	}

	if tok, ok := tokens[tokenNext]; ok {
		l.Next()
		return tok
	}

	if tok, ok := tokens[tokenText]; ok {
		return tok
	}

	lval.str = tokenText
	return IDENTIFIER
}

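// Error records msg as a ParseError at the lexer's current line and column.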
func (l *lexer) Error(msg string) {
	l.errs = append(l.errs, logqlmodel.NewParseError(msg, l.Line, l.Column))
}

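// tryScanDuration tries to extend the already scanned number with the runes
// that follow it and parse the result as a duration (e.g. "5" followed by
// "m30s" yields 5m30s). The scanner is only advanced when the parse succeeds.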
func tryScanDuration(number string, l *scanner.Scanner) (time.Duration, bool) {
	var sb strings.Builder
	sb.WriteString(number)
	// copy the scanner to avoid advancing it in case it's not a duration.
	s := *l
	consumed := 0
	for r := s.Peek(); r != scanner.EOF && !unicode.IsSpace(r); r = s.Peek() {
		if !unicode.IsNumber(r) && !isDurationRune(r) && r != '.' {
			break
		}
		_, _ = sb.WriteRune(r)
		_ = s.Next()
		consumed++
	}

	if consumed == 0 {
		return 0, false
	}
	// we've found more characters before a whitespace or the end
	durationString := sb.String()
	duration, err := parseDuration(durationString)
	if err != nil {
		return 0, false
	}

	// we need to consume the scanner, now that we know this is a duration.
	for i := 0; i < consumed; i++ {
		_ = l.Next()
	}

	return duration, true
}

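// parseDuration accepts Prometheus-style durations (e.g. "1d", "5m") as well
// as Go durations (e.g. "1.5h"), preferring the Prometheus syntax.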
func parseDuration(d string) (time.Duration, error) {
	var duration time.Duration
	// Try to parse promql style durations first, to ensure that we support the same duration
	// units as promql
	prometheusDuration, err := model.ParseDuration(d)
	if err != nil {
		// Fall back to standard library's time.ParseDuration if a promql style
		// duration couldn't be parsed.
		duration, err = time.ParseDuration(d)
		if err != nil {
			return 0, err
		}
	} else {
		duration = time.Duration(prometheusDuration)
	}

	return duration, nil
}

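// isDurationRune reports whether r may appear in a duration unit suffix.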
func isDurationRune(r rune) bool {
	// "ns", "us" (or "µs"), "ms", "s", "m", "h", "d", "w", "y".
	switch r {
	case 'n', 'u', 'µ', 'm', 's', 'h', 'd', 'w', 'y':
		return true
	default:
		return false
	}
}

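// tryScanBytes tries to extend the already scanned number with the runes that
// follow it and parse the result as a byte size (e.g. "1" followed by "KiB"
// yields 1024). The scanner is only advanced when the parse succeeds.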
func tryScanBytes(number string, l *scanner.Scanner) (uint64, bool) {
	var sb strings.Builder
	sb.WriteString(number)
	// copy the scanner to avoid advancing it in case it's not a byte size.
	s := *l
	consumed := 0
	for r := s.Peek(); r != scanner.EOF && !unicode.IsSpace(r); r = s.Peek() {
		if !unicode.IsNumber(r) && !isBytesSizeRune(r) && r != '.' {
			break
		}
		_, _ = sb.WriteRune(r)
		_ = s.Next()
		consumed++
	}

	if consumed == 0 {
		return 0, false
	}
	// we've found more characters before a whitespace or the end
	b, err := humanize.ParseBytes(sb.String())
	if err != nil {
		return 0, false
	}
	// we need to consume the scanner, now that we know this is a byte size.
	for i := 0; i < consumed; i++ {
		_ = l.Next()
	}
	return b, true
}

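// isBytesSizeRune reports whether r may appear in a byte-size unit suffix.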
func isBytesSizeRune(r rune) bool {
	// Accept: B, kB, MB, GB, TB, PB, KB, KiB, MiB, GiB, TiB, PiB
	// Do not accept: EB, ZB, YB, EiB, ZiB and YiB. They are not supported since the value might not be representable in a uint64.
	switch r {
	case 'B', 'i', 'k', 'K', 'M', 'G', 'T', 'P':
		return true
	default:
		return false
	}
}

// isFunction checks whether the runes that follow are either an open parenthesis
// or a by/without grouping followed by an open parenthesis. This allows functions
// (e.g. sum(...) or sum by (app) (...)) to be distinguished from plain identifiers.
func isFunction(sc scanner.Scanner) bool {
	var sb strings.Builder
	sc = trimSpace(sc)
	for r := sc.Next(); r != scanner.EOF; r = sc.Next() {
		sb.WriteRune(r)
		switch sb.String() {
		case "(":
			return true
		case "by", "without":
			sc = trimSpace(sc)
			return sc.Next() == '('
		}
	}
	return false
}

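// trimSpace returns a copy of the scanner advanced past any leading whitespace.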
func trimSpace(l scanner.Scanner) scanner.Scanner {
	for n := l.Peek(); n != scanner.EOF; n = l.Peek() {
		if unicode.IsSpace(n) {
			l.Next()
			continue
		}
		return l
	}
	return l
}