github.com/alibaba/ilogtail/pkg@v0.0.0-20250526110833-c53b480d046c/fmtstr/formatstring.go (about)

     1  // Licensed to Elasticsearch B.V. under one or more contributor
     2  // license agreements. See the NOTICE file distributed with
     3  // this work for additional information regarding copyright
     4  // ownership. Elasticsearch B.V. licenses this file to you under
     5  // the Apache License, Version 2.0 (the "License"); you may
     6  // not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing,
    12  // software distributed under the License is distributed on an
    13  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    14  // KIND, either express or implied.  See the License for the
    15  // specific language governing permissions and limitations
    16  // under the License.
    17  
    18  package fmtstr
    19  
    20  import (
    21  	"bytes"
    22  	"errors"
    23  	"fmt"
    24  	"strings"
    25  )
    26  
    27  type FormatEvaler interface {
    28  	// Eval will execute the format and writes the results into
    29  	// the provided output buffer. Returns error on failure.
    30  	Eval(ctx interface{}, out *bytes.Buffer) error
    31  }
    32  
    33  // StringFormatter interface extends FormatEvaler adding support for querying
    34  // formatter meta data.
    35  type StringFormatter interface {
    36  	FormatEvaler
    37  
    38  	// Run execute the formatter returning the generated string.
    39  	Run(ctx interface{}) (string, error)
    40  
    41  	// IsConst returns true, if execution of formatter will always return the
    42  	// same constant string.
    43  	IsConst() bool
    44  }
    45  
    46  // VariableOp defines one expansion variable, including operator and parameter.
    47  // variable operations are always introduced by a colon ':'.
    48  // For example the format string %{x:p1:?p2} has 2 variable operations
    49  // (":", "p1") and (":?", "p2"). It's up to concrete format string implementation
    50  // to compile and interpret variable ops.
    51  type VariableOp struct {
    52  	op    string
    53  	param string
    54  }
    55  
    56  type constStringFormatter struct {
    57  	s string
    58  }
    59  
    60  type execStringFormatter struct {
    61  	evalers []FormatEvaler
    62  }
    63  
    64  type formatElement interface {
    65  	compile(ctx *compileCtx) (FormatEvaler, error)
    66  }
    67  
    68  type compileCtx struct {
    69  	compileVariable VariableCompiler
    70  }
    71  
    72  // VariableCompiler is used to compile a variable expansion into
    73  // an FormatEvaler to be used with the format-string.
    74  type VariableCompiler func(string, []VariableOp) (FormatEvaler, error)
    75  
    76  // StringElement implements StringFormatter always returning a constant string.
    77  type StringElement struct {
    78  	S string
    79  }
    80  
    81  type variableElement struct {
    82  	field string
    83  	ops   []VariableOp
    84  }
    85  
    86  type token struct {
    87  	typ tokenType
    88  	val string
    89  }
    90  
    91  type tokenType uint16
    92  
    93  type lexer chan token
    94  
    95  const (
    96  	tokErr tokenType = iota + 1
    97  	tokString
    98  	tokOpen
    99  	tokClose
   100  	tokOperator
   101  )
   102  
   103  var (
   104  	openToken  = token{tokOpen, "%{"}
   105  	closeToken = token{tokClose, "}"}
   106  )
   107  
   108  var (
   109  	errNestedVar          = errors.New("format string variables can not be nested")
   110  	errUnexpectedOperator = errors.New("unexpected formatter operator")
   111  	errMissingClose       = errors.New("missing closing '}'")
   112  	errEmptyFormat        = errors.New("empty format expansion")
   113  	errParamsOpsMismatch  = errors.New("more parameters then ops parsed")
   114  )
   115  
   116  // Compile compiles an input format string into a StringFormatter. The variable
   117  // compiler `vc` is invoked for every variable expansion found in the input format
   118  // string. Returns error on parse failure or if variable compiler fails.
   119  //
   120  // Variable expansion are enclosed in expansion braces `%{<expansion>}`.
   121  // The `<expansion>` can contain additional parameters separated by ops
   122  // introduced by colons ':'. For example the format string `%{value:v1:?v2}`
   123  // will be parsed into variable expansion on `value` with variable ops
   124  // `[(":", "v1"), (":?", "v2")]`. It's up to the variable compiler to interpret
   125  // content and variable ops.
   126  //
   127  // The back-slash character `\` acts as escape character.
   128  func Compile(in string, vc VariableCompiler) (StringFormatter, error) {
   129  	ctx := &compileCtx{vc}
   130  	return compile(ctx, in)
   131  }
   132  
   133  func CompileKeys(in string) ([]string, error) {
   134  	keys := make([]string, 0)
   135  	lexerChan := makeLexer(in)
   136  	defer lexerChan.Finish()
   137  	// parse format string
   138  	elements, err := parse(lexerChan)
   139  	if err != nil {
   140  		return nil, err
   141  	}
   142  	for i := range elements {
   143  		var ele = elements[i]
   144  		if s, ok := ele.(variableElement); ok {
   145  			keys = append(keys, s.field)
   146  		}
   147  	}
   148  	return keys, err
   149  }
   150  
   151  func compile(ctx *compileCtx, in string) (StringFormatter, error) {
   152  	lexerChan := makeLexer(in)
   153  	defer lexerChan.Finish()
   154  
   155  	// parse format string
   156  	elements, err := parse(lexerChan)
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  
   161  	// compile elements into evaluators
   162  	evalers := make([]FormatEvaler, len(elements))
   163  	for i := range elements {
   164  		evalers[i], err = elements[i].compile(ctx)
   165  		if err != nil {
   166  			return nil, err
   167  		}
   168  	}
   169  	evalers = optimize(evalers)
   170  
   171  	// try to create constant formatter for constant string
   172  	if len(evalers) == 1 {
   173  		if se, ok := evalers[0].(StringElement); ok {
   174  			return constStringFormatter{se.S}, nil
   175  		}
   176  	}
   177  
   178  	// create executable string formatter
   179  	fmt := execStringFormatter{
   180  		evalers: evalers,
   181  	}
   182  	return fmt, nil
   183  }
   184  
   185  // optimize optimizes the sequence of evaluators by combining consecutive
   186  // StringElement instances into one StringElement
   187  func optimize(in []FormatEvaler) []FormatEvaler {
   188  	out := in[:0]
   189  
   190  	var active StringElement
   191  	isActive := false
   192  
   193  	for _, evaler := range in {
   194  		se, isString := evaler.(StringElement)
   195  		if !isString {
   196  			if isActive {
   197  				out = append(out, active)
   198  				isActive = false
   199  			}
   200  			out = append(out, evaler)
   201  			continue
   202  		}
   203  
   204  		if !isActive {
   205  			active = se
   206  			isActive = true
   207  			continue
   208  		}
   209  		active.S += se.S
   210  	}
   211  
   212  	if isActive {
   213  		out = append(out, active)
   214  	}
   215  
   216  	return out
   217  }
   218  
   219  func (f constStringFormatter) Eval(_ interface{}, out *bytes.Buffer) error {
   220  	_, err := out.WriteString(f.s)
   221  	return err
   222  }
   223  
   224  func (f constStringFormatter) Run(_ interface{}) (string, error) {
   225  	return f.s, nil
   226  }
   227  
   228  func (f constStringFormatter) IsConst() bool {
   229  	return true
   230  }
   231  
   232  func (f execStringFormatter) Eval(ctx interface{}, out *bytes.Buffer) error {
   233  	for _, evaler := range f.evalers {
   234  		if err := evaler.Eval(ctx, out); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  func (f execStringFormatter) Run(ctx interface{}) (string, error) {
   242  	buf := bytes.NewBuffer(nil)
   243  	if err := f.Eval(ctx, buf); err != nil {
   244  		return "", err
   245  	}
   246  	return buf.String(), nil
   247  }
   248  
   249  func (f execStringFormatter) IsConst() bool {
   250  	return false
   251  }
   252  
   253  func (e StringElement) compile(ctx *compileCtx) (FormatEvaler, error) {
   254  	return e, nil
   255  }
   256  
   257  // Eval write the string elements constant string value into
   258  // output buffer.
   259  func (e StringElement) Eval(_ interface{}, out *bytes.Buffer) error {
   260  	_, err := out.WriteString(e.S)
   261  	return err
   262  }
   263  
   264  func makeVariableElement(f string, ops, params []string) (variableElement, error) {
   265  	if len(params) > len(ops) {
   266  		return variableElement{}, errParamsOpsMismatch
   267  	}
   268  
   269  	out := make([]VariableOp, len(ops))
   270  	for i := range params {
   271  		out[i] = VariableOp{op: ops[i], param: params[i]}
   272  	}
   273  	if len(ops) > len(params) {
   274  		i := len(ops) - 1
   275  		out[i] = VariableOp{op: ops[i]}
   276  	}
   277  
   278  	return variableElement{field: f, ops: out}, nil
   279  }
   280  
   281  func (e variableElement) compile(ctx *compileCtx) (FormatEvaler, error) {
   282  	return ctx.compileVariable(e.field, e.ops)
   283  }
   284  
   285  func parse(lex lexer) ([]formatElement, error) {
   286  	var elems []formatElement
   287  
   288  	for token := range lex.Tokens() {
   289  		switch token.typ {
   290  		case tokErr:
   291  			return nil, errors.New(token.val)
   292  
   293  		case tokString:
   294  			elems = append(elems, StringElement{token.val})
   295  
   296  		case tokOpen:
   297  			elem, err := parseVariable(lex)
   298  			if err != nil {
   299  				return nil, err
   300  			}
   301  			elems = append(elems, elem)
   302  
   303  		case tokClose, tokOperator:
   304  			// should not happen, but let's return error just in case
   305  			return nil, fmt.Errorf("Token '%v'(%v) not allowed", token.val, token.typ)
   306  		}
   307  	}
   308  
   309  	return elems, nil
   310  }
   311  
   312  func parseVariable(lex lexer) (formatElement, error) {
   313  	var strings []string
   314  	var ops []string
   315  
   316  	for token := range lex.Tokens() {
   317  		switch token.typ {
   318  		case tokErr:
   319  			return nil, errors.New(token.val)
   320  
   321  		case tokOpen:
   322  			return nil, errNestedVar
   323  
   324  		case tokClose:
   325  			if len(strings) == 0 {
   326  				return nil, errEmptyFormat
   327  			}
   328  			return makeVariableElement(strings[0], ops, strings[1:])
   329  
   330  		case tokString:
   331  			if len(strings) != len(ops) {
   332  				return nil, fmt.Errorf("Unexpected string token %v, expected operator", token.val)
   333  			}
   334  			strings = append(strings, token.val)
   335  
   336  		case tokOperator:
   337  			if len(strings) == 0 {
   338  				return nil, errUnexpectedOperator
   339  			}
   340  			ops = append(ops, token.val)
   341  			if len(ops) > len(strings) {
   342  				return nil, fmt.Errorf("Consecutive operator tokens '%v'", token.val)
   343  			}
   344  
   345  		default:
   346  			return nil, fmt.Errorf("Unexpected token '%v' (%v)", token.val, token.typ)
   347  		}
   348  	}
   349  
   350  	return nil, errMissingClose
   351  }
   352  
   353  func makeLexer(in string) lexer {
   354  	lex := make(chan token, 1)
   355  
   356  	go func() {
   357  		off := 0
   358  		content := in
   359  
   360  		defer func() {
   361  			if len(content) > 0 {
   362  				lex <- token{tokString, content}
   363  			}
   364  			close(lex)
   365  		}()
   366  
   367  		strToken := func(s string) {
   368  			if s != "" {
   369  				lex <- token{tokString, s}
   370  			}
   371  		}
   372  
   373  		opToken := func(op string) token {
   374  			return token{tokOperator, op}
   375  		}
   376  
   377  		varcount := 0
   378  		for len(content) > 0 {
   379  			var idx int
   380  			if varcount == 0 {
   381  				idx = strings.IndexAny(content[off:], `%\`)
   382  			} else {
   383  				idx = strings.IndexAny(content[off:], `%:}\`)
   384  			}
   385  
   386  			if idx == -1 {
   387  				return
   388  			}
   389  
   390  			idx += off
   391  			off = idx + 1
   392  
   393  			switch content[idx] {
   394  			case '\\': // escape next character
   395  				content = content[:idx] + content[off:]
   396  				continue
   397  
   398  			case ':':
   399  				if len(content) <= off { // found ':' at end of string
   400  					return
   401  				}
   402  
   403  				strToken(content[:idx])
   404  				op := ":"
   405  				if strings.ContainsRune("!@#&*=+<>?", rune(content[off])) {
   406  					off++
   407  					op = content[idx : off+1]
   408  				}
   409  				lex <- opToken(op)
   410  
   411  			case '}':
   412  				strToken(content[:idx])
   413  				lex <- closeToken
   414  				varcount--
   415  
   416  			case '%':
   417  				if len(content) <= off { // found '%' at end of string
   418  					return
   419  				}
   420  
   421  				if content[off] != '{' {
   422  					continue // no variable expression
   423  				}
   424  
   425  				strToken(content[:idx])
   426  				lex <- openToken
   427  				off++
   428  				varcount++
   429  			}
   430  
   431  			content = content[off:]
   432  			off = 0
   433  		}
   434  
   435  	}()
   436  
   437  	return lex
   438  }
   439  
   440  func (l lexer) Tokens() <-chan token {
   441  	return l
   442  }
   443  
   444  func (l lexer) Finish() {
   445  	for range l.Tokens() {
   446  	}
   447  }