github.com/alibaba/ilogtail/pkg@v0.0.0-20250526110833-c53b480d046c/fmtstr/formatstring.go (about) 1 // Licensed to Elasticsearch B.V. under one or more contributor 2 // license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright 4 // ownership. Elasticsearch B.V. licenses this file to you under 5 // the Apache License, Version 2.0 (the "License"); you may 6 // not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 package fmtstr 19 20 import ( 21 "bytes" 22 "errors" 23 "fmt" 24 "strings" 25 ) 26 27 type FormatEvaler interface { 28 // Eval will execute the format and writes the results into 29 // the provided output buffer. Returns error on failure. 30 Eval(ctx interface{}, out *bytes.Buffer) error 31 } 32 33 // StringFormatter interface extends FormatEvaler adding support for querying 34 // formatter meta data. 35 type StringFormatter interface { 36 FormatEvaler 37 38 // Run execute the formatter returning the generated string. 39 Run(ctx interface{}) (string, error) 40 41 // IsConst returns true, if execution of formatter will always return the 42 // same constant string. 43 IsConst() bool 44 } 45 46 // VariableOp defines one expansion variable, including operator and parameter. 47 // variable operations are always introduced by a colon ':'. 48 // For example the format string %{x:p1:?p2} has 2 variable operations 49 // (":", "p1") and (":?", "p2"). It's up to concrete format string implementation 50 // to compile and interpret variable ops. 51 type VariableOp struct { 52 op string 53 param string 54 } 55 56 type constStringFormatter struct { 57 s string 58 } 59 60 type execStringFormatter struct { 61 evalers []FormatEvaler 62 } 63 64 type formatElement interface { 65 compile(ctx *compileCtx) (FormatEvaler, error) 66 } 67 68 type compileCtx struct { 69 compileVariable VariableCompiler 70 } 71 72 // VariableCompiler is used to compile a variable expansion into 73 // an FormatEvaler to be used with the format-string. 74 type VariableCompiler func(string, []VariableOp) (FormatEvaler, error) 75 76 // StringElement implements StringFormatter always returning a constant string. 77 type StringElement struct { 78 S string 79 } 80 81 type variableElement struct { 82 field string 83 ops []VariableOp 84 } 85 86 type token struct { 87 typ tokenType 88 val string 89 } 90 91 type tokenType uint16 92 93 type lexer chan token 94 95 const ( 96 tokErr tokenType = iota + 1 97 tokString 98 tokOpen 99 tokClose 100 tokOperator 101 ) 102 103 var ( 104 openToken = token{tokOpen, "%{"} 105 closeToken = token{tokClose, "}"} 106 ) 107 108 var ( 109 errNestedVar = errors.New("format string variables can not be nested") 110 errUnexpectedOperator = errors.New("unexpected formatter operator") 111 errMissingClose = errors.New("missing closing '}'") 112 errEmptyFormat = errors.New("empty format expansion") 113 errParamsOpsMismatch = errors.New("more parameters then ops parsed") 114 ) 115 116 // Compile compiles an input format string into a StringFormatter. The variable 117 // compiler `vc` is invoked for every variable expansion found in the input format 118 // string. Returns error on parse failure or if variable compiler fails. 119 // 120 // Variable expansion are enclosed in expansion braces `%{<expansion>}`. 121 // The `<expansion>` can contain additional parameters separated by ops 122 // introduced by colons ':'. For example the format string `%{value:v1:?v2}` 123 // will be parsed into variable expansion on `value` with variable ops 124 // `[(":", "v1"), (":?", "v2")]`. It's up to the variable compiler to interpret 125 // content and variable ops. 126 // 127 // The back-slash character `\` acts as escape character. 128 func Compile(in string, vc VariableCompiler) (StringFormatter, error) { 129 ctx := &compileCtx{vc} 130 return compile(ctx, in) 131 } 132 133 func CompileKeys(in string) ([]string, error) { 134 keys := make([]string, 0) 135 lexerChan := makeLexer(in) 136 defer lexerChan.Finish() 137 // parse format string 138 elements, err := parse(lexerChan) 139 if err != nil { 140 return nil, err 141 } 142 for i := range elements { 143 var ele = elements[i] 144 if s, ok := ele.(variableElement); ok { 145 keys = append(keys, s.field) 146 } 147 } 148 return keys, err 149 } 150 151 func compile(ctx *compileCtx, in string) (StringFormatter, error) { 152 lexerChan := makeLexer(in) 153 defer lexerChan.Finish() 154 155 // parse format string 156 elements, err := parse(lexerChan) 157 if err != nil { 158 return nil, err 159 } 160 161 // compile elements into evaluators 162 evalers := make([]FormatEvaler, len(elements)) 163 for i := range elements { 164 evalers[i], err = elements[i].compile(ctx) 165 if err != nil { 166 return nil, err 167 } 168 } 169 evalers = optimize(evalers) 170 171 // try to create constant formatter for constant string 172 if len(evalers) == 1 { 173 if se, ok := evalers[0].(StringElement); ok { 174 return constStringFormatter{se.S}, nil 175 } 176 } 177 178 // create executable string formatter 179 fmt := execStringFormatter{ 180 evalers: evalers, 181 } 182 return fmt, nil 183 } 184 185 // optimize optimizes the sequence of evaluators by combining consecutive 186 // StringElement instances into one StringElement 187 func optimize(in []FormatEvaler) []FormatEvaler { 188 out := in[:0] 189 190 var active StringElement 191 isActive := false 192 193 for _, evaler := range in { 194 se, isString := evaler.(StringElement) 195 if !isString { 196 if isActive { 197 out = append(out, active) 198 isActive = false 199 } 200 out = append(out, evaler) 201 continue 202 } 203 204 if !isActive { 205 active = se 206 isActive = true 207 continue 208 } 209 active.S += se.S 210 } 211 212 if isActive { 213 out = append(out, active) 214 } 215 216 return out 217 } 218 219 func (f constStringFormatter) Eval(_ interface{}, out *bytes.Buffer) error { 220 _, err := out.WriteString(f.s) 221 return err 222 } 223 224 func (f constStringFormatter) Run(_ interface{}) (string, error) { 225 return f.s, nil 226 } 227 228 func (f constStringFormatter) IsConst() bool { 229 return true 230 } 231 232 func (f execStringFormatter) Eval(ctx interface{}, out *bytes.Buffer) error { 233 for _, evaler := range f.evalers { 234 if err := evaler.Eval(ctx, out); err != nil { 235 return err 236 } 237 } 238 return nil 239 } 240 241 func (f execStringFormatter) Run(ctx interface{}) (string, error) { 242 buf := bytes.NewBuffer(nil) 243 if err := f.Eval(ctx, buf); err != nil { 244 return "", err 245 } 246 return buf.String(), nil 247 } 248 249 func (f execStringFormatter) IsConst() bool { 250 return false 251 } 252 253 func (e StringElement) compile(ctx *compileCtx) (FormatEvaler, error) { 254 return e, nil 255 } 256 257 // Eval write the string elements constant string value into 258 // output buffer. 259 func (e StringElement) Eval(_ interface{}, out *bytes.Buffer) error { 260 _, err := out.WriteString(e.S) 261 return err 262 } 263 264 func makeVariableElement(f string, ops, params []string) (variableElement, error) { 265 if len(params) > len(ops) { 266 return variableElement{}, errParamsOpsMismatch 267 } 268 269 out := make([]VariableOp, len(ops)) 270 for i := range params { 271 out[i] = VariableOp{op: ops[i], param: params[i]} 272 } 273 if len(ops) > len(params) { 274 i := len(ops) - 1 275 out[i] = VariableOp{op: ops[i]} 276 } 277 278 return variableElement{field: f, ops: out}, nil 279 } 280 281 func (e variableElement) compile(ctx *compileCtx) (FormatEvaler, error) { 282 return ctx.compileVariable(e.field, e.ops) 283 } 284 285 func parse(lex lexer) ([]formatElement, error) { 286 var elems []formatElement 287 288 for token := range lex.Tokens() { 289 switch token.typ { 290 case tokErr: 291 return nil, errors.New(token.val) 292 293 case tokString: 294 elems = append(elems, StringElement{token.val}) 295 296 case tokOpen: 297 elem, err := parseVariable(lex) 298 if err != nil { 299 return nil, err 300 } 301 elems = append(elems, elem) 302 303 case tokClose, tokOperator: 304 // should not happen, but let's return error just in case 305 return nil, fmt.Errorf("Token '%v'(%v) not allowed", token.val, token.typ) 306 } 307 } 308 309 return elems, nil 310 } 311 312 func parseVariable(lex lexer) (formatElement, error) { 313 var strings []string 314 var ops []string 315 316 for token := range lex.Tokens() { 317 switch token.typ { 318 case tokErr: 319 return nil, errors.New(token.val) 320 321 case tokOpen: 322 return nil, errNestedVar 323 324 case tokClose: 325 if len(strings) == 0 { 326 return nil, errEmptyFormat 327 } 328 return makeVariableElement(strings[0], ops, strings[1:]) 329 330 case tokString: 331 if len(strings) != len(ops) { 332 return nil, fmt.Errorf("Unexpected string token %v, expected operator", token.val) 333 } 334 strings = append(strings, token.val) 335 336 case tokOperator: 337 if len(strings) == 0 { 338 return nil, errUnexpectedOperator 339 } 340 ops = append(ops, token.val) 341 if len(ops) > len(strings) { 342 return nil, fmt.Errorf("Consecutive operator tokens '%v'", token.val) 343 } 344 345 default: 346 return nil, fmt.Errorf("Unexpected token '%v' (%v)", token.val, token.typ) 347 } 348 } 349 350 return nil, errMissingClose 351 } 352 353 func makeLexer(in string) lexer { 354 lex := make(chan token, 1) 355 356 go func() { 357 off := 0 358 content := in 359 360 defer func() { 361 if len(content) > 0 { 362 lex <- token{tokString, content} 363 } 364 close(lex) 365 }() 366 367 strToken := func(s string) { 368 if s != "" { 369 lex <- token{tokString, s} 370 } 371 } 372 373 opToken := func(op string) token { 374 return token{tokOperator, op} 375 } 376 377 varcount := 0 378 for len(content) > 0 { 379 var idx int 380 if varcount == 0 { 381 idx = strings.IndexAny(content[off:], `%\`) 382 } else { 383 idx = strings.IndexAny(content[off:], `%:}\`) 384 } 385 386 if idx == -1 { 387 return 388 } 389 390 idx += off 391 off = idx + 1 392 393 switch content[idx] { 394 case '\\': // escape next character 395 content = content[:idx] + content[off:] 396 continue 397 398 case ':': 399 if len(content) <= off { // found ':' at end of string 400 return 401 } 402 403 strToken(content[:idx]) 404 op := ":" 405 if strings.ContainsRune("!@#&*=+<>?", rune(content[off])) { 406 off++ 407 op = content[idx : off+1] 408 } 409 lex <- opToken(op) 410 411 case '}': 412 strToken(content[:idx]) 413 lex <- closeToken 414 varcount-- 415 416 case '%': 417 if len(content) <= off { // found '%' at end of string 418 return 419 } 420 421 if content[off] != '{' { 422 continue // no variable expression 423 } 424 425 strToken(content[:idx]) 426 lex <- openToken 427 off++ 428 varcount++ 429 } 430 431 content = content[off:] 432 off = 0 433 } 434 435 }() 436 437 return lex 438 } 439 440 func (l lexer) Tokens() <-chan token { 441 return l 442 } 443 444 func (l lexer) Finish() { 445 for range l.Tokens() { 446 } 447 }