github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/operator/builtin/parser/regex.go (about) 1 package parser 2 3 import ( 4 "context" 5 "fmt" 6 "regexp" 7 8 "github.com/observiq/carbon/entry" 9 "github.com/observiq/carbon/errors" 10 "github.com/observiq/carbon/operator" 11 "github.com/observiq/carbon/operator/helper" 12 ) 13 14 func init() { 15 operator.Register("regex_parser", func() operator.Builder { return NewRegexParserConfig("") }) 16 } 17 18 func NewRegexParserConfig(operatorID string) *RegexParserConfig { 19 return &RegexParserConfig{ 20 ParserConfig: helper.NewParserConfig(operatorID, "regex_parser"), 21 } 22 } 23 24 // RegexParserConfig is the configuration of a regex parser operator. 25 type RegexParserConfig struct { 26 helper.ParserConfig `yaml:",inline"` 27 28 Regex string `json:"regex" yaml:"regex"` 29 } 30 31 // Build will build a regex parser operator. 32 func (c RegexParserConfig) Build(context operator.BuildContext) (operator.Operator, error) { 33 parserOperator, err := c.ParserConfig.Build(context) 34 if err != nil { 35 return nil, err 36 } 37 38 if c.Regex == "" { 39 return nil, fmt.Errorf("missing required field 'regex'") 40 } 41 42 r, err := regexp.Compile(c.Regex) 43 if err != nil { 44 return nil, fmt.Errorf("compiling regex: %s", err) 45 } 46 47 namedCaptureGroups := 0 48 for _, groupName := range r.SubexpNames() { 49 if groupName != "" { 50 namedCaptureGroups++ 51 } 52 } 53 if namedCaptureGroups == 0 { 54 return nil, errors.NewError( 55 "no named capture groups in regex pattern", 56 "use named capture groups like '^(?P<my_key>.*)$' to specify the key name for the parsed field", 57 ) 58 } 59 60 regexParser := &RegexParser{ 61 ParserOperator: parserOperator, 62 regexp: r, 63 } 64 65 return regexParser, nil 66 } 67 68 // RegexParser is an operator that parses regex in an entry. 69 type RegexParser struct { 70 helper.ParserOperator 71 regexp *regexp.Regexp 72 } 73 74 // Process will parse an entry for regex. 75 func (r *RegexParser) Process(ctx context.Context, entry *entry.Entry) error { 76 return r.ParserOperator.ProcessWith(ctx, entry, r.parse) 77 } 78 79 // parse will parse a value using the supplied regex. 80 func (r *RegexParser) parse(value interface{}) (interface{}, error) { 81 var matches []string 82 switch m := value.(type) { 83 case string: 84 matches = r.regexp.FindStringSubmatch(m) 85 if matches == nil { 86 return nil, fmt.Errorf("regex pattern does not match") 87 } 88 case []byte: 89 byteMatches := r.regexp.FindSubmatch(m) 90 if byteMatches == nil { 91 return nil, fmt.Errorf("regex pattern does not match") 92 } 93 94 matches = make([]string, len(byteMatches)) 95 for i, byteSlice := range byteMatches { 96 matches[i] = string(byteSlice) 97 } 98 default: 99 return nil, fmt.Errorf("type '%T' cannot be parsed as regex", value) 100 } 101 102 parsedValues := map[string]interface{}{} 103 for i, subexp := range r.regexp.SubexpNames() { 104 if i == 0 { 105 // Skip whole match 106 continue 107 } 108 if subexp != "" { 109 parsedValues[subexp] = matches[i] 110 } 111 } 112 113 return parsedValues, nil 114 }