github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/operator/builtin/parser/regex.go (about)

     1  package parser
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"regexp"
     7  
     8  	"github.com/observiq/carbon/entry"
     9  	"github.com/observiq/carbon/errors"
    10  	"github.com/observiq/carbon/operator"
    11  	"github.com/observiq/carbon/operator/helper"
    12  )
    13  
    14  func init() {
    15  	operator.Register("regex_parser", func() operator.Builder { return NewRegexParserConfig("") })
    16  }
    17  
    18  func NewRegexParserConfig(operatorID string) *RegexParserConfig {
    19  	return &RegexParserConfig{
    20  		ParserConfig: helper.NewParserConfig(operatorID, "regex_parser"),
    21  	}
    22  }
    23  
    24  // RegexParserConfig is the configuration of a regex parser operator.
    25  type RegexParserConfig struct {
    26  	helper.ParserConfig `yaml:",inline"`
    27  
    28  	Regex string `json:"regex" yaml:"regex"`
    29  }
    30  
    31  // Build will build a regex parser operator.
    32  func (c RegexParserConfig) Build(context operator.BuildContext) (operator.Operator, error) {
    33  	parserOperator, err := c.ParserConfig.Build(context)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	if c.Regex == "" {
    39  		return nil, fmt.Errorf("missing required field 'regex'")
    40  	}
    41  
    42  	r, err := regexp.Compile(c.Regex)
    43  	if err != nil {
    44  		return nil, fmt.Errorf("compiling regex: %s", err)
    45  	}
    46  
    47  	namedCaptureGroups := 0
    48  	for _, groupName := range r.SubexpNames() {
    49  		if groupName != "" {
    50  			namedCaptureGroups++
    51  		}
    52  	}
    53  	if namedCaptureGroups == 0 {
    54  		return nil, errors.NewError(
    55  			"no named capture groups in regex pattern",
    56  			"use named capture groups like '^(?P<my_key>.*)$' to specify the key name for the parsed field",
    57  		)
    58  	}
    59  
    60  	regexParser := &RegexParser{
    61  		ParserOperator: parserOperator,
    62  		regexp:         r,
    63  	}
    64  
    65  	return regexParser, nil
    66  }
    67  
    68  // RegexParser is an operator that parses regex in an entry.
    69  type RegexParser struct {
    70  	helper.ParserOperator
    71  	regexp *regexp.Regexp
    72  }
    73  
    74  // Process will parse an entry for regex.
    75  func (r *RegexParser) Process(ctx context.Context, entry *entry.Entry) error {
    76  	return r.ParserOperator.ProcessWith(ctx, entry, r.parse)
    77  }
    78  
    79  // parse will parse a value using the supplied regex.
    80  func (r *RegexParser) parse(value interface{}) (interface{}, error) {
    81  	var matches []string
    82  	switch m := value.(type) {
    83  	case string:
    84  		matches = r.regexp.FindStringSubmatch(m)
    85  		if matches == nil {
    86  			return nil, fmt.Errorf("regex pattern does not match")
    87  		}
    88  	case []byte:
    89  		byteMatches := r.regexp.FindSubmatch(m)
    90  		if byteMatches == nil {
    91  			return nil, fmt.Errorf("regex pattern does not match")
    92  		}
    93  
    94  		matches = make([]string, len(byteMatches))
    95  		for i, byteSlice := range byteMatches {
    96  			matches[i] = string(byteSlice)
    97  		}
    98  	default:
    99  		return nil, fmt.Errorf("type '%T' cannot be parsed as regex", value)
   100  	}
   101  
   102  	parsedValues := map[string]interface{}{}
   103  	for i, subexp := range r.regexp.SubexpNames() {
   104  		if i == 0 {
   105  			// Skip whole match
   106  			continue
   107  		}
   108  		if subexp != "" {
   109  			parsedValues[subexp] = matches[i]
   110  		}
   111  	}
   112  
   113  	return parsedValues, nil
   114  }