github.com/honeycombio/honeytail@v1.9.0/parsers/csv/csv.go (about)

     1  package csv
     2  
     3  import (
     4  	"encoding/csv"
     5  	"errors"
     6  	"strconv"
     7  	"strings"
     8  	"sync"
     9  
    10  	"github.com/sirupsen/logrus"
    11  
    12  	"github.com/honeycombio/honeytail/event"
    13  	"github.com/honeycombio/honeytail/httime"
    14  	"github.com/honeycombio/honeytail/parsers"
    15  )
    16  
    17  // Options defines the options relevant to the CSV parser
// Options defines the options relevant to the CSV parser
type Options struct {
	// Fields names the CSV columns, in order; it is itself parsed as a
	// CSV record so quoting rules apply. Required (see Parser.Init).
	Fields           string `long:"fields" description:"Comma separated list of CSV fields, in order."`
	// TimeFieldName selects which parsed field holds the event timestamp.
	TimeFieldName    string `long:"timefield" description:"Name of the field that contains a timestamp" yaml:"timefield,omitempty"`
	// TimeFieldFormat is passed to httime when extracting the timestamp.
	TimeFieldFormat  string `long:"time_format" description:"Timestamp format to use (strftime and Golang time.Parse supported)" yaml:"time_format,omitempty"`
	// TrimLeadingSpace mirrors encoding/csv's Reader.TrimLeadingSpace.
	TrimLeadingSpace bool   `long:"trim_leading_space" description:"trim leading whitespace in CSV fields and values" yaml:"trim_leading_space,omitempty"`

	// NumParsers controls worker-goroutine count in ProcessLines; values
	// < 1 fall back to a single worker.
	NumParsers int `hidden:"true" description:"number of csv parsers to spin up" yaml:"-"`
}
    26  
    27  // Parser implements the Parser interface
// Parser implements the Parser interface
type Parser struct {
	conf       Options            // copy of the options passed to Init
	lineParser parsers.LineParser // per-line CSV parser built in Init
}
    32  
    33  // Init constructs our parser from the provided options
    34  func (p *Parser) Init(options interface{}) error {
    35  	p.conf = *options.(*Options)
    36  	if p.conf.Fields == "" {
    37  		return errors.New("must provide at least 1 field name when parsing CSV lines")
    38  	}
    39  	lineParser, err := NewCSVLineParser(p.conf.Fields, p.conf.TrimLeadingSpace)
    40  	if err != nil {
    41  		return err
    42  	}
    43  	p.lineParser = lineParser
    44  	return nil
    45  }
    46  
// CSVLineParser parses one CSV record per input line, mapping each
// value to the field name at the same position.
type CSVLineParser struct {
	fields           []string // ordered field names, one per CSV column
	numFields        int      // len(fields); also enforced via FieldsPerRecord
	trimLeadingSpace bool     // forwarded to encoding/csv's TrimLeadingSpace
}
    52  
    53  // NewCSVLineParser factory
    54  func NewCSVLineParser(fieldsString string, trimLeadingSpace bool) (*CSVLineParser, error) {
    55  	// Is building a reader for every single line a good idea?
    56  	// Potential for future optimization here
    57  	reader := strings.NewReader(fieldsString)
    58  	csvReader := csv.NewReader(reader)
    59  	csvReader.TrimLeadingSpace = trimLeadingSpace
    60  
    61  	fields, err := csvReader.Read()
    62  	if err != nil {
    63  		logrus.WithError(err).WithField("fields", fieldsString).
    64  			Error("unable to parse list of fields")
    65  		return nil, err
    66  	}
    67  	logrus.WithFields(logrus.Fields{
    68  		"fields": fields,
    69  	}).Debug("generated CSV fields")
    70  	return &CSVLineParser{
    71  		fields:           fields,
    72  		numFields:        len(fields),
    73  		trimLeadingSpace: trimLeadingSpace}, nil
    74  }
    75  
    76  func (p *CSVLineParser) ParseLine(line string) (map[string]interface{}, error) {
    77  	csvReader := csv.NewReader(strings.NewReader(line))
    78  	csvReader.FieldsPerRecord = p.numFields
    79  	csvReader.TrimLeadingSpace = p.trimLeadingSpace
    80  	data := make(map[string]interface{})
    81  	values, err := csvReader.Read()
    82  	if err != nil {
    83  		logrus.WithError(err).WithField("line", line).
    84  			Error("failed to parse line")
    85  		return nil, err
    86  	}
    87  
    88  	for i := 0; i < p.numFields; i++ {
    89  		if val, err := strconv.Atoi(values[i]); err == nil {
    90  			data[p.fields[i]] = val
    91  		} else if val, err := strconv.ParseFloat(values[i], 64); err == nil {
    92  			data[p.fields[i]] = val
    93  		} else {
    94  			data[p.fields[i]] = values[i]
    95  		}
    96  	}
    97  
    98  	return data, nil
    99  }
   100  
   101  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
   102  	// parse lines one by one
   103  	wg := sync.WaitGroup{}
   104  	numParsers := 1
   105  	if p.conf.NumParsers > 0 {
   106  		numParsers = p.conf.NumParsers
   107  	}
   108  	for i := 0; i < numParsers; i++ {
   109  		wg.Add(1)
   110  		go func() {
   111  			for line := range lines {
   112  				logrus.WithFields(logrus.Fields{
   113  					"line": line,
   114  				}).Debug("attempting to process csv line")
   115  
   116  				// take care of any headers on the line
   117  				var prefixFields map[string]string
   118  				if prefixRegex != nil {
   119  					var prefix string
   120  					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
   121  					line = strings.TrimPrefix(line, prefix)
   122  				}
   123  
   124  				parsedLine, err := p.lineParser.ParseLine(line)
   125  				if err != nil {
   126  					continue
   127  				}
   128  
   129  				if len(parsedLine) == 0 {
   130  					logrus.WithFields(logrus.Fields{
   131  						"line": line,
   132  					}).Info("skipping line, no values found")
   133  					continue
   134  				}
   135  
   136  				// merge the prefix fields and the parsed line contents
   137  				for k, v := range prefixFields {
   138  					parsedLine[k] = v
   139  				}
   140  
   141  				// look for the timestamp in any of the prefix fields or regular content
   142  				timestamp := httime.GetTimestamp(parsedLine, p.conf.TimeFieldName, p.conf.TimeFieldFormat)
   143  
   144  				// send an event to Transmission
   145  				e := event.Event{
   146  					Timestamp: timestamp,
   147  					Data:      parsedLine,
   148  				}
   149  				send <- e
   150  			}
   151  			wg.Done()
   152  		}()
   153  	}
   154  	wg.Wait()
   155  	logrus.Debug("lines channel is closed, ending csv processor")
   156  }