github.com/honeycombio/honeytail@v1.9.0/parsers/csv/csv.go

package csv

import (
	"encoding/csv"
	"errors"
	"strconv"
	"strings"
	"sync"

	"github.com/sirupsen/logrus"

	"github.com/honeycombio/honeytail/event"
	"github.com/honeycombio/honeytail/httime"
	"github.com/honeycombio/honeytail/parsers"
)

// Options defines the options relevant to the CSV parser
type Options struct {
	Fields           string `long:"fields" description:"Comma separated list of CSV fields, in order."`
	TimeFieldName    string `long:"timefield" description:"Name of the field that contains a timestamp" yaml:"timefield,omitempty"`
	TimeFieldFormat  string `long:"time_format" description:"Timestamp format to use (strftime and Golang time.Parse supported)" yaml:"time_format,omitempty"`
	TrimLeadingSpace bool   `long:"trim_leading_space" description:"trim leading whitespace in CSV fields and values" yaml:"trim_leading_space,omitempty"`

	NumParsers int `hidden:"true" description:"number of csv parsers to spin up" yaml:"-"`
}

// Parser implements the Parser interface
type Parser struct {
	conf       Options
	lineParser parsers.LineParser
}

// Init constructs our parser from the provided options
func (p *Parser) Init(options interface{}) error {
	p.conf = *options.(*Options)
	if p.conf.Fields == "" {
		return errors.New("must provide at least 1 field name when parsing CSV lines")
	}
	lineParser, err := NewCSVLineParser(p.conf.Fields, p.conf.TrimLeadingSpace)
	if err != nil {
		return err
	}
	p.lineParser = lineParser
	return nil
}

// CSVLineParser parses a single CSV-formatted line into a map keyed by the
// configured field names.
type CSVLineParser struct {
	fields           []string
	numFields        int
	trimLeadingSpace bool
}

// NewCSVLineParser builds a CSVLineParser from a comma-separated list of field names.
func NewCSVLineParser(fieldsString string, trimLeadingSpace bool) (*CSVLineParser, error) {
	// Is building a reader for every single line a good idea?
	// Potential for future optimization here
	reader := strings.NewReader(fieldsString)
	csvReader := csv.NewReader(reader)
	csvReader.TrimLeadingSpace = trimLeadingSpace

	fields, err := csvReader.Read()
	if err != nil {
		logrus.WithError(err).WithField("fields", fieldsString).
			Error("unable to parse list of fields")
		return nil, err
	}
	logrus.WithFields(logrus.Fields{
		"fields": fields,
	}).Debug("generated CSV fields")
	return &CSVLineParser{
		fields:           fields,
		numFields:        len(fields),
		trimLeadingSpace: trimLeadingSpace}, nil
}

// ParseLine parses one CSV line, coercing each value to int, then float64,
// and falling back to the raw string.
func (p *CSVLineParser) ParseLine(line string) (map[string]interface{}, error) {
	csvReader := csv.NewReader(strings.NewReader(line))
	csvReader.FieldsPerRecord = p.numFields
	csvReader.TrimLeadingSpace = p.trimLeadingSpace
	data := make(map[string]interface{})
	values, err := csvReader.Read()
	if err != nil {
		logrus.WithError(err).WithField("line", line).
			Error("failed to parse line")
		return nil, err
	}

	for i := 0; i < p.numFields; i++ {
		if val, err := strconv.Atoi(values[i]); err == nil {
			data[p.fields[i]] = val
		} else if val, err := strconv.ParseFloat(values[i], 64); err == nil {
			data[p.fields[i]] = val
		} else {
			data[p.fields[i]] = values[i]
		}
	}

	return data, nil
}

// ProcessLines reads raw lines from the lines channel, parses them on one or
// more goroutines, and sends the resulting events on the send channel.
func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
	// parse lines one by one
	wg := sync.WaitGroup{}
	numParsers := 1
	if p.conf.NumParsers > 0 {
		numParsers = p.conf.NumParsers
	}
	for i := 0; i < numParsers; i++ {
		wg.Add(1)
		go func() {
			for line := range lines {
				logrus.WithFields(logrus.Fields{
					"line": line,
				}).Debug("attempting to process csv line")

				// take care of any headers on the line
				var prefixFields map[string]string
				if prefixRegex != nil {
					var prefix string
					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
					line = strings.TrimPrefix(line, prefix)
				}

				parsedLine, err := p.lineParser.ParseLine(line)
				if err != nil {
					continue
				}

				if len(parsedLine) == 0 {
					logrus.WithFields(logrus.Fields{
						"line": line,
					}).Info("skipping line, no values found")
					continue
				}

				// merge the prefix fields and the parsed line contents
				for k, v := range prefixFields {
					parsedLine[k] = v
				}

				// look for the timestamp in any of the prefix fields or regular content
				timestamp := httime.GetTimestamp(parsedLine, p.conf.TimeFieldName, p.conf.TimeFieldFormat)

				// send an event to Transmission
				e := event.Event{
					Timestamp: timestamp,
					Data:      parsedLine,
				}
				send <- e
			}
			wg.Done()
		}()
	}
	wg.Wait()
	logrus.Debug("lines channel is closed, ending csv processor")
}