github.com/netdata/go.d.plugin@v0.58.1/pkg/logs/csv.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package logs 4 5 import ( 6 "bytes" 7 "encoding/csv" 8 "errors" 9 "fmt" 10 "io" 11 "strconv" 12 "strings" 13 ) 14 15 type ( 16 CSVConfig struct { 17 FieldsPerRecord int `yaml:"fields_per_record"` 18 Delimiter string `yaml:"delimiter"` 19 TrimLeadingSpace bool `yaml:"trim_leading_space"` 20 Format string `yaml:"format"` 21 CheckField func(string) (string, int, bool) `yaml:"-"` 22 } 23 24 CSVParser struct { 25 Config CSVConfig 26 reader *csv.Reader 27 format *csvFormat 28 } 29 30 csvFormat struct { 31 raw string 32 maxIndex int 33 fields []csvField 34 } 35 36 csvField struct { 37 name string 38 idx int 39 } 40 ) 41 42 func NewCSVParser(config CSVConfig, in io.Reader) (*CSVParser, error) { 43 if config.Format == "" { 44 return nil, errors.New("empty csv format") 45 } 46 47 format, err := newCSVFormat(config) 48 if err != nil { 49 return nil, fmt.Errorf("bad csv format '%s': %v", config.Format, err) 50 } 51 52 p := &CSVParser{ 53 Config: config, 54 reader: newCSVReader(in, config), 55 format: format, 56 } 57 return p, nil 58 } 59 60 func (p *CSVParser) ReadLine(line LogLine) error { 61 record, err := p.reader.Read() 62 if err != nil { 63 return handleCSVReaderError(err) 64 } 65 return p.format.parse(record, line) 66 } 67 68 func (p *CSVParser) Parse(row []byte, line LogLine) error { 69 r := newCSVReader(bytes.NewBuffer(row), p.Config) 70 record, err := r.Read() 71 if err != nil { 72 return handleCSVReaderError(err) 73 } 74 return p.format.parse(record, line) 75 } 76 77 func (p CSVParser) Info() string { 78 return fmt.Sprintf("csv: %s", p.format.raw) 79 } 80 81 func (f *csvFormat) parse(record []string, line LogLine) error { 82 if len(record) <= f.maxIndex { 83 return &ParseError{msg: "csv parse: unmatched line"} 84 } 85 86 for _, v := range f.fields { 87 if err := line.Assign(v.name, record[v.idx]); err != nil { 88 return &ParseError{msg: fmt.Sprintf("csv parse: %v", err), err: err} 89 } 90 } 91 return nil 92 } 93 94 func newCSVReader(in io.Reader, config CSVConfig) *csv.Reader { 95 r := csv.NewReader(in) 96 if config.Delimiter != "" { 97 if d, err := parseCSVDelimiter(config.Delimiter); err == nil { 98 r.Comma = d 99 } 100 } 101 r.TrimLeadingSpace = config.TrimLeadingSpace 102 r.FieldsPerRecord = config.FieldsPerRecord 103 r.ReuseRecord = true 104 return r 105 } 106 107 func newCSVFormat(config CSVConfig) (*csvFormat, error) { 108 r := csv.NewReader(strings.NewReader(config.Format)) 109 if config.Delimiter != "" { 110 if d, err := parseCSVDelimiter(config.Delimiter); err == nil { 111 r.Comma = d 112 } 113 } 114 r.TrimLeadingSpace = config.TrimLeadingSpace 115 116 record, err := r.Read() 117 if err != nil { 118 return nil, err 119 } 120 121 fields, err := createCSVFields(record, config.CheckField) 122 if err != nil { 123 return nil, err 124 } 125 126 if len(fields) == 0 { 127 return nil, errors.New("zero fields") 128 } 129 130 format := &csvFormat{ 131 raw: config.Format, 132 maxIndex: fields[len(fields)-1].idx, 133 fields: fields, 134 } 135 return format, nil 136 } 137 138 func createCSVFields(format []string, check func(string) (string, int, bool)) ([]csvField, error) { 139 if check == nil { 140 check = checkCSVFormatField 141 } 142 var fields []csvField 143 var offset int 144 seen := make(map[string]bool) 145 146 for i, name := range format { 147 name = strings.Trim(name, `"`) 148 149 name, addOffset, valid := check(name) 150 offset += addOffset 151 if !valid { 152 continue 153 } 154 if seen[name] { 155 return nil, fmt.Errorf("duplicate field: %s", name) 156 } 157 seen[name] = true 158 159 idx := i + offset 160 fields = append(fields, csvField{name, idx}) 161 } 162 return fields, nil 163 } 164 165 func handleCSVReaderError(err error) error { 166 if isCSVParseError(err) { 167 return &ParseError{msg: fmt.Sprintf("csv parse: %v", err), err: err} 168 } 169 return err 170 } 171 172 func isCSVParseError(err error) bool { 173 return errors.Is(err, csv.ErrBareQuote) || errors.Is(err, csv.ErrFieldCount) || errors.Is(err, csv.ErrQuote) 174 } 175 176 func checkCSVFormatField(name string) (newName string, offset int, valid bool) { 177 if len(name) < 2 || !strings.HasPrefix(name, "$") { 178 return "", 0, false 179 } 180 return name, 0, true 181 } 182 183 func parseCSVDelimiter(s string) (rune, error) { 184 if isNumber(s) { 185 d, err := strconv.ParseInt(s, 10, 32) 186 if err != nil { 187 return 0, fmt.Errorf("invalid CSV delimiter: %v", err) 188 } 189 return rune(d), nil 190 } 191 if len(s) != 1 { 192 return 0, errors.New("invalid CSV delimiter: must be a single character") 193 } 194 return rune(s[0]), nil 195 }