github.com/netdata/go.d.plugin@v0.58.1/pkg/logs/csv.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package logs
     4  
     5  import (
     6  	"bytes"
     7  	"encoding/csv"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strconv"
    12  	"strings"
    13  )
    14  
    15  type (
    16  	CSVConfig struct {
    17  		FieldsPerRecord  int                              `yaml:"fields_per_record"`
    18  		Delimiter        string                           `yaml:"delimiter"`
    19  		TrimLeadingSpace bool                             `yaml:"trim_leading_space"`
    20  		Format           string                           `yaml:"format"`
    21  		CheckField       func(string) (string, int, bool) `yaml:"-"`
    22  	}
    23  
    24  	CSVParser struct {
    25  		Config CSVConfig
    26  		reader *csv.Reader
    27  		format *csvFormat
    28  	}
    29  
    30  	csvFormat struct {
    31  		raw      string
    32  		maxIndex int
    33  		fields   []csvField
    34  	}
    35  
    36  	csvField struct {
    37  		name string
    38  		idx  int
    39  	}
    40  )
    41  
    42  func NewCSVParser(config CSVConfig, in io.Reader) (*CSVParser, error) {
    43  	if config.Format == "" {
    44  		return nil, errors.New("empty csv format")
    45  	}
    46  
    47  	format, err := newCSVFormat(config)
    48  	if err != nil {
    49  		return nil, fmt.Errorf("bad csv format '%s': %v", config.Format, err)
    50  	}
    51  
    52  	p := &CSVParser{
    53  		Config: config,
    54  		reader: newCSVReader(in, config),
    55  		format: format,
    56  	}
    57  	return p, nil
    58  }
    59  
    60  func (p *CSVParser) ReadLine(line LogLine) error {
    61  	record, err := p.reader.Read()
    62  	if err != nil {
    63  		return handleCSVReaderError(err)
    64  	}
    65  	return p.format.parse(record, line)
    66  }
    67  
    68  func (p *CSVParser) Parse(row []byte, line LogLine) error {
    69  	r := newCSVReader(bytes.NewBuffer(row), p.Config)
    70  	record, err := r.Read()
    71  	if err != nil {
    72  		return handleCSVReaderError(err)
    73  	}
    74  	return p.format.parse(record, line)
    75  }
    76  
    77  func (p CSVParser) Info() string {
    78  	return fmt.Sprintf("csv: %s", p.format.raw)
    79  }
    80  
    81  func (f *csvFormat) parse(record []string, line LogLine) error {
    82  	if len(record) <= f.maxIndex {
    83  		return &ParseError{msg: "csv parse: unmatched line"}
    84  	}
    85  
    86  	for _, v := range f.fields {
    87  		if err := line.Assign(v.name, record[v.idx]); err != nil {
    88  			return &ParseError{msg: fmt.Sprintf("csv parse: %v", err), err: err}
    89  		}
    90  	}
    91  	return nil
    92  }
    93  
    94  func newCSVReader(in io.Reader, config CSVConfig) *csv.Reader {
    95  	r := csv.NewReader(in)
    96  	if config.Delimiter != "" {
    97  		if d, err := parseCSVDelimiter(config.Delimiter); err == nil {
    98  			r.Comma = d
    99  		}
   100  	}
   101  	r.TrimLeadingSpace = config.TrimLeadingSpace
   102  	r.FieldsPerRecord = config.FieldsPerRecord
   103  	r.ReuseRecord = true
   104  	return r
   105  }
   106  
   107  func newCSVFormat(config CSVConfig) (*csvFormat, error) {
   108  	r := csv.NewReader(strings.NewReader(config.Format))
   109  	if config.Delimiter != "" {
   110  		if d, err := parseCSVDelimiter(config.Delimiter); err == nil {
   111  			r.Comma = d
   112  		}
   113  	}
   114  	r.TrimLeadingSpace = config.TrimLeadingSpace
   115  
   116  	record, err := r.Read()
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	fields, err := createCSVFields(record, config.CheckField)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  
   126  	if len(fields) == 0 {
   127  		return nil, errors.New("zero fields")
   128  	}
   129  
   130  	format := &csvFormat{
   131  		raw:      config.Format,
   132  		maxIndex: fields[len(fields)-1].idx,
   133  		fields:   fields,
   134  	}
   135  	return format, nil
   136  }
   137  
   138  func createCSVFields(format []string, check func(string) (string, int, bool)) ([]csvField, error) {
   139  	if check == nil {
   140  		check = checkCSVFormatField
   141  	}
   142  	var fields []csvField
   143  	var offset int
   144  	seen := make(map[string]bool)
   145  
   146  	for i, name := range format {
   147  		name = strings.Trim(name, `"`)
   148  
   149  		name, addOffset, valid := check(name)
   150  		offset += addOffset
   151  		if !valid {
   152  			continue
   153  		}
   154  		if seen[name] {
   155  			return nil, fmt.Errorf("duplicate field: %s", name)
   156  		}
   157  		seen[name] = true
   158  
   159  		idx := i + offset
   160  		fields = append(fields, csvField{name, idx})
   161  	}
   162  	return fields, nil
   163  }
   164  
   165  func handleCSVReaderError(err error) error {
   166  	if isCSVParseError(err) {
   167  		return &ParseError{msg: fmt.Sprintf("csv parse: %v", err), err: err}
   168  	}
   169  	return err
   170  }
   171  
   172  func isCSVParseError(err error) bool {
   173  	return errors.Is(err, csv.ErrBareQuote) || errors.Is(err, csv.ErrFieldCount) || errors.Is(err, csv.ErrQuote)
   174  }
   175  
   176  func checkCSVFormatField(name string) (newName string, offset int, valid bool) {
   177  	if len(name) < 2 || !strings.HasPrefix(name, "$") {
   178  		return "", 0, false
   179  	}
   180  	return name, 0, true
   181  }
   182  
   183  func parseCSVDelimiter(s string) (rune, error) {
   184  	if isNumber(s) {
   185  		d, err := strconv.ParseInt(s, 10, 32)
   186  		if err != nil {
   187  			return 0, fmt.Errorf("invalid CSV delimiter: %v", err)
   188  		}
   189  		return rune(d), nil
   190  	}
   191  	if len(s) != 1 {
   192  		return 0, errors.New("invalid CSV delimiter: must be a single character")
   193  	}
   194  	return rune(s[0]), nil
   195  }