gitee.com/quant1x/pkg@v0.2.8/gocsv/csv.go (about)

     1  // Copyright 2014 Jonathan Picques. All rights reserved.
     2  // Use of this source code is governed by a MIT license
     3  // The license can be found in the LICENSE file.
     4  
     5  // The GoCSV package aims to provide easy CSV serialization and deserialization to the golang programming language
     6  
     7  package gocsv
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/csv"
    12  	"fmt"
    13  	"io"
    14  	"mime/multipart"
    15  	"os"
    16  	"reflect"
    17  	"strings"
    18  	"sync"
    19  )
    20  
    21  // FailIfUnmatchedStructTags indicates whether it is considered an error when there is an unmatched
    22  // struct tag.
    23  var FailIfUnmatchedStructTags = false
    24  
    25  // FailIfDoubleHeaderNames indicates whether it is considered an error when a header name is repeated
    26  // in the csv header.
    27  var FailIfDoubleHeaderNames = false
    28  
    29  // ShouldAlignDuplicateHeadersWithStructFieldOrder indicates whether we should align duplicate CSV
    30  // headers per their alignment in the struct definition.
    31  var ShouldAlignDuplicateHeadersWithStructFieldOrder = false
    32  
    33  // TagName defines key in the struct field's tag to scan
    34  var TagName = "csv"
    35  
    36  // TagSeparator defines seperator string for multiple csv tags in struct fields
    37  var TagSeparator = ","
    38  
    39  // FieldSeperator defines how to combine parent struct with child struct
    40  var FieldsCombiner = "."
    41  
    42  // Normalizer is a function that takes and returns a string. It is applied to
    43  // struct and header field values before they are compared. It can be used to alter
    44  // names for comparison. For instance, you could allow case insensitive matching
    45  // or convert '-' to '_'.
    46  type Normalizer func(string) string
    47  
    48  type ErrorHandler func(*csv.ParseError) bool
    49  
    50  // normalizeName function initially set to a nop Normalizer.
    51  var normalizeName = DefaultNameNormalizer()
    52  
    53  // DefaultNameNormalizer is a nop Normalizer.
    54  func DefaultNameNormalizer() Normalizer { return func(s string) string { return s } }
    55  
    56  // SetHeaderNormalizer sets the normalizer used to normalize struct and header field names.
    57  func SetHeaderNormalizer(f Normalizer) {
    58  	normalizeName = f
    59  	// Need to clear the cache hen the header normalizer changes.
    60  	structInfoCache = sync.Map{}
    61  }
    62  
    63  // --------------------------------------------------------------------------
    64  // CSVWriter used to format CSV
    65  
    66  var selfCSVWriter = DefaultCSVWriter
    67  
    68  // DefaultCSVWriter is the default SafeCSVWriter used to format CSV (cf. csv.NewWriter)
    69  func DefaultCSVWriter(out io.Writer) *SafeCSVWriter {
    70  	writer := NewSafeCSVWriter(csv.NewWriter(out))
    71  
    72  	// As only one rune can be defined as a CSV separator, we are going to trim
    73  	// the custom tag separator and use the first rune.
    74  	if runes := []rune(strings.TrimSpace(TagSeparator)); len(runes) > 0 {
    75  		writer.Comma = runes[0]
    76  	}
    77  
    78  	return writer
    79  }
    80  
    81  // SetCSVWriter sets the SafeCSVWriter used to format CSV.
    82  func SetCSVWriter(csvWriter func(io.Writer) *SafeCSVWriter) {
    83  	selfCSVWriter = csvWriter
    84  }
    85  
    86  func getCSVWriter(out io.Writer) *SafeCSVWriter {
    87  	return selfCSVWriter(out)
    88  }
    89  
    90  // --------------------------------------------------------------------------
    91  // CSVReader used to parse CSV
    92  
    93  var selfCSVReader = DefaultCSVReader
    94  
    95  // DefaultCSVReader is the default CSV reader used to parse CSV (cf. csv.NewReader)
    96  func DefaultCSVReader(in io.Reader) CSVReader {
    97  	return csv.NewReader(in)
    98  }
    99  
   100  // LazyCSVReader returns a lazy CSV reader, with LazyQuotes and TrimLeadingSpace.
   101  func LazyCSVReader(in io.Reader) CSVReader {
   102  	csvReader := csv.NewReader(in)
   103  	csvReader.LazyQuotes = true
   104  	csvReader.TrimLeadingSpace = true
   105  	return csvReader
   106  }
   107  
   108  // SetCSVReader sets the CSV reader used to parse CSV.
   109  func SetCSVReader(csvReader func(io.Reader) CSVReader) {
   110  	selfCSVReader = csvReader
   111  }
   112  
   113  func getCSVReader(in io.Reader) CSVReader {
   114  	return selfCSVReader(in)
   115  }
   116  
   117  // --------------------------------------------------------------------------
   118  // Marshal functions
   119  
   120  // MarshalFile saves the interface as CSV in the file.
   121  func MarshalFile(in interface{}, file *os.File) (err error) {
   122  	return Marshal(in, file)
   123  }
   124  
   125  // MarshalString returns the CSV string from the interface.
   126  func MarshalString(in interface{}) (out string, err error) {
   127  	bufferString := bytes.NewBufferString(out)
   128  	if err := Marshal(in, bufferString); err != nil {
   129  		return "", err
   130  	}
   131  	return bufferString.String(), nil
   132  }
   133  
   134  // MarshalStringWithoutHeaders returns the CSV string from the interface.
   135  func MarshalStringWithoutHeaders(in interface{}) (out string, err error) {
   136  	bufferString := bytes.NewBufferString(out)
   137  	if err := MarshalWithoutHeaders(in, bufferString); err != nil {
   138  		return "", err
   139  	}
   140  	return bufferString.String(), nil
   141  }
   142  
   143  // MarshalBytes returns the CSV bytes from the interface.
   144  func MarshalBytes(in interface{}) (out []byte, err error) {
   145  	bufferString := bytes.NewBuffer(out)
   146  	if err := Marshal(in, bufferString); err != nil {
   147  		return nil, err
   148  	}
   149  	return bufferString.Bytes(), nil
   150  }
   151  
   152  // Marshal returns the CSV in writer from the interface.
   153  func Marshal(in interface{}, out io.Writer) (err error) {
   154  	writer := getCSVWriter(out)
   155  	return writeTo(writer, in, false)
   156  }
   157  
   158  // MarshalWithoutHeaders returns the CSV in writer from the interface.
   159  func MarshalWithoutHeaders(in interface{}, out io.Writer) (err error) {
   160  	writer := getCSVWriter(out)
   161  	return writeTo(writer, in, true)
   162  }
   163  
   164  // MarshalChan returns the CSV read from the channel.
   165  func MarshalChan(c <-chan interface{}, out CSVWriter) error {
   166  	return writeFromChan(out, c, false)
   167  }
   168  
   169  // MarshalChanWithoutHeaders returns the CSV read from the channel.
   170  func MarshalChanWithoutHeaders(c <-chan interface{}, out CSVWriter) error {
   171  	return writeFromChan(out, c, true)
   172  }
   173  
   174  // MarshalCSV returns the CSV in writer from the interface.
   175  func MarshalCSV(in interface{}, out CSVWriter) (err error) {
   176  	return writeTo(out, in, false)
   177  }
   178  
   179  // MarshalCSVWithoutHeaders returns the CSV in writer from the interface.
   180  func MarshalCSVWithoutHeaders(in interface{}, out CSVWriter) (err error) {
   181  	return writeTo(out, in, true)
   182  }
   183  
   184  // --------------------------------------------------------------------------
   185  // Unmarshal functions
   186  
   187  // UnmarshalFile parses the CSV from the file in the interface.
   188  func UnmarshalFile(in *os.File, out interface{}) error {
   189  	return Unmarshal(in, out)
   190  }
   191  
   192  // UnmarshalMultipartFile parses the CSV from the multipart file in the interface.
   193  func UnmarshalMultipartFile(in *multipart.File, out interface{}) error {
   194  	return Unmarshal(convertTo(in), out)
   195  }
   196  
   197  // UnmarshalFileWithErrorHandler parses the CSV from the file in the interface.
   198  func UnmarshalFileWithErrorHandler(in *os.File, errHandler ErrorHandler, out interface{}) error {
   199  	return UnmarshalWithErrorHandler(in, errHandler, out)
   200  }
   201  
   202  // UnmarshalString parses the CSV from the string in the interface.
   203  func UnmarshalString(in string, out interface{}) error {
   204  	return Unmarshal(strings.NewReader(in), out)
   205  }
   206  
   207  // UnmarshalBytes parses the CSV from the bytes in the interface.
   208  func UnmarshalBytes(in []byte, out interface{}) error {
   209  	return Unmarshal(bytes.NewReader(in), out)
   210  }
   211  
   212  // Unmarshal parses the CSV from the reader in the interface.
   213  func Unmarshal(in io.Reader, out interface{}) error {
   214  	return readTo(newSimpleDecoderFromReader(in), out)
   215  }
   216  
   217  // Unmarshal parses the CSV from the reader in the interface.
   218  func UnmarshalWithErrorHandler(in io.Reader, errHandle ErrorHandler, out interface{}) error {
   219  	return readToWithErrorHandler(newSimpleDecoderFromReader(in), errHandle, out)
   220  }
   221  
   222  // UnmarshalWithoutHeaders parses the CSV from the reader in the interface.
   223  func UnmarshalWithoutHeaders(in io.Reader, out interface{}) error {
   224  	return readToWithoutHeaders(newSimpleDecoderFromReader(in), out)
   225  }
   226  
   227  // UnmarshalCSVWithoutHeaders parses a headerless CSV with passed in CSV reader
   228  func UnmarshalCSVWithoutHeaders(in CSVReader, out interface{}) error {
   229  	return readToWithoutHeaders(csvDecoder{in}, out)
   230  }
   231  
   232  // UnmarshalDecoder parses the CSV from the decoder in the interface
   233  func UnmarshalDecoder(in Decoder, out interface{}) error {
   234  	return readTo(in, out)
   235  }
   236  
   237  // UnmarshalCSV parses the CSV from the reader in the interface.
   238  func UnmarshalCSV(in CSVReader, out interface{}) error {
   239  	return readTo(csvDecoder{in}, out)
   240  }
   241  
   242  // UnmarshalCSVToMap parses a CSV of 2 columns into a map.
   243  func UnmarshalCSVToMap(in CSVReader, out interface{}) error {
   244  	decoder := NewSimpleDecoderFromCSVReader(in)
   245  	header, err := decoder.GetCSVRow()
   246  	if err != nil {
   247  		return err
   248  	}
   249  	if len(header) != 2 {
   250  		return fmt.Errorf("maps can only be created for csv of two columns")
   251  	}
   252  	outValue, outType := getConcreteReflectValueAndType(out)
   253  	if outType.Kind() != reflect.Map {
   254  		return fmt.Errorf("cannot use " + outType.String() + ", only map supported")
   255  	}
   256  	keyType := outType.Key()
   257  	valueType := outType.Elem()
   258  	outValue.Set(reflect.MakeMap(outType))
   259  	for {
   260  		key := reflect.New(keyType)
   261  		value := reflect.New(valueType)
   262  		line, err := decoder.GetCSVRow()
   263  		if err == io.EOF {
   264  			break
   265  		} else if err != nil {
   266  			return err
   267  		}
   268  		if err := setField(key, line[0], false); err != nil {
   269  			return err
   270  		}
   271  		if err := setField(value, line[1], false); err != nil {
   272  			return err
   273  		}
   274  		outValue.SetMapIndex(key.Elem(), value.Elem())
   275  	}
   276  	return nil
   277  }
   278  
   279  // UnmarshalToChan parses the CSV from the reader and send each value in the chan c.
   280  // The channel must have a concrete type.
   281  func UnmarshalToChan(in io.Reader, c interface{}) error {
   282  	if c == nil {
   283  		return fmt.Errorf("goscv: channel is %v", c)
   284  	}
   285  	return readEach(newSimpleDecoderFromReader(in), nil, c)
   286  }
   287  
   288  // UnmarshalToChanWithErrorHandler parses the CSV from the reader in the interface.
   289  func UnmarshalToChanWithErrorHandler(in io.Reader, errorHandler ErrorHandler, c interface{}) error {
   290  	if c == nil {
   291  		return fmt.Errorf("goscv: channel is %v", c)
   292  	}
   293  	return readEach(newSimpleDecoderFromReader(in), errorHandler, c)
   294  }
   295  
   296  // UnmarshalToChanWithoutHeaders parses the CSV from the reader and send each value in the chan c.
   297  // The channel must have a concrete type.
   298  func UnmarshalToChanWithoutHeaders(in io.Reader, c interface{}) error {
   299  	if c == nil {
   300  		return fmt.Errorf("goscv: channel is %v", c)
   301  	}
   302  	return readEachWithoutHeaders(newSimpleDecoderFromReader(in), c)
   303  }
   304  
   305  // UnmarshalDecoderToChan parses the CSV from the decoder and send each value in the chan c.
   306  // The channel must have a concrete type.
   307  func UnmarshalDecoderToChan(in SimpleDecoder, c interface{}) error {
   308  	if c == nil {
   309  		return fmt.Errorf("goscv: channel is %v", c)
   310  	}
   311  	return readEach(in, nil, c)
   312  }
   313  
   314  // UnmarshalStringToChan parses the CSV from the string and send each value in the chan c.
   315  // The channel must have a concrete type.
   316  func UnmarshalStringToChan(in string, c interface{}) error {
   317  	return UnmarshalToChan(strings.NewReader(in), c)
   318  }
   319  
   320  // UnmarshalBytesToChan parses the CSV from the bytes and send each value in the chan c.
   321  // The channel must have a concrete type.
   322  func UnmarshalBytesToChan(in []byte, c interface{}) error {
   323  	return UnmarshalToChan(bytes.NewReader(in), c)
   324  }
   325  
   326  // UnmarshalToCallback parses the CSV from the reader and send each value to the given func f.
   327  // The func must look like func(Struct).
   328  func UnmarshalToCallback(in io.Reader, f interface{}) error {
   329  	valueFunc := reflect.ValueOf(f)
   330  	t := reflect.TypeOf(f)
   331  	if t.NumIn() != 1 {
   332  		return fmt.Errorf("the given function must have exactly one parameter")
   333  	}
   334  	cerr := make(chan error)
   335  	c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0)
   336  	go func() {
   337  		cerr <- UnmarshalToChan(in, c.Interface())
   338  	}()
   339  	for {
   340  		select {
   341  		case err := <-cerr:
   342  			return err
   343  		default:
   344  		}
   345  		v, notClosed := c.Recv()
   346  		if !notClosed || v.Interface() == nil {
   347  			break
   348  		}
   349  		callResults := valueFunc.Call([]reflect.Value{v})
   350  		// if last returned value from Call() is an error, return it
   351  		if len(callResults) > 0 {
   352  			if err, ok := callResults[len(callResults)-1].Interface().(error); ok {
   353  				return err
   354  			}
   355  		}
   356  	}
   357  	return <-cerr
   358  }
   359  
   360  // UnmarshalDecoderToCallback parses the CSV from the decoder and send each value to the given func f.
   361  // The func must look like func(Struct).
   362  func UnmarshalDecoderToCallback(in SimpleDecoder, f interface{}) error {
   363  	valueFunc := reflect.ValueOf(f)
   364  	t := reflect.TypeOf(f)
   365  	if t.NumIn() != 1 {
   366  		return fmt.Errorf("the given function must have exactly one parameter")
   367  	}
   368  	cerr := make(chan error)
   369  	c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0)
   370  	go func() {
   371  		cerr <- UnmarshalDecoderToChan(in, c.Interface())
   372  	}()
   373  	for {
   374  		select {
   375  		case err := <-cerr:
   376  			return err
   377  		default:
   378  		}
   379  		v, notClosed := c.Recv()
   380  		if !notClosed || v.Interface() == nil {
   381  			break
   382  		}
   383  		valueFunc.Call([]reflect.Value{v})
   384  	}
   385  	return <-cerr
   386  }
   387  
   388  // UnmarshalBytesToCallback parses the CSV from the bytes and send each value to the given func f.
   389  // The func must look like func(Struct).
   390  func UnmarshalBytesToCallback(in []byte, f interface{}) error {
   391  	return UnmarshalToCallback(bytes.NewReader(in), f)
   392  }
   393  
   394  // UnmarshalStringToCallback parses the CSV from the string and send each value to the given func f.
   395  // The func must look like func(Struct).
   396  func UnmarshalStringToCallback(in string, c interface{}) (err error) {
   397  	return UnmarshalToCallback(strings.NewReader(in), c)
   398  }
   399  
   400  // UnmarshalToCallbackWithError parses the CSV from the reader and
   401  // send each value to the given func f.
   402  //
   403  // If func returns error, it will stop processing, drain the
   404  // parser and propagate the error to caller.
   405  //
   406  // The func must look like func(Struct) error.
   407  func UnmarshalToCallbackWithError(in io.Reader, f interface{}) error {
   408  	valueFunc := reflect.ValueOf(f)
   409  	t := reflect.TypeOf(f)
   410  	if t.NumIn() != 1 {
   411  		return fmt.Errorf("the given function must have exactly one parameter")
   412  	}
   413  	if t.NumOut() != 1 {
   414  		return fmt.Errorf("the given function must have exactly one return value")
   415  	}
   416  	if !isErrorType(t.Out(0)) {
   417  		return fmt.Errorf("the given function must only return error")
   418  	}
   419  
   420  	cerr := make(chan error)
   421  	c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0)
   422  	go func() {
   423  		cerr <- UnmarshalToChan(in, c.Interface())
   424  	}()
   425  
   426  	var fErr error
   427  	for {
   428  		select {
   429  		case err := <-cerr:
   430  			if err != nil {
   431  				return err
   432  			}
   433  			return fErr
   434  		default:
   435  		}
   436  		v, notClosed := c.Recv()
   437  		if !notClosed || v.Interface() == nil {
   438  			if err := <-cerr; err != nil {
   439  				fErr = err
   440  			}
   441  			break
   442  		}
   443  
   444  		// callback f has already returned an error, stop processing but keep draining the chan c
   445  		if fErr != nil {
   446  			continue
   447  		}
   448  
   449  		results := valueFunc.Call([]reflect.Value{v})
   450  
   451  		// If the callback f returns an error, stores it and returns it in future.
   452  		errValue := results[0].Interface()
   453  		if errValue != nil {
   454  			fErr = errValue.(error)
   455  		}
   456  	}
   457  	return fErr
   458  }
   459  
   460  // UnmarshalBytesToCallbackWithError parses the CSV from the bytes and
   461  // send each value to the given func f.
   462  //
   463  // If func returns error, it will stop processing, drain the
   464  // parser and propagate the error to caller.
   465  //
   466  // The func must look like func(Struct) error.
   467  func UnmarshalBytesToCallbackWithError(in []byte, f interface{}) error {
   468  	return UnmarshalToCallbackWithError(bytes.NewReader(in), f)
   469  }
   470  
   471  // UnmarshalStringToCallbackWithError parses the CSV from the string and
   472  // send each value to the given func f.
   473  //
   474  // If func returns error, it will stop processing, drain the
   475  // parser and propagate the error to caller.
   476  //
   477  // The func must look like func(Struct) error.
   478  func UnmarshalStringToCallbackWithError(in string, c interface{}) (err error) {
   479  	return UnmarshalToCallbackWithError(strings.NewReader(in), c)
   480  }
   481  
   482  // CSVToMap creates a simple map from a CSV of 2 columns.
   483  func CSVToMap(in io.Reader) (map[string]string, error) {
   484  	decoder := newSimpleDecoderFromReader(in)
   485  	header, err := decoder.GetCSVRow()
   486  	if err != nil {
   487  		return nil, err
   488  	}
   489  	if len(header) != 2 {
   490  		return nil, fmt.Errorf("maps can only be created for csv of two columns")
   491  	}
   492  	m := make(map[string]string)
   493  	for {
   494  		line, err := decoder.GetCSVRow()
   495  		if err == io.EOF {
   496  			break
   497  		} else if err != nil {
   498  			return nil, err
   499  		}
   500  		m[line[0]] = line[1]
   501  	}
   502  	return m, nil
   503  }
   504  
   505  // CSVToMaps takes a reader and returns an array of dictionaries, using the header row as the keys
   506  func CSVToMaps(reader io.Reader) ([]map[string]string, error) {
   507  	r := getCSVReader(reader)
   508  	rows := []map[string]string{}
   509  	var header []string
   510  	for {
   511  		record, err := r.Read()
   512  		if err == io.EOF {
   513  			break
   514  		}
   515  		if err != nil {
   516  			return nil, err
   517  		}
   518  		if header == nil {
   519  			header = record
   520  		} else {
   521  			dict := map[string]string{}
   522  			for i := range header {
   523  				dict[header[i]] = record[i]
   524  			}
   525  			rows = append(rows, dict)
   526  		}
   527  	}
   528  	return rows, nil
   529  }
   530  
   531  // CSVToChanMaps parses the CSV from the reader and send a dictionary in the chan c, using the header row as the keys.
   532  func CSVToChanMaps(reader io.Reader, c chan<- map[string]string) error {
   533  	r := csv.NewReader(reader)
   534  	var header []string
   535  	for {
   536  		record, err := r.Read()
   537  		if err == io.EOF {
   538  			break
   539  		}
   540  		if err != nil {
   541  			return err
   542  		}
   543  		if header == nil {
   544  			header = record
   545  		} else {
   546  			dict := map[string]string{}
   547  			for i := range header {
   548  				dict[header[i]] = record[i]
   549  			}
   550  			c <- dict
   551  		}
   552  	}
   553  	return nil
   554  }