gitee.com/quant1x/pkg@v0.2.8/gocsv/csv.go (about) 1 // Copyright 2014 Jonathan Picques. All rights reserved. 2 // Use of this source code is governed by a MIT license 3 // The license can be found in the LICENSE file. 4 5 // The GoCSV package aims to provide easy CSV serialization and deserialization to the golang programming language 6 7 package gocsv 8 9 import ( 10 "bytes" 11 "encoding/csv" 12 "fmt" 13 "io" 14 "mime/multipart" 15 "os" 16 "reflect" 17 "strings" 18 "sync" 19 ) 20 21 // FailIfUnmatchedStructTags indicates whether it is considered an error when there is an unmatched 22 // struct tag. 23 var FailIfUnmatchedStructTags = false 24 25 // FailIfDoubleHeaderNames indicates whether it is considered an error when a header name is repeated 26 // in the csv header. 27 var FailIfDoubleHeaderNames = false 28 29 // ShouldAlignDuplicateHeadersWithStructFieldOrder indicates whether we should align duplicate CSV 30 // headers per their alignment in the struct definition. 31 var ShouldAlignDuplicateHeadersWithStructFieldOrder = false 32 33 // TagName defines key in the struct field's tag to scan 34 var TagName = "csv" 35 36 // TagSeparator defines seperator string for multiple csv tags in struct fields 37 var TagSeparator = "," 38 39 // FieldSeperator defines how to combine parent struct with child struct 40 var FieldsCombiner = "." 41 42 // Normalizer is a function that takes and returns a string. It is applied to 43 // struct and header field values before they are compared. It can be used to alter 44 // names for comparison. For instance, you could allow case insensitive matching 45 // or convert '-' to '_'. 46 type Normalizer func(string) string 47 48 type ErrorHandler func(*csv.ParseError) bool 49 50 // normalizeName function initially set to a nop Normalizer. 51 var normalizeName = DefaultNameNormalizer() 52 53 // DefaultNameNormalizer is a nop Normalizer. 54 func DefaultNameNormalizer() Normalizer { return func(s string) string { return s } } 55 56 // SetHeaderNormalizer sets the normalizer used to normalize struct and header field names. 57 func SetHeaderNormalizer(f Normalizer) { 58 normalizeName = f 59 // Need to clear the cache hen the header normalizer changes. 60 structInfoCache = sync.Map{} 61 } 62 63 // -------------------------------------------------------------------------- 64 // CSVWriter used to format CSV 65 66 var selfCSVWriter = DefaultCSVWriter 67 68 // DefaultCSVWriter is the default SafeCSVWriter used to format CSV (cf. csv.NewWriter) 69 func DefaultCSVWriter(out io.Writer) *SafeCSVWriter { 70 writer := NewSafeCSVWriter(csv.NewWriter(out)) 71 72 // As only one rune can be defined as a CSV separator, we are going to trim 73 // the custom tag separator and use the first rune. 74 if runes := []rune(strings.TrimSpace(TagSeparator)); len(runes) > 0 { 75 writer.Comma = runes[0] 76 } 77 78 return writer 79 } 80 81 // SetCSVWriter sets the SafeCSVWriter used to format CSV. 82 func SetCSVWriter(csvWriter func(io.Writer) *SafeCSVWriter) { 83 selfCSVWriter = csvWriter 84 } 85 86 func getCSVWriter(out io.Writer) *SafeCSVWriter { 87 return selfCSVWriter(out) 88 } 89 90 // -------------------------------------------------------------------------- 91 // CSVReader used to parse CSV 92 93 var selfCSVReader = DefaultCSVReader 94 95 // DefaultCSVReader is the default CSV reader used to parse CSV (cf. csv.NewReader) 96 func DefaultCSVReader(in io.Reader) CSVReader { 97 return csv.NewReader(in) 98 } 99 100 // LazyCSVReader returns a lazy CSV reader, with LazyQuotes and TrimLeadingSpace. 101 func LazyCSVReader(in io.Reader) CSVReader { 102 csvReader := csv.NewReader(in) 103 csvReader.LazyQuotes = true 104 csvReader.TrimLeadingSpace = true 105 return csvReader 106 } 107 108 // SetCSVReader sets the CSV reader used to parse CSV. 109 func SetCSVReader(csvReader func(io.Reader) CSVReader) { 110 selfCSVReader = csvReader 111 } 112 113 func getCSVReader(in io.Reader) CSVReader { 114 return selfCSVReader(in) 115 } 116 117 // -------------------------------------------------------------------------- 118 // Marshal functions 119 120 // MarshalFile saves the interface as CSV in the file. 121 func MarshalFile(in interface{}, file *os.File) (err error) { 122 return Marshal(in, file) 123 } 124 125 // MarshalString returns the CSV string from the interface. 126 func MarshalString(in interface{}) (out string, err error) { 127 bufferString := bytes.NewBufferString(out) 128 if err := Marshal(in, bufferString); err != nil { 129 return "", err 130 } 131 return bufferString.String(), nil 132 } 133 134 // MarshalStringWithoutHeaders returns the CSV string from the interface. 135 func MarshalStringWithoutHeaders(in interface{}) (out string, err error) { 136 bufferString := bytes.NewBufferString(out) 137 if err := MarshalWithoutHeaders(in, bufferString); err != nil { 138 return "", err 139 } 140 return bufferString.String(), nil 141 } 142 143 // MarshalBytes returns the CSV bytes from the interface. 144 func MarshalBytes(in interface{}) (out []byte, err error) { 145 bufferString := bytes.NewBuffer(out) 146 if err := Marshal(in, bufferString); err != nil { 147 return nil, err 148 } 149 return bufferString.Bytes(), nil 150 } 151 152 // Marshal returns the CSV in writer from the interface. 153 func Marshal(in interface{}, out io.Writer) (err error) { 154 writer := getCSVWriter(out) 155 return writeTo(writer, in, false) 156 } 157 158 // MarshalWithoutHeaders returns the CSV in writer from the interface. 159 func MarshalWithoutHeaders(in interface{}, out io.Writer) (err error) { 160 writer := getCSVWriter(out) 161 return writeTo(writer, in, true) 162 } 163 164 // MarshalChan returns the CSV read from the channel. 165 func MarshalChan(c <-chan interface{}, out CSVWriter) error { 166 return writeFromChan(out, c, false) 167 } 168 169 // MarshalChanWithoutHeaders returns the CSV read from the channel. 170 func MarshalChanWithoutHeaders(c <-chan interface{}, out CSVWriter) error { 171 return writeFromChan(out, c, true) 172 } 173 174 // MarshalCSV returns the CSV in writer from the interface. 175 func MarshalCSV(in interface{}, out CSVWriter) (err error) { 176 return writeTo(out, in, false) 177 } 178 179 // MarshalCSVWithoutHeaders returns the CSV in writer from the interface. 180 func MarshalCSVWithoutHeaders(in interface{}, out CSVWriter) (err error) { 181 return writeTo(out, in, true) 182 } 183 184 // -------------------------------------------------------------------------- 185 // Unmarshal functions 186 187 // UnmarshalFile parses the CSV from the file in the interface. 188 func UnmarshalFile(in *os.File, out interface{}) error { 189 return Unmarshal(in, out) 190 } 191 192 // UnmarshalMultipartFile parses the CSV from the multipart file in the interface. 193 func UnmarshalMultipartFile(in *multipart.File, out interface{}) error { 194 return Unmarshal(convertTo(in), out) 195 } 196 197 // UnmarshalFileWithErrorHandler parses the CSV from the file in the interface. 198 func UnmarshalFileWithErrorHandler(in *os.File, errHandler ErrorHandler, out interface{}) error { 199 return UnmarshalWithErrorHandler(in, errHandler, out) 200 } 201 202 // UnmarshalString parses the CSV from the string in the interface. 203 func UnmarshalString(in string, out interface{}) error { 204 return Unmarshal(strings.NewReader(in), out) 205 } 206 207 // UnmarshalBytes parses the CSV from the bytes in the interface. 208 func UnmarshalBytes(in []byte, out interface{}) error { 209 return Unmarshal(bytes.NewReader(in), out) 210 } 211 212 // Unmarshal parses the CSV from the reader in the interface. 213 func Unmarshal(in io.Reader, out interface{}) error { 214 return readTo(newSimpleDecoderFromReader(in), out) 215 } 216 217 // Unmarshal parses the CSV from the reader in the interface. 218 func UnmarshalWithErrorHandler(in io.Reader, errHandle ErrorHandler, out interface{}) error { 219 return readToWithErrorHandler(newSimpleDecoderFromReader(in), errHandle, out) 220 } 221 222 // UnmarshalWithoutHeaders parses the CSV from the reader in the interface. 223 func UnmarshalWithoutHeaders(in io.Reader, out interface{}) error { 224 return readToWithoutHeaders(newSimpleDecoderFromReader(in), out) 225 } 226 227 // UnmarshalCSVWithoutHeaders parses a headerless CSV with passed in CSV reader 228 func UnmarshalCSVWithoutHeaders(in CSVReader, out interface{}) error { 229 return readToWithoutHeaders(csvDecoder{in}, out) 230 } 231 232 // UnmarshalDecoder parses the CSV from the decoder in the interface 233 func UnmarshalDecoder(in Decoder, out interface{}) error { 234 return readTo(in, out) 235 } 236 237 // UnmarshalCSV parses the CSV from the reader in the interface. 238 func UnmarshalCSV(in CSVReader, out interface{}) error { 239 return readTo(csvDecoder{in}, out) 240 } 241 242 // UnmarshalCSVToMap parses a CSV of 2 columns into a map. 243 func UnmarshalCSVToMap(in CSVReader, out interface{}) error { 244 decoder := NewSimpleDecoderFromCSVReader(in) 245 header, err := decoder.GetCSVRow() 246 if err != nil { 247 return err 248 } 249 if len(header) != 2 { 250 return fmt.Errorf("maps can only be created for csv of two columns") 251 } 252 outValue, outType := getConcreteReflectValueAndType(out) 253 if outType.Kind() != reflect.Map { 254 return fmt.Errorf("cannot use " + outType.String() + ", only map supported") 255 } 256 keyType := outType.Key() 257 valueType := outType.Elem() 258 outValue.Set(reflect.MakeMap(outType)) 259 for { 260 key := reflect.New(keyType) 261 value := reflect.New(valueType) 262 line, err := decoder.GetCSVRow() 263 if err == io.EOF { 264 break 265 } else if err != nil { 266 return err 267 } 268 if err := setField(key, line[0], false); err != nil { 269 return err 270 } 271 if err := setField(value, line[1], false); err != nil { 272 return err 273 } 274 outValue.SetMapIndex(key.Elem(), value.Elem()) 275 } 276 return nil 277 } 278 279 // UnmarshalToChan parses the CSV from the reader and send each value in the chan c. 280 // The channel must have a concrete type. 281 func UnmarshalToChan(in io.Reader, c interface{}) error { 282 if c == nil { 283 return fmt.Errorf("goscv: channel is %v", c) 284 } 285 return readEach(newSimpleDecoderFromReader(in), nil, c) 286 } 287 288 // UnmarshalToChanWithErrorHandler parses the CSV from the reader in the interface. 289 func UnmarshalToChanWithErrorHandler(in io.Reader, errorHandler ErrorHandler, c interface{}) error { 290 if c == nil { 291 return fmt.Errorf("goscv: channel is %v", c) 292 } 293 return readEach(newSimpleDecoderFromReader(in), errorHandler, c) 294 } 295 296 // UnmarshalToChanWithoutHeaders parses the CSV from the reader and send each value in the chan c. 297 // The channel must have a concrete type. 298 func UnmarshalToChanWithoutHeaders(in io.Reader, c interface{}) error { 299 if c == nil { 300 return fmt.Errorf("goscv: channel is %v", c) 301 } 302 return readEachWithoutHeaders(newSimpleDecoderFromReader(in), c) 303 } 304 305 // UnmarshalDecoderToChan parses the CSV from the decoder and send each value in the chan c. 306 // The channel must have a concrete type. 307 func UnmarshalDecoderToChan(in SimpleDecoder, c interface{}) error { 308 if c == nil { 309 return fmt.Errorf("goscv: channel is %v", c) 310 } 311 return readEach(in, nil, c) 312 } 313 314 // UnmarshalStringToChan parses the CSV from the string and send each value in the chan c. 315 // The channel must have a concrete type. 316 func UnmarshalStringToChan(in string, c interface{}) error { 317 return UnmarshalToChan(strings.NewReader(in), c) 318 } 319 320 // UnmarshalBytesToChan parses the CSV from the bytes and send each value in the chan c. 321 // The channel must have a concrete type. 322 func UnmarshalBytesToChan(in []byte, c interface{}) error { 323 return UnmarshalToChan(bytes.NewReader(in), c) 324 } 325 326 // UnmarshalToCallback parses the CSV from the reader and send each value to the given func f. 327 // The func must look like func(Struct). 328 func UnmarshalToCallback(in io.Reader, f interface{}) error { 329 valueFunc := reflect.ValueOf(f) 330 t := reflect.TypeOf(f) 331 if t.NumIn() != 1 { 332 return fmt.Errorf("the given function must have exactly one parameter") 333 } 334 cerr := make(chan error) 335 c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0) 336 go func() { 337 cerr <- UnmarshalToChan(in, c.Interface()) 338 }() 339 for { 340 select { 341 case err := <-cerr: 342 return err 343 default: 344 } 345 v, notClosed := c.Recv() 346 if !notClosed || v.Interface() == nil { 347 break 348 } 349 callResults := valueFunc.Call([]reflect.Value{v}) 350 // if last returned value from Call() is an error, return it 351 if len(callResults) > 0 { 352 if err, ok := callResults[len(callResults)-1].Interface().(error); ok { 353 return err 354 } 355 } 356 } 357 return <-cerr 358 } 359 360 // UnmarshalDecoderToCallback parses the CSV from the decoder and send each value to the given func f. 361 // The func must look like func(Struct). 362 func UnmarshalDecoderToCallback(in SimpleDecoder, f interface{}) error { 363 valueFunc := reflect.ValueOf(f) 364 t := reflect.TypeOf(f) 365 if t.NumIn() != 1 { 366 return fmt.Errorf("the given function must have exactly one parameter") 367 } 368 cerr := make(chan error) 369 c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0) 370 go func() { 371 cerr <- UnmarshalDecoderToChan(in, c.Interface()) 372 }() 373 for { 374 select { 375 case err := <-cerr: 376 return err 377 default: 378 } 379 v, notClosed := c.Recv() 380 if !notClosed || v.Interface() == nil { 381 break 382 } 383 valueFunc.Call([]reflect.Value{v}) 384 } 385 return <-cerr 386 } 387 388 // UnmarshalBytesToCallback parses the CSV from the bytes and send each value to the given func f. 389 // The func must look like func(Struct). 390 func UnmarshalBytesToCallback(in []byte, f interface{}) error { 391 return UnmarshalToCallback(bytes.NewReader(in), f) 392 } 393 394 // UnmarshalStringToCallback parses the CSV from the string and send each value to the given func f. 395 // The func must look like func(Struct). 396 func UnmarshalStringToCallback(in string, c interface{}) (err error) { 397 return UnmarshalToCallback(strings.NewReader(in), c) 398 } 399 400 // UnmarshalToCallbackWithError parses the CSV from the reader and 401 // send each value to the given func f. 402 // 403 // If func returns error, it will stop processing, drain the 404 // parser and propagate the error to caller. 405 // 406 // The func must look like func(Struct) error. 407 func UnmarshalToCallbackWithError(in io.Reader, f interface{}) error { 408 valueFunc := reflect.ValueOf(f) 409 t := reflect.TypeOf(f) 410 if t.NumIn() != 1 { 411 return fmt.Errorf("the given function must have exactly one parameter") 412 } 413 if t.NumOut() != 1 { 414 return fmt.Errorf("the given function must have exactly one return value") 415 } 416 if !isErrorType(t.Out(0)) { 417 return fmt.Errorf("the given function must only return error") 418 } 419 420 cerr := make(chan error) 421 c := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, t.In(0)), 0) 422 go func() { 423 cerr <- UnmarshalToChan(in, c.Interface()) 424 }() 425 426 var fErr error 427 for { 428 select { 429 case err := <-cerr: 430 if err != nil { 431 return err 432 } 433 return fErr 434 default: 435 } 436 v, notClosed := c.Recv() 437 if !notClosed || v.Interface() == nil { 438 if err := <-cerr; err != nil { 439 fErr = err 440 } 441 break 442 } 443 444 // callback f has already returned an error, stop processing but keep draining the chan c 445 if fErr != nil { 446 continue 447 } 448 449 results := valueFunc.Call([]reflect.Value{v}) 450 451 // If the callback f returns an error, stores it and returns it in future. 452 errValue := results[0].Interface() 453 if errValue != nil { 454 fErr = errValue.(error) 455 } 456 } 457 return fErr 458 } 459 460 // UnmarshalBytesToCallbackWithError parses the CSV from the bytes and 461 // send each value to the given func f. 462 // 463 // If func returns error, it will stop processing, drain the 464 // parser and propagate the error to caller. 465 // 466 // The func must look like func(Struct) error. 467 func UnmarshalBytesToCallbackWithError(in []byte, f interface{}) error { 468 return UnmarshalToCallbackWithError(bytes.NewReader(in), f) 469 } 470 471 // UnmarshalStringToCallbackWithError parses the CSV from the string and 472 // send each value to the given func f. 473 // 474 // If func returns error, it will stop processing, drain the 475 // parser and propagate the error to caller. 476 // 477 // The func must look like func(Struct) error. 478 func UnmarshalStringToCallbackWithError(in string, c interface{}) (err error) { 479 return UnmarshalToCallbackWithError(strings.NewReader(in), c) 480 } 481 482 // CSVToMap creates a simple map from a CSV of 2 columns. 483 func CSVToMap(in io.Reader) (map[string]string, error) { 484 decoder := newSimpleDecoderFromReader(in) 485 header, err := decoder.GetCSVRow() 486 if err != nil { 487 return nil, err 488 } 489 if len(header) != 2 { 490 return nil, fmt.Errorf("maps can only be created for csv of two columns") 491 } 492 m := make(map[string]string) 493 for { 494 line, err := decoder.GetCSVRow() 495 if err == io.EOF { 496 break 497 } else if err != nil { 498 return nil, err 499 } 500 m[line[0]] = line[1] 501 } 502 return m, nil 503 } 504 505 // CSVToMaps takes a reader and returns an array of dictionaries, using the header row as the keys 506 func CSVToMaps(reader io.Reader) ([]map[string]string, error) { 507 r := getCSVReader(reader) 508 rows := []map[string]string{} 509 var header []string 510 for { 511 record, err := r.Read() 512 if err == io.EOF { 513 break 514 } 515 if err != nil { 516 return nil, err 517 } 518 if header == nil { 519 header = record 520 } else { 521 dict := map[string]string{} 522 for i := range header { 523 dict[header[i]] = record[i] 524 } 525 rows = append(rows, dict) 526 } 527 } 528 return rows, nil 529 } 530 531 // CSVToChanMaps parses the CSV from the reader and send a dictionary in the chan c, using the header row as the keys. 532 func CSVToChanMaps(reader io.Reader, c chan<- map[string]string) error { 533 r := csv.NewReader(reader) 534 var header []string 535 for { 536 record, err := r.Read() 537 if err == io.EOF { 538 break 539 } 540 if err != nil { 541 return err 542 } 543 if header == nil { 544 header = record 545 } else { 546 dict := map[string]string{} 547 for i := range header { 548 dict[header[i]] = record[i] 549 } 550 c <- dict 551 } 552 } 553 return nil 554 }