github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/encoding/csv/writer.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // Copyright 2011 The Go Authors. All rights reserved.
    12  // Use of this source code is governed by a BSD-style
    13  // license that can be found in licenses/BSD-golang.txt.
    14  
    15  package csv
    16  
    17  import (
    18  	"bufio"
    19  	"io"
    20  	"strings"
    21  	"unicode"
    22  	"unicode/utf8"
    23  )
    24  
    25  // A Writer writes records to a CSV encoded file.
    26  //
    27  // As returned by NewWriter, a Writer writes records terminated by a
    28  // newline and uses ',' as the field delimiter. The exported fields can be
    29  // changed to customize the details before the first call to Write or WriteAll.
    30  //
    31  // Comma is the field delimiter.
    32  //
    33  // If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
    34  type Writer struct {
    35  	Comma   rune // Field delimiter (set to ',' by NewWriter)
    36  	UseCRLF bool // True to use \r\n as the line terminator
    37  	w       *bufio.Writer
    38  }
    39  
    40  // NewWriter returns a new Writer that writes to w.
    41  func NewWriter(w io.Writer) *Writer {
    42  	return &Writer{
    43  		Comma: ',',
    44  		w:     bufio.NewWriter(w),
    45  	}
    46  }
    47  
    48  // Writer writes a single CSV record to w along with any necessary quoting.
    49  // A record is a slice of strings with each string being one field.
    50  func (w *Writer) Write(record []string) error {
    51  	if !validDelim(w.Comma) {
    52  		return errInvalidDelim
    53  	}
    54  
    55  	for n, field := range record {
    56  		if n > 0 {
    57  			if _, err := w.w.WriteRune(w.Comma); err != nil {
    58  				return err
    59  			}
    60  		}
    61  
    62  		// If we don't have to have a quoted field then just
    63  		// write out the field and continue to the next field.
    64  		if !w.fieldNeedsQuotes(field) {
    65  			if _, err := w.w.WriteString(field); err != nil {
    66  				return err
    67  			}
    68  			continue
    69  		}
    70  		if err := w.w.WriteByte('"'); err != nil {
    71  			return err
    72  		}
    73  
    74  		for _, r1 := range field {
    75  			var err error
    76  			switch r1 {
    77  			case '"':
    78  				_, err = w.w.WriteString(`""`)
    79  			case '\r':
    80  				if !w.UseCRLF {
    81  					err = w.w.WriteByte('\r')
    82  				}
    83  			case '\n':
    84  				if w.UseCRLF {
    85  					_, err = w.w.WriteString("\r\n")
    86  				} else {
    87  					err = w.w.WriteByte('\n')
    88  				}
    89  			default:
    90  				_, err = w.w.WriteRune(r1)
    91  			}
    92  			if err != nil {
    93  				return err
    94  			}
    95  		}
    96  
    97  		if err := w.w.WriteByte('"'); err != nil {
    98  			return err
    99  		}
   100  	}
   101  	var err error
   102  	if w.UseCRLF {
   103  		_, err = w.w.WriteString("\r\n")
   104  	} else {
   105  		err = w.w.WriteByte('\n')
   106  	}
   107  	return err
   108  }
   109  
   110  // Flush writes any buffered data to the underlying io.Writer.
   111  // To check if an error occurred during the Flush, call Error.
   112  func (w *Writer) Flush() {
   113  	w.w.Flush()
   114  }
   115  
   116  // Error reports any error that has occurred during a previous Write or Flush.
   117  func (w *Writer) Error() error {
   118  	_, err := w.w.Write(nil)
   119  	return err
   120  }
   121  
   122  // WriteAll writes multiple CSV records to w using Write and then calls Flush.
   123  func (w *Writer) WriteAll(records [][]string) error {
   124  	for _, record := range records {
   125  		err := w.Write(record)
   126  		if err != nil {
   127  			return err
   128  		}
   129  	}
   130  	return w.w.Flush()
   131  }
   132  
   133  // fieldNeedsQuotes reports whether our field must be enclosed in quotes.
   134  // Fields with a Comma, fields with a quote or newline, and
   135  // fields which start with a space must be enclosed in quotes.
   136  // We used to quote empty strings, but we do not anymore (as of Go 1.4).
   137  // The two representations should be equivalent, but Postgres distinguishes
   138  // quoted vs non-quoted empty string during database imports, and it has
   139  // an option to force the quoted behavior for non-quoted CSV but it has
   140  // no option to force the non-quoted behavior for quoted CSV, making
   141  // CSV with quoted empty strings strictly less useful.
   142  // Not quoting the empty string also makes this package match the behavior
   143  // of Microsoft Excel and Google Drive.
   144  // For Postgres, quote the data terminating string `\.`.
   145  func (w *Writer) fieldNeedsQuotes(field string) bool {
   146  	if field == "" {
   147  		return false
   148  	}
   149  	if field == `\.` || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") {
   150  		return true
   151  	}
   152  
   153  	r1, _ := utf8.DecodeRuneInString(field)
   154  	return unicode.IsSpace(r1)
   155  }