github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/encoding/csv/reader_test.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"io"
     9  	"reflect"
    10  	"strings"
    11  	"testing"
    12  	"unicode/utf8"
    13  )
    14  
    15  func TestRead(t *testing.T) {
    16  	tests := []struct {
    17  		Name   string
    18  		Input  string
    19  		Output [][]string
    20  		Error  error
    21  
    22  		// These fields are copied into the Reader
    23  		Comma              rune
    24  		Comment            rune
    25  		UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
    26  		FieldsPerRecord    int
    27  		LazyQuotes         bool
    28  		TrimLeadingSpace   bool
    29  		ReuseRecord        bool
    30  	}{{
    31  		Name:   "Simple",
    32  		Input:  "a,b,c\n",
    33  		Output: [][]string{{"a", "b", "c"}},
    34  	}, {
    35  		Name:   "CRLF",
    36  		Input:  "a,b\r\nc,d\r\n",
    37  		Output: [][]string{{"a", "b"}, {"c", "d"}},
    38  	}, {
    39  		Name:   "BareCR",
    40  		Input:  "a,b\rc,d\r\n",
    41  		Output: [][]string{{"a", "b\rc", "d"}},
    42  	}, {
    43  		Name: "RFC4180test",
    44  		Input: `#field1,field2,field3
    45  "aaa","bb
    46  b","ccc"
    47  "a,a","b""bb","ccc"
    48  zzz,yyy,xxx
    49  `,
    50  		Output: [][]string{
    51  			{"#field1", "field2", "field3"},
    52  			{"aaa", "bb\nb", "ccc"},
    53  			{"a,a", `b"bb`, "ccc"},
    54  			{"zzz", "yyy", "xxx"},
    55  		},
    56  		UseFieldsPerRecord: true,
    57  		FieldsPerRecord:    0,
    58  	}, {
    59  		Name:   "NoEOLTest",
    60  		Input:  "a,b,c",
    61  		Output: [][]string{{"a", "b", "c"}},
    62  	}, {
    63  		Name:   "Semicolon",
    64  		Input:  "a;b;c\n",
    65  		Output: [][]string{{"a", "b", "c"}},
    66  		Comma:  ';',
    67  	}, {
    68  		Name: "MultiLine",
    69  		Input: `"two
    70  line","one line","three
    71  line
    72  field"`,
    73  		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
    74  	}, {
    75  		Name:  "BlankLine",
    76  		Input: "a,b,c\n\nd,e,f\n\n",
    77  		Output: [][]string{
    78  			{"a", "b", "c"},
    79  			{"d", "e", "f"},
    80  		},
    81  	}, {
    82  		Name:  "BlankLineFieldCount",
    83  		Input: "a,b,c\n\nd,e,f\n\n",
    84  		Output: [][]string{
    85  			{"a", "b", "c"},
    86  			{"d", "e", "f"},
    87  		},
    88  		UseFieldsPerRecord: true,
    89  		FieldsPerRecord:    0,
    90  	}, {
    91  		Name:             "TrimSpace",
    92  		Input:            " a,  b,   c\n",
    93  		Output:           [][]string{{"a", "b", "c"}},
    94  		TrimLeadingSpace: true,
    95  	}, {
    96  		Name:   "LeadingSpace",
    97  		Input:  " a,  b,   c\n",
    98  		Output: [][]string{{" a", "  b", "   c"}},
    99  	}, {
   100  		Name:    "Comment",
   101  		Input:   "#1,2,3\na,b,c\n#comment",
   102  		Output:  [][]string{{"a", "b", "c"}},
   103  		Comment: '#',
   104  	}, {
   105  		Name:   "NoComment",
   106  		Input:  "#1,2,3\na,b,c",
   107  		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
   108  	}, {
   109  		Name:       "LazyQuotes",
   110  		Input:      `a "word","1"2",a","b`,
   111  		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
   112  		LazyQuotes: true,
   113  	}, {
   114  		Name:       "BareQuotes",
   115  		Input:      `a "word","1"2",a"`,
   116  		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
   117  		LazyQuotes: true,
   118  	}, {
   119  		Name:       "BareDoubleQuotes",
   120  		Input:      `a""b,c`,
   121  		Output:     [][]string{{`a""b`, `c`}},
   122  		LazyQuotes: true,
   123  	}, {
   124  		Name:  "BadDoubleQuotes",
   125  		Input: `a""b,c`,
   126  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
   127  	}, {
   128  		Name:             "TrimQuote",
   129  		Input:            ` "a"," b",c`,
   130  		Output:           [][]string{{"a", " b", "c"}},
   131  		TrimLeadingSpace: true,
   132  	}, {
   133  		Name:  "BadBareQuote",
   134  		Input: `a "word","b"`,
   135  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
   136  	}, {
   137  		Name:  "BadTrailingQuote",
   138  		Input: `"a word",b"`,
   139  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
   140  	}, {
   141  		Name:  "ExtraneousQuote",
   142  		Input: `"a "word","b"`,
   143  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 3, Err: ErrQuote},
   144  	}, {
   145  		Name:               "BadFieldCount",
   146  		Input:              "a,b,c\nd,e",
   147  		Error:              &ParseError{RecordLine: 2, Line: 2, Err: ErrFieldCount},
   148  		UseFieldsPerRecord: true,
   149  		FieldsPerRecord:    0,
   150  	}, {
   151  		Name:               "BadFieldCount1",
   152  		Input:              `a,b,c`,
   153  		Error:              &ParseError{RecordLine: 1, Line: 1, Err: ErrFieldCount},
   154  		UseFieldsPerRecord: true,
   155  		FieldsPerRecord:    2,
   156  	}, {
   157  		Name:   "FieldCount",
   158  		Input:  "a,b,c\nd,e",
   159  		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
   160  	}, {
   161  		Name:   "TrailingCommaEOF",
   162  		Input:  "a,b,c,",
   163  		Output: [][]string{{"a", "b", "c", ""}},
   164  	}, {
   165  		Name:   "TrailingCommaEOL",
   166  		Input:  "a,b,c,\n",
   167  		Output: [][]string{{"a", "b", "c", ""}},
   168  	}, {
   169  		Name:             "TrailingCommaSpaceEOF",
   170  		Input:            "a,b,c, ",
   171  		Output:           [][]string{{"a", "b", "c", ""}},
   172  		TrimLeadingSpace: true,
   173  	}, {
   174  		Name:             "TrailingCommaSpaceEOL",
   175  		Input:            "a,b,c, \n",
   176  		Output:           [][]string{{"a", "b", "c", ""}},
   177  		TrimLeadingSpace: true,
   178  	}, {
   179  		Name:             "TrailingCommaLine3",
   180  		Input:            "a,b,c\nd,e,f\ng,hi,",
   181  		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
   182  		TrimLeadingSpace: true,
   183  	}, {
   184  		Name:   "NotTrailingComma3",
   185  		Input:  "a,b,c, \n",
   186  		Output: [][]string{{"a", "b", "c", " "}},
   187  	}, {
   188  		Name: "CommaFieldTest",
   189  		Input: `x,y,z,w
   190  x,y,z,
   191  x,y,,
   192  x,,,
   193  ,,,
   194  "x","y","z","w"
   195  "x","y","z",""
   196  "x","y","",""
   197  "x","","",""
   198  "","","",""
   199  `,
   200  		Output: [][]string{
   201  			{"x", "y", "z", "w"},
   202  			{"x", "y", "z", ""},
   203  			{"x", "y", "", ""},
   204  			{"x", "", "", ""},
   205  			{"", "", "", ""},
   206  			{"x", "y", "z", "w"},
   207  			{"x", "y", "z", ""},
   208  			{"x", "y", "", ""},
   209  			{"x", "", "", ""},
   210  			{"", "", "", ""},
   211  		},
   212  	}, {
   213  		Name:  "TrailingCommaIneffective1",
   214  		Input: "a,b,\nc,d,e",
   215  		Output: [][]string{
   216  			{"a", "b", ""},
   217  			{"c", "d", "e"},
   218  		},
   219  		TrimLeadingSpace: true,
   220  	}, {
   221  		Name:  "ReadAllReuseRecord",
   222  		Input: "a,b\nc,d",
   223  		Output: [][]string{
   224  			{"a", "b"},
   225  			{"c", "d"},
   226  		},
   227  		ReuseRecord: true,
   228  	}, {
   229  		Name:  "RecordLine1", // Issue 19019
   230  		Input: "a,\"b\nc\"d,e",
   231  		Error: &ParseError{RecordLine: 1, Line: 2, Column: 1, Err: ErrQuote},
   232  	}, {
   233  		Name:  "RecordLine2",
   234  		Input: "a,b\n\"d\n\n,e",
   235  		Error: &ParseError{RecordLine: 2, Line: 5, Column: 0, Err: ErrQuote},
   236  	}, {
   237  		Name:  "CRLFInQuotedField", // Issue 21201
   238  		Input: "\"Hello\r\nHi\"",
   239  		Output: [][]string{
   240  			{"Hello\r\nHi"},
   241  		},
   242  	}, {
   243  		Name:   "BinaryBlobField", // Issue 19410
   244  		Input:  "x09\x41\xb4\x1c,aktau",
   245  		Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
   246  	}, {
   247  		Name:   "TrailingCR",
   248  		Input:  "field1,field2\r",
   249  		Output: [][]string{{"field1", "field2\r"}},
   250  	}, {
   251  		Name:             "NonASCIICommaAndComment",
   252  		Input:            "a£b,c£ \td,e\n€ comment\n",
   253  		Output:           [][]string{{"a", "b,c", "d,e"}},
   254  		TrimLeadingSpace: true,
   255  		Comma:            '£',
   256  		Comment:          '€',
   257  	}, {
   258  		Name:    "NonASCIICommaAndCommentWithQuotes",
   259  		Input:   "a€\"  b,\"€ c\nλ comment\n",
   260  		Output:  [][]string{{"a", "  b,", " c"}},
   261  		Comma:   '€',
   262  		Comment: 'λ',
   263  	}, {
   264  		// λ and θ start with the same byte.
   265  		// This tests that the parser doesn't confuse such characters.
   266  		Name:    "NonASCIICommaConfusion",
   267  		Input:   "\"abθcd\"λefθgh",
   268  		Output:  [][]string{{"abθcd", "efθgh"}},
   269  		Comma:   'λ',
   270  		Comment: '€',
   271  	}, {
   272  		Name:    "NonASCIICommentConfusion",
   273  		Input:   "λ\nλ\nθ\nλ\n",
   274  		Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
   275  		Comment: 'θ',
   276  	}, {
   277  		Name:   "QuotedFieldMultipleLF",
   278  		Input:  "\"\n\n\n\n\"",
   279  		Output: [][]string{{"\n\n\n\n"}},
   280  	}, {
   281  		Name:  "MultipleCRLF",
   282  		Input: "\r\n\r\n\r\n\r\n",
   283  	}, {
   284  		// The implementation may read each line in several chunks if it doesn't fit entirely
   285  		// in the read buffer, so we should test the code to handle that condition.
   286  		Name:    "HugeLines",
   287  		Input:   strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
   288  		Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
   289  		Comment: '#',
   290  	}, {
   291  		Name:  "QuoteWithTrailingCRLF",
   292  		Input: "\"foo\"bar\"\r\n",
   293  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 4, Err: ErrQuote},
   294  	}, {
   295  		Name:       "LazyQuoteWithTrailingCRLF",
   296  		Input:      "\"foo\"bar\"\r\n",
   297  		Output:     [][]string{{`foo"bar`}},
   298  		LazyQuotes: true,
   299  	}, {
   300  		Name:   "DoubleQuoteWithTrailingCRLF",
   301  		Input:  "\"foo\"\"bar\"\r\n",
   302  		Output: [][]string{{`foo"bar`}},
   303  	}, {
   304  		Name:   "EvenQuotes",
   305  		Input:  `""""""""`,
   306  		Output: [][]string{{`"""`}},
   307  	}, {
   308  		Name:  "OddQuotes",
   309  		Input: `"""""""`,
   310  		Error: &ParseError{RecordLine: 1, Line: 1, Column: 7, Err: ErrQuote},
   311  	}, {
   312  		Name:       "LazyOddQuotes",
   313  		Input:      `"""""""`,
   314  		Output:     [][]string{{`"""`}},
   315  		LazyQuotes: true,
   316  	}, {
   317  		Name:  "BadComma1",
   318  		Comma: '\n',
   319  		Error: errInvalidDelim,
   320  	}, {
   321  		Name:  "BadComma2",
   322  		Comma: '\r',
   323  		Error: errInvalidDelim,
   324  	}, {
   325  		Name:  "BadComma3",
   326  		Comma: utf8.RuneError,
   327  		Error: errInvalidDelim,
   328  	}, {
   329  		Name:    "BadComment1",
   330  		Comment: '\n',
   331  		Error:   errInvalidDelim,
   332  	}, {
   333  		Name:    "BadComment2",
   334  		Comment: '\r',
   335  		Error:   errInvalidDelim,
   336  	}, {
   337  		Name:    "BadComment3",
   338  		Comment: utf8.RuneError,
   339  		Error:   errInvalidDelim,
   340  	}, {
   341  		Name:    "BadCommaComment",
   342  		Comma:   'X',
   343  		Comment: 'X',
   344  		Error:   errInvalidDelim,
   345  	}}
   346  
   347  	for _, tt := range tests {
   348  		t.Run(tt.Name, func(t *testing.T) {
   349  			r := NewReader(strings.NewReader(tt.Input))
   350  
   351  			if tt.Comma != 0 {
   352  				r.Comma = tt.Comma
   353  			}
   354  			r.Comment = tt.Comment
   355  			if tt.UseFieldsPerRecord {
   356  				r.FieldsPerRecord = tt.FieldsPerRecord
   357  			} else {
   358  				r.FieldsPerRecord = -1
   359  			}
   360  			r.LazyQuotes = tt.LazyQuotes
   361  			r.TrimLeadingSpace = tt.TrimLeadingSpace
   362  			r.ReuseRecord = tt.ReuseRecord
   363  
   364  			out, err := r.ReadAll()
   365  			if !reflect.DeepEqual(err, tt.Error) {
   366  				t.Errorf("ReadAll() error:\ngot  %v\nwant %v", err, tt.Error)
   367  			} else if !reflect.DeepEqual(out, tt.Output) {
   368  				t.Errorf("ReadAll() output:\ngot  %q\nwant %q", out, tt.Output)
   369  			}
   370  		})
   371  	}
   372  }
   373  
   374  // nTimes is an io.Reader which yields the string s n times.
   375  type nTimes struct {
   376  	s   string
   377  	n   int
   378  	off int
   379  }
   380  
   381  func (r *nTimes) Read(p []byte) (n int, err error) {
   382  	for {
   383  		if r.n <= 0 || r.s == "" {
   384  			return n, io.EOF
   385  		}
   386  		n0 := copy(p, r.s[r.off:])
   387  		p = p[n0:]
   388  		n += n0
   389  		r.off += n0
   390  		if r.off == len(r.s) {
   391  			r.off = 0
   392  			r.n--
   393  		}
   394  		if len(p) == 0 {
   395  			return
   396  		}
   397  	}
   398  }
   399  
   400  // benchmarkRead measures reading the provided CSV rows data.
   401  // initReader, if non-nil, modifies the Reader before it's used.
   402  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   403  	b.ReportAllocs()
   404  	r := NewReader(&nTimes{s: rows, n: b.N})
   405  	if initReader != nil {
   406  		initReader(r)
   407  	}
   408  	for {
   409  		_, err := r.Read()
   410  		if err == io.EOF {
   411  			break
   412  		}
   413  		if err != nil {
   414  			b.Fatal(err)
   415  		}
   416  	}
   417  }
   418  
// benchmarkCSVData is the input shared by the small-field benchmarks: ten
// rows of four fields each, the first five unquoted and the last five quoted,
// with progressively more empty fields in each group.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
   430  
   431  func BenchmarkRead(b *testing.B) {
   432  	benchmarkRead(b, nil, benchmarkCSVData)
   433  }
   434  
   435  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   436  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   437  }
   438  
   439  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   440  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   441  }
   442  
   443  func BenchmarkReadLargeFields(b *testing.B) {
   444  	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   445  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   446  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   447  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   448  `, 3))
   449  }
   450  
   451  func BenchmarkReadReuseRecord(b *testing.B) {
   452  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   453  }
   454  
   455  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   456  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   457  }
   458  
   459  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   460  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   461  }
   462  
   463  func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
   464  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   465  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   466  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   467  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   468  `, 3))
   469  }