github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/encoding/csv/reader_test.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"io"
     9  	"reflect"
    10  	"strings"
    11  	"testing"
    12  )
    13  
// readTests is the table of cases driven by TestRead. Each case supplies the
// raw CSV Input, the Reader settings to apply, and either the records expected
// in Output or a substring of the expected error in Error (optionally with the
// line and column at which the error must be reported).
var readTests = []struct {
	// Name labels the case in TestRead failure messages.
	Name string
	// Input is the raw CSV text handed to the Reader.
	Input string
	// Output holds the records expected when Error is empty.
	Output             [][]string
	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1

	// These fields are copied into the Reader
	Comma            rune
	Comment          rune
	FieldsPerRecord  int
	LazyQuotes       bool
	TrailingComma    bool
	TrimLeadingSpace bool
	ReuseRecord      bool

	// Error, when non-empty, is a substring that the returned error's text
	// must contain; Output is not compared in that case.
	Error  string
	Line   int // Expected error line if != 0
	Column int // Expected error column if line != 0
}{
	{
		Name:   "Simple",
		Input:  "a,b,c\n",
		Output: [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "CRLF",
		Input:  "a,b\r\nc,d\r\n",
		Output: [][]string{{"a", "b"}, {"c", "d"}},
	},
	// A '\r' that is not followed by '\n' is expected to stay inside the field.
	{
		Name:   "BareCR",
		Input:  "a,b\rc,d\r\n",
		Output: [][]string{{"a", "b\rc", "d"}},
	},
	// Comment is left unset here, so the leading "#field1" line is expected
	// back as an ordinary record.
	{
		Name:               "RFC4180test",
		UseFieldsPerRecord: true,
		Input: `#field1,field2,field3
"aaa","bb
b","ccc"
"a,a","b""bb","ccc"
zzz,yyy,xxx
`,
		Output: [][]string{
			{"#field1", "field2", "field3"},
			{"aaa", "bb\nb", "ccc"},
			{"a,a", `b"bb`, "ccc"},
			{"zzz", "yyy", "xxx"},
		},
	},
	{
		Name:   "NoEOLTest",
		Input:  "a,b,c",
		Output: [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "Semicolon",
		Comma:  ';',
		Input:  "a;b;c\n",
		Output: [][]string{{"a", "b", "c"}},
	},
	// Newlines inside quoted fields are expected to be kept in the field value.
	{
		Name: "MultiLine",
		Input: `"two
line","one line","three
line
field"`,
		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
	},
	// Blank lines are expected to produce no records, with or without
	// UseFieldsPerRecord.
	{
		Name:  "BlankLine",
		Input: "a,b,c\n\nd,e,f\n\n",
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
	},
	{
		Name:               "BlankLineFieldCount",
		Input:              "a,b,c\n\nd,e,f\n\n",
		UseFieldsPerRecord: true,
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
	},
	// TrimSpace and LeadingSpace share one input and differ only in
	// TrimLeadingSpace.
	{
		Name:             "TrimSpace",
		Input:            " a,  b,   c\n",
		TrimLeadingSpace: true,
		Output:           [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "LeadingSpace",
		Input:  " a,  b,   c\n",
		Output: [][]string{{" a", "  b", "   c"}},
	},
	// With Comment set to '#', lines starting with '#' are expected to be
	// dropped; NoComment shows the same kind of line kept when Comment is unset.
	{
		Name:    "Comment",
		Comment: '#',
		Input:   "#1,2,3\na,b,c\n#comment",
		Output:  [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "NoComment",
		Input:  "#1,2,3\na,b,c",
		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
	},
	// The next three cases enable LazyQuotes and expect stray quotes to be
	// kept as literal field content rather than reported as errors.
	{
		Name:       "LazyQuotes",
		LazyQuotes: true,
		Input:      `a "word","1"2",a","b`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	},
	{
		Name:       "BareQuotes",
		LazyQuotes: true,
		Input:      `a "word","1"2",a"`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	},
	{
		Name:       "BareDoubleQuotes",
		LazyQuotes: true,
		Input:      `a""b,c`,
		Output:     [][]string{{`a""b`, `c`}},
	},
	// Same input as BareDoubleQuotes, but without LazyQuotes an error is
	// expected.
	{
		Name:  "BadDoubleQuotes",
		Input: `a""b,c`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
	},
	// Only the space before the opening quote is expected to be trimmed; the
	// space inside the quoted " b" is kept.
	{
		Name:             "TrimQuote",
		Input:            ` "a"," b",c`,
		TrimLeadingSpace: true,
		Output:           [][]string{{"a", " b", "c"}},
	},
	{
		Name:  "BadBareQuote",
		Input: `a "word","b"`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
	},
	{
		Name:  "BadTrailingQuote",
		Input: `"a word",b"`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
	},
	{
		Name:  "ExtraneousQuote",
		Input: `"a "word","b"`,
		Error: `extraneous " in field`, Line: 1, Column: 3,
	},
	// The field-count cases set only Line, so the expected Column is 0.
	{
		Name:               "BadFieldCount",
		UseFieldsPerRecord: true,
		Input:              "a,b,c\nd,e",
		Error:              "wrong number of fields", Line: 2,
	},
	{
		Name:               "BadFieldCount1",
		UseFieldsPerRecord: true,
		FieldsPerRecord:    2,
		Input:              `a,b,c`,
		Error:              "wrong number of fields", Line: 1,
	},
	// Same input as BadFieldCount; without UseFieldsPerRecord the uneven
	// records are expected to be accepted.
	{
		Name:   "FieldCount",
		Input:  "a,b,c\nd,e",
		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
	},
	// A trailing comma is expected to yield a final empty field in the cases
	// below.
	{
		Name:   "TrailingCommaEOF",
		Input:  "a,b,c,",
		Output: [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:   "TrailingCommaEOL",
		Input:  "a,b,c,\n",
		Output: [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaSpaceEOF",
		TrimLeadingSpace: true,
		Input:            "a,b,c, ",
		Output:           [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaSpaceEOL",
		TrimLeadingSpace: true,
		Input:            "a,b,c, \n",
		Output:           [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaLine3",
		TrimLeadingSpace: true,
		Input:            "a,b,c\nd,e,f\ng,hi,",
		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	},
	// Without TrimLeadingSpace the space after the last comma is a field value.
	{
		Name:   "NotTrailingComma3",
		Input:  "a,b,c, \n",
		Output: [][]string{{"a", "b", "c", " "}},
	},
	// Unquoted and quoted spellings of the same five rows are expected to
	// produce the same records.
	{
		Name:          "CommaFieldTest",
		TrailingComma: true,
		Input: `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`,
		Output: [][]string{
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
		},
	},
	// The two TrailingCommaIneffective cases share input and expected output
	// and differ only in the TrailingComma setting.
	{
		Name:             "TrailingCommaIneffective1",
		TrailingComma:    true,
		TrimLeadingSpace: true,
		Input:            "a,b,\nc,d,e",
		Output: [][]string{
			{"a", "b", ""},
			{"c", "d", "e"},
		},
	},
	{
		Name:             "TrailingCommaIneffective2",
		TrailingComma:    false,
		TrimLeadingSpace: true,
		Input:            "a,b,\nc,d,e",
		Output: [][]string{
			{"a", "b", ""},
			{"c", "d", "e"},
		},
	},
	// Even with ReuseRecord set, ReadAll is expected to return distinct
	// records.
	{
		Name:        "ReadAllReuseRecord",
		ReuseRecord: true,
		Input:       "a,b\nc,d",
		Output: [][]string{
			{"a", "b"},
			{"c", "d"},
		},
	},
	{ // issue 19019
		Name:   "RecordLine1",
		Input:  "a,\"b\nc\"d,e",
		Error:  `extraneous " in field`,
		Line:   1,
		Column: 1,
	},
	{
		Name:   "RecordLine2",
		Input:  "a,b\n\"d\n\n,e",
		Error:  `extraneous " in field`,
		Line:   2,
		Column: 2,
	},
	{ // issue 21201
		Name:  "CRLFInQuotedField",
		Input: "\"Hello\r\nHi\"",
		Output: [][]string{
			{"Hello\r\nHi"},
		},
	},
}
   295  
   296  func TestRead(t *testing.T) {
   297  	for _, tt := range readTests {
   298  		r := NewReader(strings.NewReader(tt.Input))
   299  		r.Comment = tt.Comment
   300  		if tt.UseFieldsPerRecord {
   301  			r.FieldsPerRecord = tt.FieldsPerRecord
   302  		} else {
   303  			r.FieldsPerRecord = -1
   304  		}
   305  		r.LazyQuotes = tt.LazyQuotes
   306  		r.TrailingComma = tt.TrailingComma
   307  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   308  		r.ReuseRecord = tt.ReuseRecord
   309  		if tt.Comma != 0 {
   310  			r.Comma = tt.Comma
   311  		}
   312  		out, err := r.ReadAll()
   313  		perr, _ := err.(*ParseError)
   314  		if tt.Error != "" {
   315  			if err == nil || !strings.Contains(err.Error(), tt.Error) {
   316  				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
   317  			} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
   318  				t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
   319  			}
   320  		} else if err != nil {
   321  			t.Errorf("%s: unexpected error %v", tt.Name, err)
   322  		} else if !reflect.DeepEqual(out, tt.Output) {
   323  			t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
   324  		}
   325  	}
   326  }
   327  
   328  // nTimes is an io.Reader which yields the string s n times.
   329  type nTimes struct {
   330  	s   string
   331  	n   int
   332  	off int
   333  }
   334  
   335  func (r *nTimes) Read(p []byte) (n int, err error) {
   336  	for {
   337  		if r.n <= 0 || r.s == "" {
   338  			return n, io.EOF
   339  		}
   340  		n0 := copy(p, r.s[r.off:])
   341  		p = p[n0:]
   342  		n += n0
   343  		r.off += n0
   344  		if r.off == len(r.s) {
   345  			r.off = 0
   346  			r.n--
   347  		}
   348  		if len(p) == 0 {
   349  			return
   350  		}
   351  	}
   352  }
   353  
   354  // benchmarkRead measures reading the provided CSV rows data.
   355  // initReader, if non-nil, modifies the Reader before it's used.
   356  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   357  	b.ReportAllocs()
   358  	r := NewReader(&nTimes{s: rows, n: b.N})
   359  	if initReader != nil {
   360  		initReader(r)
   361  	}
   362  	for {
   363  		_, err := r.Read()
   364  		if err == io.EOF {
   365  			break
   366  		}
   367  		if err != nil {
   368  			b.Fatal(err)
   369  		}
   370  	}
   371  }
   372  
// benchmarkCSVData is the input used by the benchmarks below that do not
// supply their own data: ten rows of four short fields each, five unquoted
// and five quoted. The rows are the same as the Input of the CommaFieldTest
// case in readTests.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
   384  
// BenchmarkRead measures reading benchmarkCSVData with the Reader exactly as
// NewReader returns it (no initReader hook is passed).
func BenchmarkRead(b *testing.B) {
	benchmarkRead(b, nil, benchmarkCSVData)
}
   388  
   389  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   390  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   391  }
   392  
   393  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   394  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   395  }
   396  
// BenchmarkReadLargeFields measures reading rows whose fields are much longer
// than those in benchmarkCSVData, with the Reader as NewReader returns it.
// The input is a four-row chunk (five fields per row, some of them empty)
// repeated three times via strings.Repeat.
func BenchmarkReadLargeFields(b *testing.B) {
	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
   404  
   405  func BenchmarkReadReuseRecord(b *testing.B) {
   406  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   407  }
   408  
   409  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   410  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   411  }
   412  
   413  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   414  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   415  }
   416  
// BenchmarkReadReuseRecordLargeFields measures reading rows whose fields are
// much longer than those in benchmarkCSVData, with the Reader's ReuseRecord
// option switched on. The input is a four-row chunk (five fields per row,
// some of them empty) repeated three times via strings.Repeat.
func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}