github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/encoding/csv/reader_test.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"io"
     9  	"reflect"
    10  	"strings"
    11  	"testing"
    12  )
    13  
    14  var readTests = []struct {
    15  	Name               string
    16  	Input              string
    17  	Output             [][]string
    18  	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
    19  
    20  	// These fields are copied into the Reader
    21  	Comma            rune
    22  	Comment          rune
    23  	FieldsPerRecord  int
    24  	LazyQuotes       bool
    25  	TrailingComma    bool
    26  	TrimLeadingSpace bool
    27  	ReuseRecord      bool
    28  
    29  	Error  string
    30  	Line   int // Expected error line if != 0
    31  	Column int // Expected error column if line != 0
    32  }{
    33  	{
    34  		Name:   "Simple",
    35  		Input:  "a,b,c\n",
    36  		Output: [][]string{{"a", "b", "c"}},
    37  	},
    38  	{
    39  		Name:   "CRLF",
    40  		Input:  "a,b\r\nc,d\r\n",
    41  		Output: [][]string{{"a", "b"}, {"c", "d"}},
    42  	},
    43  	{
    44  		Name:   "BareCR",
    45  		Input:  "a,b\rc,d\r\n",
    46  		Output: [][]string{{"a", "b\rc", "d"}},
    47  	},
    48  	{
    49  		Name:               "RFC4180test",
    50  		UseFieldsPerRecord: true,
    51  		Input: `#field1,field2,field3
    52  "aaa","bb
    53  b","ccc"
    54  "a,a","b""bb","ccc"
    55  zzz,yyy,xxx
    56  `,
    57  		Output: [][]string{
    58  			{"#field1", "field2", "field3"},
    59  			{"aaa", "bb\nb", "ccc"},
    60  			{"a,a", `b"bb`, "ccc"},
    61  			{"zzz", "yyy", "xxx"},
    62  		},
    63  	},
    64  	{
    65  		Name:   "NoEOLTest",
    66  		Input:  "a,b,c",
    67  		Output: [][]string{{"a", "b", "c"}},
    68  	},
    69  	{
    70  		Name:   "Semicolon",
    71  		Comma:  ';',
    72  		Input:  "a;b;c\n",
    73  		Output: [][]string{{"a", "b", "c"}},
    74  	},
    75  	{
    76  		Name: "MultiLine",
    77  		Input: `"two
    78  line","one line","three
    79  line
    80  field"`,
    81  		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
    82  	},
    83  	{
    84  		Name:  "BlankLine",
    85  		Input: "a,b,c\n\nd,e,f\n\n",
    86  		Output: [][]string{
    87  			{"a", "b", "c"},
    88  			{"d", "e", "f"},
    89  		},
    90  	},
    91  	{
    92  		Name:               "BlankLineFieldCount",
    93  		Input:              "a,b,c\n\nd,e,f\n\n",
    94  		UseFieldsPerRecord: true,
    95  		Output: [][]string{
    96  			{"a", "b", "c"},
    97  			{"d", "e", "f"},
    98  		},
    99  	},
   100  	{
   101  		Name:             "TrimSpace",
   102  		Input:            " a,  b,   c\n",
   103  		TrimLeadingSpace: true,
   104  		Output:           [][]string{{"a", "b", "c"}},
   105  	},
   106  	{
   107  		Name:   "LeadingSpace",
   108  		Input:  " a,  b,   c\n",
   109  		Output: [][]string{{" a", "  b", "   c"}},
   110  	},
   111  	{
   112  		Name:    "Comment",
   113  		Comment: '#',
   114  		Input:   "#1,2,3\na,b,c\n#comment",
   115  		Output:  [][]string{{"a", "b", "c"}},
   116  	},
   117  	{
   118  		Name:   "NoComment",
   119  		Input:  "#1,2,3\na,b,c",
   120  		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
   121  	},
   122  	{
   123  		Name:       "LazyQuotes",
   124  		LazyQuotes: true,
   125  		Input:      `a "word","1"2",a","b`,
   126  		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
   127  	},
   128  	{
   129  		Name:       "BareQuotes",
   130  		LazyQuotes: true,
   131  		Input:      `a "word","1"2",a"`,
   132  		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
   133  	},
   134  	{
   135  		Name:       "BareDoubleQuotes",
   136  		LazyQuotes: true,
   137  		Input:      `a""b,c`,
   138  		Output:     [][]string{{`a""b`, `c`}},
   139  	},
   140  	{
   141  		Name:  "BadDoubleQuotes",
   142  		Input: `a""b,c`,
   143  		Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
   144  	},
   145  	{
   146  		Name:             "TrimQuote",
   147  		Input:            ` "a"," b",c`,
   148  		TrimLeadingSpace: true,
   149  		Output:           [][]string{{"a", " b", "c"}},
   150  	},
   151  	{
   152  		Name:  "BadBareQuote",
   153  		Input: `a "word","b"`,
   154  		Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
   155  	},
   156  	{
   157  		Name:  "BadTrailingQuote",
   158  		Input: `"a word",b"`,
   159  		Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
   160  	},
   161  	{
   162  		Name:  "ExtraneousQuote",
   163  		Input: `"a "word","b"`,
   164  		Error: `extraneous " in field`, Line: 1, Column: 3,
   165  	},
   166  	{
   167  		Name:               "BadFieldCount",
   168  		UseFieldsPerRecord: true,
   169  		Input:              "a,b,c\nd,e",
   170  		Error:              "wrong number of fields", Line: 2,
   171  	},
   172  	{
   173  		Name:               "BadFieldCount1",
   174  		UseFieldsPerRecord: true,
   175  		FieldsPerRecord:    2,
   176  		Input:              `a,b,c`,
   177  		Error:              "wrong number of fields", Line: 1,
   178  	},
   179  	{
   180  		Name:   "FieldCount",
   181  		Input:  "a,b,c\nd,e",
   182  		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
   183  	},
   184  	{
   185  		Name:   "TrailingCommaEOF",
   186  		Input:  "a,b,c,",
   187  		Output: [][]string{{"a", "b", "c", ""}},
   188  	},
   189  	{
   190  		Name:   "TrailingCommaEOL",
   191  		Input:  "a,b,c,\n",
   192  		Output: [][]string{{"a", "b", "c", ""}},
   193  	},
   194  	{
   195  		Name:             "TrailingCommaSpaceEOF",
   196  		TrimLeadingSpace: true,
   197  		Input:            "a,b,c, ",
   198  		Output:           [][]string{{"a", "b", "c", ""}},
   199  	},
   200  	{
   201  		Name:             "TrailingCommaSpaceEOL",
   202  		TrimLeadingSpace: true,
   203  		Input:            "a,b,c, \n",
   204  		Output:           [][]string{{"a", "b", "c", ""}},
   205  	},
   206  	{
   207  		Name:             "TrailingCommaLine3",
   208  		TrimLeadingSpace: true,
   209  		Input:            "a,b,c\nd,e,f\ng,hi,",
   210  		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
   211  	},
   212  	{
   213  		Name:   "NotTrailingComma3",
   214  		Input:  "a,b,c, \n",
   215  		Output: [][]string{{"a", "b", "c", " "}},
   216  	},
   217  	{
   218  		Name:          "CommaFieldTest",
   219  		TrailingComma: true,
   220  		Input: `x,y,z,w
   221  x,y,z,
   222  x,y,,
   223  x,,,
   224  ,,,
   225  "x","y","z","w"
   226  "x","y","z",""
   227  "x","y","",""
   228  "x","","",""
   229  "","","",""
   230  `,
   231  		Output: [][]string{
   232  			{"x", "y", "z", "w"},
   233  			{"x", "y", "z", ""},
   234  			{"x", "y", "", ""},
   235  			{"x", "", "", ""},
   236  			{"", "", "", ""},
   237  			{"x", "y", "z", "w"},
   238  			{"x", "y", "z", ""},
   239  			{"x", "y", "", ""},
   240  			{"x", "", "", ""},
   241  			{"", "", "", ""},
   242  		},
   243  	},
   244  	{
   245  		Name:             "TrailingCommaIneffective1",
   246  		TrailingComma:    true,
   247  		TrimLeadingSpace: true,
   248  		Input:            "a,b,\nc,d,e",
   249  		Output: [][]string{
   250  			{"a", "b", ""},
   251  			{"c", "d", "e"},
   252  		},
   253  	},
   254  	{
   255  		Name:             "TrailingCommaIneffective2",
   256  		TrailingComma:    false,
   257  		TrimLeadingSpace: true,
   258  		Input:            "a,b,\nc,d,e",
   259  		Output: [][]string{
   260  			{"a", "b", ""},
   261  			{"c", "d", "e"},
   262  		},
   263  	},
   264  	{
   265  		Name:        "ReadAllReuseRecord",
   266  		ReuseRecord: true,
   267  		Input:       "a,b\nc,d",
   268  		Output: [][]string{
   269  			{"a", "b"},
   270  			{"c", "d"},
   271  		},
   272  	},
   273  }
   274  
   275  func TestRead(t *testing.T) {
   276  	for _, tt := range readTests {
   277  		r := NewReader(strings.NewReader(tt.Input))
   278  		r.Comment = tt.Comment
   279  		if tt.UseFieldsPerRecord {
   280  			r.FieldsPerRecord = tt.FieldsPerRecord
   281  		} else {
   282  			r.FieldsPerRecord = -1
   283  		}
   284  		r.LazyQuotes = tt.LazyQuotes
   285  		r.TrailingComma = tt.TrailingComma
   286  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   287  		r.ReuseRecord = tt.ReuseRecord
   288  		if tt.Comma != 0 {
   289  			r.Comma = tt.Comma
   290  		}
   291  		out, err := r.ReadAll()
   292  		perr, _ := err.(*ParseError)
   293  		if tt.Error != "" {
   294  			if err == nil || !strings.Contains(err.Error(), tt.Error) {
   295  				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
   296  			} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
   297  				t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
   298  			}
   299  		} else if err != nil {
   300  			t.Errorf("%s: unexpected error %v", tt.Name, err)
   301  		} else if !reflect.DeepEqual(out, tt.Output) {
   302  			t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
   303  		}
   304  	}
   305  }
   306  
   307  // nTimes is an io.Reader which yields the string s n times.
   308  type nTimes struct {
   309  	s   string
   310  	n   int
   311  	off int
   312  }
   313  
   314  func (r *nTimes) Read(p []byte) (n int, err error) {
   315  	for {
   316  		if r.n <= 0 || r.s == "" {
   317  			return n, io.EOF
   318  		}
   319  		n0 := copy(p, r.s[r.off:])
   320  		p = p[n0:]
   321  		n += n0
   322  		r.off += n0
   323  		if r.off == len(r.s) {
   324  			r.off = 0
   325  			r.n--
   326  		}
   327  		if len(p) == 0 {
   328  			return
   329  		}
   330  	}
   331  }
   332  
   333  // benchmarkRead measures reading the provided CSV rows data.
   334  // initReader, if non-nil, modifies the Reader before it's used.
   335  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   336  	b.ReportAllocs()
   337  	r := NewReader(&nTimes{s: rows, n: b.N})
   338  	if initReader != nil {
   339  		initReader(r)
   340  	}
   341  	for {
   342  		_, err := r.Read()
   343  		if err == io.EOF {
   344  			break
   345  		}
   346  		if err != nil {
   347  			b.Fatal(err)
   348  		}
   349  	}
   350  }
   351  
   352  const benchmarkCSVData = `x,y,z,w
   353  x,y,z,
   354  x,y,,
   355  x,,,
   356  ,,,
   357  "x","y","z","w"
   358  "x","y","z",""
   359  "x","y","",""
   360  "x","","",""
   361  "","","",""
   362  `
   363  
   364  func BenchmarkRead(b *testing.B) {
   365  	benchmarkRead(b, nil, benchmarkCSVData)
   366  }
   367  
   368  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   369  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   370  }
   371  
   372  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   373  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   374  }
   375  
   376  func BenchmarkReadLargeFields(b *testing.B) {
   377  	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   378  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   379  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   380  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   381  `, 3))
   382  }
   383  
   384  func BenchmarkReadReuseRecord(b *testing.B) {
   385  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   386  }
   387  
   388  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   389  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   390  }
   391  
   392  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   393  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   394  }
   395  
   396  func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
   397  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   398  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   399  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   400  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   401  `, 3))
   402  }