github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/encoding/csv/reader_test.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"io"
     9  	"reflect"
    10  	"strings"
    11  	"testing"
    12  )
    13  
// readTests is the table of cases driven by TestRead. Each case supplies raw
// CSV input together with the Reader options to apply, and expects either the
// parsed records in Output or, when Error is non-empty, a failure whose
// message contains Error.
var readTests = []struct {
	Name               string
	Input              string
	Output             [][]string
	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1

	// These fields are copied into the Reader by TestRead. Comma is applied
	// only when it is non-zero, and FieldsPerRecord only when
	// UseFieldsPerRecord is set.
	Comma            rune
	Comment          rune
	FieldsPerRecord  int
	LazyQuotes       bool
	TrailingComma    bool
	TrimLeadingSpace bool

	// Error, if non-empty, is a substring expected in the returned error's text.
	Error  string
	Line   int // Expected error line if != 0
	Column int // Expected error column if line != 0
}{
	{
		Name:   "Simple",
		Input:  "a,b,c\n",
		Output: [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "CRLF",
		Input:  "a,b\r\nc,d\r\n",
		Output: [][]string{{"a", "b"}, {"c", "d"}},
	},
	// A carriage return that is not followed by a newline is expected to stay
	// inside the field rather than end the record.
	{
		Name:   "BareCR",
		Input:  "a,b\rc,d\r\n",
		Output: [][]string{{"a", "b\rc", "d"}},
	},
	// Comment is left unset here, so the leading "#field1" line is expected
	// back as an ordinary record.
	{
		Name:               "RFC4180test",
		UseFieldsPerRecord: true,
		Input: `#field1,field2,field3
"aaa","bb
b","ccc"
"a,a","b""bb","ccc"
zzz,yyy,xxx
`,
		Output: [][]string{
			{"#field1", "field2", "field3"},
			{"aaa", "bb\nb", "ccc"},
			{"a,a", `b"bb`, "ccc"},
			{"zzz", "yyy", "xxx"},
		},
	},
	{
		Name:   "NoEOLTest",
		Input:  "a,b,c",
		Output: [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "Semicolon",
		Comma:  ';',
		Input:  "a;b;c\n",
		Output: [][]string{{"a", "b", "c"}},
	},
	// Newlines inside quoted fields are expected to be preserved in the field.
	{
		Name: "MultiLine",
		Input: `"two
line","one line","three
line
field"`,
		Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
	},
	// Blank lines are expected to produce no records, with or without
	// UseFieldsPerRecord (see the next case).
	{
		Name:  "BlankLine",
		Input: "a,b,c\n\nd,e,f\n\n",
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
	},
	{
		Name:               "BlankLineFieldCount",
		Input:              "a,b,c\n\nd,e,f\n\n",
		UseFieldsPerRecord: true,
		Output: [][]string{
			{"a", "b", "c"},
			{"d", "e", "f"},
		},
	},
	{
		Name:             "TrimSpace",
		Input:            " a,  b,   c\n",
		TrimLeadingSpace: true,
		Output:           [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "LeadingSpace",
		Input:  " a,  b,   c\n",
		Output: [][]string{{" a", "  b", "   c"}},
	},
	// With Comment set to '#', lines beginning with '#' are expected to be
	// dropped; the NoComment case below feeds similar input with Comment unset.
	{
		Name:    "Comment",
		Comment: '#',
		Input:   "#1,2,3\na,b,c\n#comment",
		Output:  [][]string{{"a", "b", "c"}},
	},
	{
		Name:   "NoComment",
		Input:  "#1,2,3\na,b,c",
		Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
	},
	{
		Name:       "LazyQuotes",
		LazyQuotes: true,
		Input:      `a "word","1"2",a","b`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	},
	{
		Name:       "BareQuotes",
		LazyQuotes: true,
		Input:      `a "word","1"2",a"`,
		Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	},
	// BareDoubleQuotes and BadDoubleQuotes share the same input; only the
	// LazyQuotes setting differs, turning a successful parse into an error.
	{
		Name:       "BareDoubleQuotes",
		LazyQuotes: true,
		Input:      `a""b,c`,
		Output:     [][]string{{`a""b`, `c`}},
	},
	{
		Name:  "BadDoubleQuotes",
		Input: `a""b,c`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
	},
	{
		Name:             "TrimQuote",
		Input:            ` "a"," b",c`,
		TrimLeadingSpace: true,
		Output:           [][]string{{"a", " b", "c"}},
	},
	{
		Name:  "BadBareQuote",
		Input: `a "word","b"`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
	},
	{
		Name:  "BadTrailingQuote",
		Input: `"a word",b"`,
		Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
	},
	{
		Name:  "ExtraneousQuote",
		Input: `"a "word","b"`,
		Error: `extraneous " in field`, Line: 1, Column: 3,
	},
	// The field-count cases set Line only; Column is left at zero, which
	// TestRead still compares against the reported column.
	{
		Name:               "BadFieldCount",
		UseFieldsPerRecord: true,
		Input:              "a,b,c\nd,e",
		Error:              "wrong number of fields", Line: 2,
	},
	{
		Name:               "BadFieldCount1",
		UseFieldsPerRecord: true,
		FieldsPerRecord:    2,
		Input:              `a,b,c`,
		Error:              "wrong number of fields", Line: 1,
	},
	{
		Name:   "FieldCount",
		Input:  "a,b,c\nd,e",
		Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
	},
	{
		Name:   "TrailingCommaEOF",
		Input:  "a,b,c,",
		Output: [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:   "TrailingCommaEOL",
		Input:  "a,b,c,\n",
		Output: [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaSpaceEOF",
		TrimLeadingSpace: true,
		Input:            "a,b,c, ",
		Output:           [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaSpaceEOL",
		TrimLeadingSpace: true,
		Input:            "a,b,c, \n",
		Output:           [][]string{{"a", "b", "c", ""}},
	},
	{
		Name:             "TrailingCommaLine3",
		TrimLeadingSpace: true,
		Input:            "a,b,c\nd,e,f\ng,hi,",
		Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	},
	{
		Name:   "NotTrailingComma3",
		Input:  "a,b,c, \n",
		Output: [][]string{{"a", "b", "c", " "}},
	},
	{
		Name:          "CommaFieldTest",
		TrailingComma: true,
		Input: `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`,
		Output: [][]string{
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
			{"x", "y", "z", "w"},
			{"x", "y", "z", ""},
			{"x", "y", "", ""},
			{"x", "", "", ""},
			{"", "", "", ""},
		},
	},
	// The two TrailingCommaIneffective cases are identical except for the
	// TrailingComma setting and expect the same records either way.
	{
		Name:             "TrailingCommaIneffective1",
		TrailingComma:    true,
		TrimLeadingSpace: true,
		Input:            "a,b,\nc,d,e",
		Output: [][]string{
			{"a", "b", ""},
			{"c", "d", "e"},
		},
	},
	{
		Name:             "TrailingCommaIneffective2",
		TrailingComma:    false,
		TrimLeadingSpace: true,
		Input:            "a,b,\nc,d,e",
		Output: [][]string{
			{"a", "b", ""},
			{"c", "d", "e"},
		},
	},
}
   264  
   265  func TestRead(t *testing.T) {
   266  	for _, tt := range readTests {
   267  		r := NewReader(strings.NewReader(tt.Input))
   268  		r.Comment = tt.Comment
   269  		if tt.UseFieldsPerRecord {
   270  			r.FieldsPerRecord = tt.FieldsPerRecord
   271  		} else {
   272  			r.FieldsPerRecord = -1
   273  		}
   274  		r.LazyQuotes = tt.LazyQuotes
   275  		r.TrailingComma = tt.TrailingComma
   276  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   277  		if tt.Comma != 0 {
   278  			r.Comma = tt.Comma
   279  		}
   280  		out, err := r.ReadAll()
   281  		perr, _ := err.(*ParseError)
   282  		if tt.Error != "" {
   283  			if err == nil || !strings.Contains(err.Error(), tt.Error) {
   284  				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
   285  			} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
   286  				t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
   287  			}
   288  		} else if err != nil {
   289  			t.Errorf("%s: unexpected error %v", tt.Name, err)
   290  		} else if !reflect.DeepEqual(out, tt.Output) {
   291  			t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
   292  		}
   293  	}
   294  }
   295  
   296  // nTimes is an io.Reader which yields the string s n times.
   297  type nTimes struct {
   298  	s   string
   299  	n   int
   300  	off int
   301  }
   302  
   303  func (r *nTimes) Read(p []byte) (n int, err error) {
   304  	for {
   305  		if r.n <= 0 || r.s == "" {
   306  			return n, io.EOF
   307  		}
   308  		n0 := copy(p, r.s[r.off:])
   309  		p = p[n0:]
   310  		n += n0
   311  		r.off += n0
   312  		if r.off == len(r.s) {
   313  			r.off = 0
   314  			r.n--
   315  		}
   316  		if len(p) == 0 {
   317  			return
   318  		}
   319  	}
   320  }
   321  
   322  // benchmarkRead measures reading the provided CSV rows data.
   323  // initReader, if non-nil, modifies the Reader before it's used.
   324  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   325  	b.ReportAllocs()
   326  	r := NewReader(&nTimes{s: rows, n: b.N})
   327  	if initReader != nil {
   328  		initReader(r)
   329  	}
   330  	for {
   331  		_, err := r.Read()
   332  		if err == io.EOF {
   333  			break
   334  		}
   335  		if err != nil {
   336  			b.Fatal(err)
   337  		}
   338  	}
   339  }
   340  
// benchmarkCSVData is the input used by BenchmarkRead and the two
// FieldsPerRecord benchmarks: ten records of four fields each, five written
// unquoted followed by the same five with every field quoted.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
   352  
   353  func BenchmarkRead(b *testing.B) {
   354  	benchmarkRead(b, nil, benchmarkCSVData)
   355  }
   356  
   357  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   358  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   359  }
   360  
   361  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   362  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   363  }
   364  
// BenchmarkReadLargeFields measures reading records whose fields are long runs
// of a single character. The input is a four-row block of five-field records,
// repeated three times with strings.Repeat, and is read with the Reader as
// NewReader returns it (no initReader).
func BenchmarkReadLargeFields(b *testing.B) {
	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}