github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/encoding/csv/reader_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csv 6 7 import ( 8 "io" 9 "reflect" 10 "strings" 11 "testing" 12 "unicode/utf8" 13 ) 14 15 func TestRead(t *testing.T) { 16 tests := []struct { 17 Name string 18 Input string 19 Output [][]string 20 Error error 21 22 // These fields are copied into the Reader 23 Comma rune 24 Comment rune 25 UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 26 FieldsPerRecord int 27 LazyQuotes bool 28 TrimLeadingSpace bool 29 ReuseRecord bool 30 }{{ 31 Name: "Simple", 32 Input: "a,b,c\n", 33 Output: [][]string{{"a", "b", "c"}}, 34 }, { 35 Name: "CRLF", 36 Input: "a,b\r\nc,d\r\n", 37 Output: [][]string{{"a", "b"}, {"c", "d"}}, 38 }, { 39 Name: "BareCR", 40 Input: "a,b\rc,d\r\n", 41 Output: [][]string{{"a", "b\rc", "d"}}, 42 }, { 43 Name: "RFC4180test", 44 Input: `#field1,field2,field3 45 "aaa","bb 46 b","ccc" 47 "a,a","b""bb","ccc" 48 zzz,yyy,xxx 49 `, 50 Output: [][]string{ 51 {"#field1", "field2", "field3"}, 52 {"aaa", "bb\nb", "ccc"}, 53 {"a,a", `b"bb`, "ccc"}, 54 {"zzz", "yyy", "xxx"}, 55 }, 56 UseFieldsPerRecord: true, 57 FieldsPerRecord: 0, 58 }, { 59 Name: "NoEOLTest", 60 Input: "a,b,c", 61 Output: [][]string{{"a", "b", "c"}}, 62 }, { 63 Name: "Semicolon", 64 Input: "a;b;c\n", 65 Output: [][]string{{"a", "b", "c"}}, 66 Comma: ';', 67 }, { 68 Name: "MultiLine", 69 Input: `"two 70 line","one line","three 71 line 72 field"`, 73 Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, 74 }, { 75 Name: "BlankLine", 76 Input: "a,b,c\n\nd,e,f\n\n", 77 Output: [][]string{ 78 {"a", "b", "c"}, 79 {"d", "e", "f"}, 80 }, 81 }, { 82 Name: "BlankLineFieldCount", 83 Input: "a,b,c\n\nd,e,f\n\n", 84 Output: [][]string{ 85 {"a", "b", "c"}, 86 {"d", "e", "f"}, 87 }, 88 UseFieldsPerRecord: true, 89 FieldsPerRecord: 0, 90 }, { 91 Name: "TrimSpace", 92 Input: " a, b, c\n", 93 Output: [][]string{{"a", "b", "c"}}, 94 TrimLeadingSpace: true, 95 }, { 96 Name: "LeadingSpace", 97 Input: " a, b, c\n", 98 Output: [][]string{{" a", " b", " c"}}, 99 }, { 100 Name: "Comment", 101 Input: "#1,2,3\na,b,c\n#comment", 102 Output: [][]string{{"a", "b", "c"}}, 103 Comment: '#', 104 }, { 105 Name: "NoComment", 106 Input: "#1,2,3\na,b,c", 107 Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, 108 }, { 109 Name: "LazyQuotes", 110 Input: `a "word","1"2",a","b`, 111 Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, 112 LazyQuotes: true, 113 }, { 114 Name: "BareQuotes", 115 Input: `a "word","1"2",a"`, 116 Output: [][]string{{`a "word"`, `1"2`, `a"`}}, 117 LazyQuotes: true, 118 }, { 119 Name: "BareDoubleQuotes", 120 Input: `a""b,c`, 121 Output: [][]string{{`a""b`, `c`}}, 122 LazyQuotes: true, 123 }, { 124 Name: "BadDoubleQuotes", 125 Input: `a""b,c`, 126 Error: &ParseError{RecordLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, 127 }, { 128 Name: "TrimQuote", 129 Input: ` "a"," b",c`, 130 Output: [][]string{{"a", " b", "c"}}, 131 TrimLeadingSpace: true, 132 }, { 133 Name: "BadBareQuote", 134 Input: `a "word","b"`, 135 Error: &ParseError{RecordLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, 136 }, { 137 Name: "BadTrailingQuote", 138 Input: `"a word",b"`, 139 Error: &ParseError{RecordLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, 140 }, { 141 Name: "ExtraneousQuote", 142 Input: `"a "word","b"`, 143 Error: &ParseError{RecordLine: 1, Line: 1, Column: 3, Err: ErrQuote}, 144 }, { 145 Name: "BadFieldCount", 146 Input: "a,b,c\nd,e", 147 Error: &ParseError{RecordLine: 2, Line: 2, Err: ErrFieldCount}, 148 UseFieldsPerRecord: true, 149 FieldsPerRecord: 0, 150 }, { 151 Name: "BadFieldCount1", 152 Input: `a,b,c`, 153 Error: &ParseError{RecordLine: 1, Line: 1, Err: ErrFieldCount}, 154 UseFieldsPerRecord: true, 155 FieldsPerRecord: 2, 156 }, { 157 Name: "FieldCount", 158 Input: "a,b,c\nd,e", 159 Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, 160 }, { 161 Name: "TrailingCommaEOF", 162 Input: "a,b,c,", 163 Output: [][]string{{"a", "b", "c", ""}}, 164 }, { 165 Name: "TrailingCommaEOL", 166 Input: "a,b,c,\n", 167 Output: [][]string{{"a", "b", "c", ""}}, 168 }, { 169 Name: "TrailingCommaSpaceEOF", 170 Input: "a,b,c, ", 171 Output: [][]string{{"a", "b", "c", ""}}, 172 TrimLeadingSpace: true, 173 }, { 174 Name: "TrailingCommaSpaceEOL", 175 Input: "a,b,c, \n", 176 Output: [][]string{{"a", "b", "c", ""}}, 177 TrimLeadingSpace: true, 178 }, { 179 Name: "TrailingCommaLine3", 180 Input: "a,b,c\nd,e,f\ng,hi,", 181 Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, 182 TrimLeadingSpace: true, 183 }, { 184 Name: "NotTrailingComma3", 185 Input: "a,b,c, \n", 186 Output: [][]string{{"a", "b", "c", " "}}, 187 }, { 188 Name: "CommaFieldTest", 189 Input: `x,y,z,w 190 x,y,z, 191 x,y,, 192 x,,, 193 ,,, 194 "x","y","z","w" 195 "x","y","z","" 196 "x","y","","" 197 "x","","","" 198 "","","","" 199 `, 200 Output: [][]string{ 201 {"x", "y", "z", "w"}, 202 {"x", "y", "z", ""}, 203 {"x", "y", "", ""}, 204 {"x", "", "", ""}, 205 {"", "", "", ""}, 206 {"x", "y", "z", "w"}, 207 {"x", "y", "z", ""}, 208 {"x", "y", "", ""}, 209 {"x", "", "", ""}, 210 {"", "", "", ""}, 211 }, 212 }, { 213 Name: "TrailingCommaIneffective1", 214 Input: "a,b,\nc,d,e", 215 Output: [][]string{ 216 {"a", "b", ""}, 217 {"c", "d", "e"}, 218 }, 219 TrimLeadingSpace: true, 220 }, { 221 Name: "ReadAllReuseRecord", 222 Input: "a,b\nc,d", 223 Output: [][]string{ 224 {"a", "b"}, 225 {"c", "d"}, 226 }, 227 ReuseRecord: true, 228 }, { 229 Name: "RecordLine1", // Issue 19019 230 Input: "a,\"b\nc\"d,e", 231 Error: &ParseError{RecordLine: 1, Line: 2, Column: 1, Err: ErrQuote}, 232 }, { 233 Name: "RecordLine2", 234 Input: "a,b\n\"d\n\n,e", 235 Error: &ParseError{RecordLine: 2, Line: 5, Column: 0, Err: ErrQuote}, 236 }, { 237 Name: "CRLFInQuotedField", // Issue 21201 238 Input: "\"Hello\r\nHi\"", 239 Output: [][]string{ 240 {"Hello\r\nHi"}, 241 }, 242 }, { 243 Name: "BinaryBlobField", // Issue 19410 244 Input: "x09\x41\xb4\x1c,aktau", 245 Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, 246 }, { 247 Name: "TrailingCR", 248 Input: "field1,field2\r", 249 Output: [][]string{{"field1", "field2\r"}}, 250 }, { 251 Name: "NonASCIICommaAndComment", 252 Input: "a£b,c£ \td,e\n€ comment\n", 253 Output: [][]string{{"a", "b,c", "d,e"}}, 254 TrimLeadingSpace: true, 255 Comma: '£', 256 Comment: '€', 257 }, { 258 Name: "NonASCIICommaAndCommentWithQuotes", 259 Input: "a€\" b,\"€ c\nλ comment\n", 260 Output: [][]string{{"a", " b,", " c"}}, 261 Comma: '€', 262 Comment: 'λ', 263 }, { 264 // λ and θ start with the same byte. 265 // This tests that the parser doesn't confuse such characters. 266 Name: "NonASCIICommaConfusion", 267 Input: "\"abθcd\"λefθgh", 268 Output: [][]string{{"abθcd", "efθgh"}}, 269 Comma: 'λ', 270 Comment: '€', 271 }, { 272 Name: "NonASCIICommentConfusion", 273 Input: "λ\nλ\nθ\nλ\n", 274 Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, 275 Comment: 'θ', 276 }, { 277 Name: "QuotedFieldMultipleLF", 278 Input: "\"\n\n\n\n\"", 279 Output: [][]string{{"\n\n\n\n"}}, 280 }, { 281 Name: "MultipleCRLF", 282 Input: "\r\n\r\n\r\n\r\n", 283 }, { 284 // The implementation may read each line in several chunks if it doesn't fit entirely 285 // in the read buffer, so we should test the code to handle that condition. 286 Name: "HugeLines", 287 Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), 288 Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, 289 Comment: '#', 290 }, { 291 Name: "QuoteWithTrailingCRLF", 292 Input: "\"foo\"bar\"\r\n", 293 Error: &ParseError{RecordLine: 1, Line: 1, Column: 4, Err: ErrQuote}, 294 }, { 295 Name: "LazyQuoteWithTrailingCRLF", 296 Input: "\"foo\"bar\"\r\n", 297 Output: [][]string{{`foo"bar`}}, 298 LazyQuotes: true, 299 }, { 300 Name: "DoubleQuoteWithTrailingCRLF", 301 Input: "\"foo\"\"bar\"\r\n", 302 Output: [][]string{{`foo"bar`}}, 303 }, { 304 Name: "EvenQuotes", 305 Input: `""""""""`, 306 Output: [][]string{{`"""`}}, 307 }, { 308 Name: "OddQuotes", 309 Input: `"""""""`, 310 Error: &ParseError{RecordLine: 1, Line: 1, Column: 7, Err: ErrQuote}, 311 }, { 312 Name: "LazyOddQuotes", 313 Input: `"""""""`, 314 Output: [][]string{{`"""`}}, 315 LazyQuotes: true, 316 }, { 317 Name: "BadComma1", 318 Comma: '\n', 319 Error: errInvalidDelim, 320 }, { 321 Name: "BadComma2", 322 Comma: '\r', 323 Error: errInvalidDelim, 324 }, { 325 Name: "BadComma3", 326 Comma: utf8.RuneError, 327 Error: errInvalidDelim, 328 }, { 329 Name: "BadComment1", 330 Comment: '\n', 331 Error: errInvalidDelim, 332 }, { 333 Name: "BadComment2", 334 Comment: '\r', 335 Error: errInvalidDelim, 336 }, { 337 Name: "BadComment3", 338 Comment: utf8.RuneError, 339 Error: errInvalidDelim, 340 }, { 341 Name: "BadCommaComment", 342 Comma: 'X', 343 Comment: 'X', 344 Error: errInvalidDelim, 345 }} 346 347 for _, tt := range tests { 348 t.Run(tt.Name, func(t *testing.T) { 349 r := NewReader(strings.NewReader(tt.Input)) 350 351 if tt.Comma != 0 { 352 r.Comma = tt.Comma 353 } 354 r.Comment = tt.Comment 355 if tt.UseFieldsPerRecord { 356 r.FieldsPerRecord = tt.FieldsPerRecord 357 } else { 358 r.FieldsPerRecord = -1 359 } 360 r.LazyQuotes = tt.LazyQuotes 361 r.TrimLeadingSpace = tt.TrimLeadingSpace 362 r.ReuseRecord = tt.ReuseRecord 363 364 out, err := r.ReadAll() 365 if !reflect.DeepEqual(err, tt.Error) { 366 t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) 367 } else if !reflect.DeepEqual(out, tt.Output) { 368 t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) 369 } 370 }) 371 } 372 } 373 374 // nTimes is an io.Reader which yields the string s n times. 375 type nTimes struct { 376 s string 377 n int 378 off int 379 } 380 381 func (r *nTimes) Read(p []byte) (n int, err error) { 382 for { 383 if r.n <= 0 || r.s == "" { 384 return n, io.EOF 385 } 386 n0 := copy(p, r.s[r.off:]) 387 p = p[n0:] 388 n += n0 389 r.off += n0 390 if r.off == len(r.s) { 391 r.off = 0 392 r.n-- 393 } 394 if len(p) == 0 { 395 return 396 } 397 } 398 } 399 400 // benchmarkRead measures reading the provided CSV rows data. 401 // initReader, if non-nil, modifies the Reader before it's used. 402 func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { 403 b.ReportAllocs() 404 r := NewReader(&nTimes{s: rows, n: b.N}) 405 if initReader != nil { 406 initReader(r) 407 } 408 for { 409 _, err := r.Read() 410 if err == io.EOF { 411 break 412 } 413 if err != nil { 414 b.Fatal(err) 415 } 416 } 417 } 418 419 const benchmarkCSVData = `x,y,z,w 420 x,y,z, 421 x,y,, 422 x,,, 423 ,,, 424 "x","y","z","w" 425 "x","y","z","" 426 "x","y","","" 427 "x","","","" 428 "","","","" 429 ` 430 431 func BenchmarkRead(b *testing.B) { 432 benchmarkRead(b, nil, benchmarkCSVData) 433 } 434 435 func BenchmarkReadWithFieldsPerRecord(b *testing.B) { 436 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) 437 } 438 439 func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { 440 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) 441 } 442 443 func BenchmarkReadLargeFields(b *testing.B) { 444 benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 445 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 446 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 447 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 448 `, 3)) 449 } 450 451 func BenchmarkReadReuseRecord(b *testing.B) { 452 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) 453 } 454 455 func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { 456 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) 457 } 458 459 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { 460 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) 461 } 462 463 func BenchmarkReadReuseRecordLargeFields(b *testing.B) { 464 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 465 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 466 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 467 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 468 `, 3)) 469 }