github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/encoding/csv/reader_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csv 6 7 import ( 8 "io" 9 "reflect" 10 "strings" 11 "testing" 12 "unicode/utf8" 13 ) 14 15 func TestRead(t *testing.T) { 16 tests := []struct { 17 Name string 18 Input string 19 Output [][]string 20 Error error 21 22 // These fields are copied into the Reader 23 Comma rune 24 Comment rune 25 UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 26 FieldsPerRecord int 27 LazyQuotes bool 28 TrimLeadingSpace bool 29 ReuseRecord bool 30 }{{ 31 Name: "Simple", 32 Input: "a,b,c\n", 33 Output: [][]string{{"a", "b", "c"}}, 34 }, { 35 Name: "CRLF", 36 Input: "a,b\r\nc,d\r\n", 37 Output: [][]string{{"a", "b"}, {"c", "d"}}, 38 }, { 39 Name: "BareCR", 40 Input: "a,b\rc,d\r\n", 41 Output: [][]string{{"a", "b\rc", "d"}}, 42 }, { 43 Name: "RFC4180test", 44 Input: `#field1,field2,field3 45 "aaa","bb 46 b","ccc" 47 "a,a","b""bb","ccc" 48 zzz,yyy,xxx 49 `, 50 Output: [][]string{ 51 {"#field1", "field2", "field3"}, 52 {"aaa", "bb\nb", "ccc"}, 53 {"a,a", `b"bb`, "ccc"}, 54 {"zzz", "yyy", "xxx"}, 55 }, 56 UseFieldsPerRecord: true, 57 FieldsPerRecord: 0, 58 }, { 59 Name: "NoEOLTest", 60 Input: "a,b,c", 61 Output: [][]string{{"a", "b", "c"}}, 62 }, { 63 Name: "Semicolon", 64 Input: "a;b;c\n", 65 Output: [][]string{{"a", "b", "c"}}, 66 Comma: ';', 67 }, { 68 Name: "MultiLine", 69 Input: `"two 70 line","one line","three 71 line 72 field"`, 73 Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, 74 }, { 75 Name: "BlankLine", 76 Input: "a,b,c\n\nd,e,f\n\n", 77 Output: [][]string{ 78 {"a", "b", "c"}, 79 {"d", "e", "f"}, 80 }, 81 }, { 82 Name: "BlankLineFieldCount", 83 Input: "a,b,c\n\nd,e,f\n\n", 84 Output: [][]string{ 85 {"a", "b", "c"}, 86 {"d", "e", "f"}, 87 }, 88 UseFieldsPerRecord: true, 89 FieldsPerRecord: 0, 90 }, { 91 Name: "TrimSpace", 92 Input: " a, b, c\n", 93 Output: [][]string{{"a", "b", "c"}}, 94 TrimLeadingSpace: true, 95 }, { 96 Name: "LeadingSpace", 97 Input: " a, b, c\n", 98 Output: [][]string{{" a", " b", " c"}}, 99 }, { 100 Name: "Comment", 101 Input: "#1,2,3\na,b,c\n#comment", 102 Output: [][]string{{"a", "b", "c"}}, 103 Comment: '#', 104 }, { 105 Name: "NoComment", 106 Input: "#1,2,3\na,b,c", 107 Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, 108 }, { 109 Name: "LazyQuotes", 110 Input: `a "word","1"2",a","b`, 111 Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, 112 LazyQuotes: true, 113 }, { 114 Name: "BareQuotes", 115 Input: `a "word","1"2",a"`, 116 Output: [][]string{{`a "word"`, `1"2`, `a"`}}, 117 LazyQuotes: true, 118 }, { 119 Name: "BareDoubleQuotes", 120 Input: `a""b,c`, 121 Output: [][]string{{`a""b`, `c`}}, 122 LazyQuotes: true, 123 }, { 124 Name: "BadDoubleQuotes", 125 Input: `a""b,c`, 126 Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, 127 }, { 128 Name: "TrimQuote", 129 Input: ` "a"," b",c`, 130 Output: [][]string{{"a", " b", "c"}}, 131 TrimLeadingSpace: true, 132 }, { 133 Name: "BadBareQuote", 134 Input: `a "word","b"`, 135 Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, 136 }, { 137 Name: "BadTrailingQuote", 138 Input: `"a word",b"`, 139 Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, 140 }, { 141 Name: "ExtraneousQuote", 142 Input: `"a "word","b"`, 143 Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, 144 }, { 145 Name: "BadFieldCount", 146 Input: "a,b,c\nd,e", 147 Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount}, 148 UseFieldsPerRecord: true, 149 FieldsPerRecord: 0, 150 }, { 151 Name: "BadFieldCount1", 152 Input: `a,b,c`, 153 Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, 154 UseFieldsPerRecord: true, 155 FieldsPerRecord: 2, 156 }, { 157 Name: "FieldCount", 158 Input: "a,b,c\nd,e", 159 Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, 160 }, { 161 Name: "TrailingCommaEOF", 162 Input: "a,b,c,", 163 Output: [][]string{{"a", "b", "c", ""}}, 164 }, { 165 Name: "TrailingCommaEOL", 166 Input: "a,b,c,\n", 167 Output: [][]string{{"a", "b", "c", ""}}, 168 }, { 169 Name: "TrailingCommaSpaceEOF", 170 Input: "a,b,c, ", 171 Output: [][]string{{"a", "b", "c", ""}}, 172 TrimLeadingSpace: true, 173 }, { 174 Name: "TrailingCommaSpaceEOL", 175 Input: "a,b,c, \n", 176 Output: [][]string{{"a", "b", "c", ""}}, 177 TrimLeadingSpace: true, 178 }, { 179 Name: "TrailingCommaLine3", 180 Input: "a,b,c\nd,e,f\ng,hi,", 181 Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, 182 TrimLeadingSpace: true, 183 }, { 184 Name: "NotTrailingComma3", 185 Input: "a,b,c, \n", 186 Output: [][]string{{"a", "b", "c", " "}}, 187 }, { 188 Name: "CommaFieldTest", 189 Input: `x,y,z,w 190 x,y,z, 191 x,y,, 192 x,,, 193 ,,, 194 "x","y","z","w" 195 "x","y","z","" 196 "x","y","","" 197 "x","","","" 198 "","","","" 199 `, 200 Output: [][]string{ 201 {"x", "y", "z", "w"}, 202 {"x", "y", "z", ""}, 203 {"x", "y", "", ""}, 204 {"x", "", "", ""}, 205 {"", "", "", ""}, 206 {"x", "y", "z", "w"}, 207 {"x", "y", "z", ""}, 208 {"x", "y", "", ""}, 209 {"x", "", "", ""}, 210 {"", "", "", ""}, 211 }, 212 }, { 213 Name: "TrailingCommaIneffective1", 214 Input: "a,b,\nc,d,e", 215 Output: [][]string{ 216 {"a", "b", ""}, 217 {"c", "d", "e"}, 218 }, 219 TrimLeadingSpace: true, 220 }, { 221 Name: "ReadAllReuseRecord", 222 Input: "a,b\nc,d", 223 Output: [][]string{ 224 {"a", "b"}, 225 {"c", "d"}, 226 }, 227 ReuseRecord: true, 228 }, { 229 Name: "StartLine1", // Issue 19019 230 Input: "a,\"b\nc\"d,e", 231 Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, 232 }, { 233 Name: "StartLine2", 234 Input: "a,b\n\"d\n\n,e", 235 Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, 236 }, { 237 Name: "CRLFInQuotedField", // Issue 21201 238 Input: "A,\"Hello\r\nHi\",B\r\n", 239 Output: [][]string{ 240 {"A", "Hello\nHi", "B"}, 241 }, 242 }, { 243 Name: "BinaryBlobField", // Issue 19410 244 Input: "x09\x41\xb4\x1c,aktau", 245 Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, 246 }, { 247 Name: "TrailingCR", 248 Input: "field1,field2\r", 249 Output: [][]string{{"field1", "field2"}}, 250 }, { 251 Name: "QuotedTrailingCR", 252 Input: "\"field\"\r", 253 Output: [][]string{{"field"}}, 254 }, { 255 Name: "QuotedTrailingCRCR", 256 Input: "\"field\"\r\r", 257 Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, 258 }, { 259 Name: "FieldCR", 260 Input: "field\rfield\r", 261 Output: [][]string{{"field\rfield"}}, 262 }, { 263 Name: "FieldCRCR", 264 Input: "field\r\rfield\r\r", 265 Output: [][]string{{"field\r\rfield\r"}}, 266 }, { 267 Name: "FieldCRCRLF", 268 Input: "field\r\r\nfield\r\r\n", 269 Output: [][]string{{"field\r"}, {"field\r"}}, 270 }, { 271 Name: "FieldCRCRLFCR", 272 Input: "field\r\r\n\rfield\r\r\n\r", 273 Output: [][]string{{"field\r"}, {"\rfield\r"}}, 274 }, { 275 Name: "FieldCRCRLFCRCR", 276 Input: "field\r\r\n\r\rfield\r\r\n\r\r", 277 Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, 278 }, { 279 Name: "MultiFieldCRCRLFCRCR", 280 Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", 281 Output: [][]string{ 282 {"field1", "field2\r"}, 283 {"\r\rfield1", "field2\r"}, 284 {"\r\r", ""}, 285 }, 286 }, { 287 Name: "NonASCIICommaAndComment", 288 Input: "a£b,c£ \td,e\n€ comment\n", 289 Output: [][]string{{"a", "b,c", "d,e"}}, 290 TrimLeadingSpace: true, 291 Comma: '£', 292 Comment: '€', 293 }, { 294 Name: "NonASCIICommaAndCommentWithQuotes", 295 Input: "a€\" b,\"€ c\nλ comment\n", 296 Output: [][]string{{"a", " b,", " c"}}, 297 Comma: '€', 298 Comment: 'λ', 299 }, { 300 // λ and θ start with the same byte. 301 // This tests that the parser doesn't confuse such characters. 302 Name: "NonASCIICommaConfusion", 303 Input: "\"abθcd\"λefθgh", 304 Output: [][]string{{"abθcd", "efθgh"}}, 305 Comma: 'λ', 306 Comment: '€', 307 }, { 308 Name: "NonASCIICommentConfusion", 309 Input: "λ\nλ\nθ\nλ\n", 310 Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, 311 Comment: 'θ', 312 }, { 313 Name: "QuotedFieldMultipleLF", 314 Input: "\"\n\n\n\n\"", 315 Output: [][]string{{"\n\n\n\n"}}, 316 }, { 317 Name: "MultipleCRLF", 318 Input: "\r\n\r\n\r\n\r\n", 319 }, { 320 // The implementation may read each line in several chunks if it doesn't fit entirely 321 // in the read buffer, so we should test the code to handle that condition. 322 Name: "HugeLines", 323 Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), 324 Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, 325 Comment: '#', 326 }, { 327 Name: "QuoteWithTrailingCRLF", 328 Input: "\"foo\"bar\"\r\n", 329 Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, 330 }, { 331 Name: "LazyQuoteWithTrailingCRLF", 332 Input: "\"foo\"bar\"\r\n", 333 Output: [][]string{{`foo"bar`}}, 334 LazyQuotes: true, 335 }, { 336 Name: "DoubleQuoteWithTrailingCRLF", 337 Input: "\"foo\"\"bar\"\r\n", 338 Output: [][]string{{`foo"bar`}}, 339 }, { 340 Name: "EvenQuotes", 341 Input: `""""""""`, 342 Output: [][]string{{`"""`}}, 343 }, { 344 Name: "OddQuotes", 345 Input: `"""""""`, 346 Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}, 347 }, { 348 Name: "LazyOddQuotes", 349 Input: `"""""""`, 350 Output: [][]string{{`"""`}}, 351 LazyQuotes: true, 352 }, { 353 Name: "BadComma1", 354 Comma: '\n', 355 Error: errInvalidDelim, 356 }, { 357 Name: "BadComma2", 358 Comma: '\r', 359 Error: errInvalidDelim, 360 }, { 361 Name: "BadComma3", 362 Comma: utf8.RuneError, 363 Error: errInvalidDelim, 364 }, { 365 Name: "BadComment1", 366 Comment: '\n', 367 Error: errInvalidDelim, 368 }, { 369 Name: "BadComment2", 370 Comment: '\r', 371 Error: errInvalidDelim, 372 }, { 373 Name: "BadComment3", 374 Comment: utf8.RuneError, 375 Error: errInvalidDelim, 376 }, { 377 Name: "BadCommaComment", 378 Comma: 'X', 379 Comment: 'X', 380 Error: errInvalidDelim, 381 }} 382 383 for _, tt := range tests { 384 t.Run(tt.Name, func(t *testing.T) { 385 r := NewReader(strings.NewReader(tt.Input)) 386 387 if tt.Comma != 0 { 388 r.Comma = tt.Comma 389 } 390 r.Comment = tt.Comment 391 if tt.UseFieldsPerRecord { 392 r.FieldsPerRecord = tt.FieldsPerRecord 393 } else { 394 r.FieldsPerRecord = -1 395 } 396 r.LazyQuotes = tt.LazyQuotes 397 r.TrimLeadingSpace = tt.TrimLeadingSpace 398 r.ReuseRecord = tt.ReuseRecord 399 400 out, err := r.ReadAll() 401 if !reflect.DeepEqual(err, tt.Error) { 402 t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) 403 } else if !reflect.DeepEqual(out, tt.Output) { 404 t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) 405 } 406 }) 407 } 408 } 409 410 // nTimes is an io.Reader which yields the string s n times. 411 type nTimes struct { 412 s string 413 n int 414 off int 415 } 416 417 func (r *nTimes) Read(p []byte) (n int, err error) { 418 for { 419 if r.n <= 0 || r.s == "" { 420 return n, io.EOF 421 } 422 n0 := copy(p, r.s[r.off:]) 423 p = p[n0:] 424 n += n0 425 r.off += n0 426 if r.off == len(r.s) { 427 r.off = 0 428 r.n-- 429 } 430 if len(p) == 0 { 431 return 432 } 433 } 434 } 435 436 // benchmarkRead measures reading the provided CSV rows data. 437 // initReader, if non-nil, modifies the Reader before it's used. 438 func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { 439 b.ReportAllocs() 440 r := NewReader(&nTimes{s: rows, n: b.N}) 441 if initReader != nil { 442 initReader(r) 443 } 444 for { 445 _, err := r.Read() 446 if err == io.EOF { 447 break 448 } 449 if err != nil { 450 b.Fatal(err) 451 } 452 } 453 } 454 455 const benchmarkCSVData = `x,y,z,w 456 x,y,z, 457 x,y,, 458 x,,, 459 ,,, 460 "x","y","z","w" 461 "x","y","z","" 462 "x","y","","" 463 "x","","","" 464 "","","","" 465 ` 466 467 func BenchmarkRead(b *testing.B) { 468 benchmarkRead(b, nil, benchmarkCSVData) 469 } 470 471 func BenchmarkReadWithFieldsPerRecord(b *testing.B) { 472 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) 473 } 474 475 func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { 476 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) 477 } 478 479 func BenchmarkReadLargeFields(b *testing.B) { 480 benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 481 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 482 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 483 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 484 `, 3)) 485 } 486 487 func BenchmarkReadReuseRecord(b *testing.B) { 488 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) 489 } 490 491 func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { 492 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) 493 } 494 495 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { 496 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) 497 } 498 499 func BenchmarkReadReuseRecordLargeFields(b *testing.B) { 500 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 501 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 502 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 503 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 504 `, 3)) 505 }