github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/encoding/csv/reader_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csv 6 7 import ( 8 "io" 9 "reflect" 10 "strings" 11 "testing" 12 ) 13 14 var readTests = []struct { 15 Name string 16 Input string 17 Output [][]string 18 UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 19 20 // These fields are copied into the Reader 21 Comma rune 22 Comment rune 23 FieldsPerRecord int 24 LazyQuotes bool 25 TrailingComma bool 26 TrimLeadingSpace bool 27 ReuseRecord bool 28 29 Error string 30 Line int // Expected error line if != 0 31 Column int // Expected error column if line != 0 32 }{ 33 { 34 Name: "Simple", 35 Input: "a,b,c\n", 36 Output: [][]string{{"a", "b", "c"}}, 37 }, 38 { 39 Name: "CRLF", 40 Input: "a,b\r\nc,d\r\n", 41 Output: [][]string{{"a", "b"}, {"c", "d"}}, 42 }, 43 { 44 Name: "BareCR", 45 Input: "a,b\rc,d\r\n", 46 Output: [][]string{{"a", "b\rc", "d"}}, 47 }, 48 { 49 Name: "RFC4180test", 50 UseFieldsPerRecord: true, 51 Input: `#field1,field2,field3 52 "aaa","bb 53 b","ccc" 54 "a,a","b""bb","ccc" 55 zzz,yyy,xxx 56 `, 57 Output: [][]string{ 58 {"#field1", "field2", "field3"}, 59 {"aaa", "bb\nb", "ccc"}, 60 {"a,a", `b"bb`, "ccc"}, 61 {"zzz", "yyy", "xxx"}, 62 }, 63 }, 64 { 65 Name: "NoEOLTest", 66 Input: "a,b,c", 67 Output: [][]string{{"a", "b", "c"}}, 68 }, 69 { 70 Name: "Semicolon", 71 Comma: ';', 72 Input: "a;b;c\n", 73 Output: [][]string{{"a", "b", "c"}}, 74 }, 75 { 76 Name: "MultiLine", 77 Input: `"two 78 line","one line","three 79 line 80 field"`, 81 Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, 82 }, 83 { 84 Name: "BlankLine", 85 Input: "a,b,c\n\nd,e,f\n\n", 86 Output: [][]string{ 87 {"a", "b", "c"}, 88 {"d", "e", "f"}, 89 }, 90 }, 91 { 92 Name: "BlankLineFieldCount", 93 Input: "a,b,c\n\nd,e,f\n\n", 94 UseFieldsPerRecord: true, 95 Output: [][]string{ 96 {"a", "b", "c"}, 97 {"d", "e", "f"}, 98 }, 99 }, 100 { 101 Name: "TrimSpace", 102 Input: " a, b, c\n", 103 TrimLeadingSpace: true, 104 Output: [][]string{{"a", "b", "c"}}, 105 }, 106 { 107 Name: "LeadingSpace", 108 Input: " a, b, c\n", 109 Output: [][]string{{" a", " b", " c"}}, 110 }, 111 { 112 Name: "Comment", 113 Comment: '#', 114 Input: "#1,2,3\na,b,c\n#comment", 115 Output: [][]string{{"a", "b", "c"}}, 116 }, 117 { 118 Name: "NoComment", 119 Input: "#1,2,3\na,b,c", 120 Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, 121 }, 122 { 123 Name: "LazyQuotes", 124 LazyQuotes: true, 125 Input: `a "word","1"2",a","b`, 126 Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, 127 }, 128 { 129 Name: "BareQuotes", 130 LazyQuotes: true, 131 Input: `a "word","1"2",a"`, 132 Output: [][]string{{`a "word"`, `1"2`, `a"`}}, 133 }, 134 { 135 Name: "BareDoubleQuotes", 136 LazyQuotes: true, 137 Input: `a""b,c`, 138 Output: [][]string{{`a""b`, `c`}}, 139 }, 140 { 141 Name: "BadDoubleQuotes", 142 Input: `a""b,c`, 143 Error: `bare " in non-quoted-field`, Line: 1, Column: 1, 144 }, 145 { 146 Name: "TrimQuote", 147 Input: ` "a"," b",c`, 148 TrimLeadingSpace: true, 149 Output: [][]string{{"a", " b", "c"}}, 150 }, 151 { 152 Name: "BadBareQuote", 153 Input: `a "word","b"`, 154 Error: `bare " in non-quoted-field`, Line: 1, Column: 2, 155 }, 156 { 157 Name: "BadTrailingQuote", 158 Input: `"a word",b"`, 159 Error: `bare " in non-quoted-field`, Line: 1, Column: 10, 160 }, 161 { 162 Name: "ExtraneousQuote", 163 Input: `"a "word","b"`, 164 Error: `extraneous " in field`, Line: 1, Column: 3, 165 }, 166 { 167 Name: "BadFieldCount", 168 UseFieldsPerRecord: true, 169 Input: "a,b,c\nd,e", 170 Error: "wrong number of fields", Line: 2, 171 }, 172 { 173 Name: "BadFieldCount1", 174 UseFieldsPerRecord: true, 175 FieldsPerRecord: 2, 176 Input: `a,b,c`, 177 Error: "wrong number of fields", Line: 1, 178 }, 179 { 180 Name: "FieldCount", 181 Input: "a,b,c\nd,e", 182 Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, 183 }, 184 { 185 Name: "TrailingCommaEOF", 186 Input: "a,b,c,", 187 Output: [][]string{{"a", "b", "c", ""}}, 188 }, 189 { 190 Name: "TrailingCommaEOL", 191 Input: "a,b,c,\n", 192 Output: [][]string{{"a", "b", "c", ""}}, 193 }, 194 { 195 Name: "TrailingCommaSpaceEOF", 196 TrimLeadingSpace: true, 197 Input: "a,b,c, ", 198 Output: [][]string{{"a", "b", "c", ""}}, 199 }, 200 { 201 Name: "TrailingCommaSpaceEOL", 202 TrimLeadingSpace: true, 203 Input: "a,b,c, \n", 204 Output: [][]string{{"a", "b", "c", ""}}, 205 }, 206 { 207 Name: "TrailingCommaLine3", 208 TrimLeadingSpace: true, 209 Input: "a,b,c\nd,e,f\ng,hi,", 210 Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, 211 }, 212 { 213 Name: "NotTrailingComma3", 214 Input: "a,b,c, \n", 215 Output: [][]string{{"a", "b", "c", " "}}, 216 }, 217 { 218 Name: "CommaFieldTest", 219 TrailingComma: true, 220 Input: `x,y,z,w 221 x,y,z, 222 x,y,, 223 x,,, 224 ,,, 225 "x","y","z","w" 226 "x","y","z","" 227 "x","y","","" 228 "x","","","" 229 "","","","" 230 `, 231 Output: [][]string{ 232 {"x", "y", "z", "w"}, 233 {"x", "y", "z", ""}, 234 {"x", "y", "", ""}, 235 {"x", "", "", ""}, 236 {"", "", "", ""}, 237 {"x", "y", "z", "w"}, 238 {"x", "y", "z", ""}, 239 {"x", "y", "", ""}, 240 {"x", "", "", ""}, 241 {"", "", "", ""}, 242 }, 243 }, 244 { 245 Name: "TrailingCommaIneffective1", 246 TrailingComma: true, 247 TrimLeadingSpace: true, 248 Input: "a,b,\nc,d,e", 249 Output: [][]string{ 250 {"a", "b", ""}, 251 {"c", "d", "e"}, 252 }, 253 }, 254 { 255 Name: "TrailingCommaIneffective2", 256 TrailingComma: false, 257 TrimLeadingSpace: true, 258 Input: "a,b,\nc,d,e", 259 Output: [][]string{ 260 {"a", "b", ""}, 261 {"c", "d", "e"}, 262 }, 263 }, 264 { 265 Name: "ReadAllReuseRecord", 266 ReuseRecord: true, 267 Input: "a,b\nc,d", 268 Output: [][]string{ 269 {"a", "b"}, 270 {"c", "d"}, 271 }, 272 }, 273 { // issue 19019 274 Name: "RecordLine1", 275 Input: "a,\"b\nc\"d,e", 276 Error: `extraneous " in field`, 277 Line: 1, 278 Column: 1, 279 }, 280 { 281 Name: "RecordLine2", 282 Input: "a,b\n\"d\n\n,e", 283 Error: `extraneous " in field`, 284 Line: 2, 285 Column: 2, 286 }, 287 { // issue 21201 288 Name: "CRLFInQuotedField", 289 Input: "\"Hello\r\nHi\"", 290 Output: [][]string{ 291 {"Hello\r\nHi"}, 292 }, 293 }, 294 } 295 296 func TestRead(t *testing.T) { 297 for _, tt := range readTests { 298 r := NewReader(strings.NewReader(tt.Input)) 299 r.Comment = tt.Comment 300 if tt.UseFieldsPerRecord { 301 r.FieldsPerRecord = tt.FieldsPerRecord 302 } else { 303 r.FieldsPerRecord = -1 304 } 305 r.LazyQuotes = tt.LazyQuotes 306 r.TrailingComma = tt.TrailingComma 307 r.TrimLeadingSpace = tt.TrimLeadingSpace 308 r.ReuseRecord = tt.ReuseRecord 309 if tt.Comma != 0 { 310 r.Comma = tt.Comma 311 } 312 out, err := r.ReadAll() 313 perr, _ := err.(*ParseError) 314 if tt.Error != "" { 315 if err == nil || !strings.Contains(err.Error(), tt.Error) { 316 t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) 317 } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { 318 t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) 319 } 320 } else if err != nil { 321 t.Errorf("%s: unexpected error %v", tt.Name, err) 322 } else if !reflect.DeepEqual(out, tt.Output) { 323 t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) 324 } 325 } 326 } 327 328 // nTimes is an io.Reader which yields the string s n times. 329 type nTimes struct { 330 s string 331 n int 332 off int 333 } 334 335 func (r *nTimes) Read(p []byte) (n int, err error) { 336 for { 337 if r.n <= 0 || r.s == "" { 338 return n, io.EOF 339 } 340 n0 := copy(p, r.s[r.off:]) 341 p = p[n0:] 342 n += n0 343 r.off += n0 344 if r.off == len(r.s) { 345 r.off = 0 346 r.n-- 347 } 348 if len(p) == 0 { 349 return 350 } 351 } 352 } 353 354 // benchmarkRead measures reading the provided CSV rows data. 355 // initReader, if non-nil, modifies the Reader before it's used. 356 func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { 357 b.ReportAllocs() 358 r := NewReader(&nTimes{s: rows, n: b.N}) 359 if initReader != nil { 360 initReader(r) 361 } 362 for { 363 _, err := r.Read() 364 if err == io.EOF { 365 break 366 } 367 if err != nil { 368 b.Fatal(err) 369 } 370 } 371 } 372 373 const benchmarkCSVData = `x,y,z,w 374 x,y,z, 375 x,y,, 376 x,,, 377 ,,, 378 "x","y","z","w" 379 "x","y","z","" 380 "x","y","","" 381 "x","","","" 382 "","","","" 383 ` 384 385 func BenchmarkRead(b *testing.B) { 386 benchmarkRead(b, nil, benchmarkCSVData) 387 } 388 389 func BenchmarkReadWithFieldsPerRecord(b *testing.B) { 390 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) 391 } 392 393 func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { 394 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) 395 } 396 397 func BenchmarkReadLargeFields(b *testing.B) { 398 benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 399 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 400 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 401 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 402 `, 3)) 403 } 404 405 func BenchmarkReadReuseRecord(b *testing.B) { 406 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) 407 } 408 409 func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { 410 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) 411 } 412 413 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { 414 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) 415 } 416 417 func BenchmarkReadReuseRecordLargeFields(b *testing.B) { 418 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 419 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 420 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 421 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 422 `, 3)) 423 }