github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/encoding/csv/reader_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csv 6 7 import ( 8 "io" 9 "reflect" 10 "strings" 11 "testing" 12 ) 13 14 var readTests = []struct { 15 Name string 16 Input string 17 Output [][]string 18 UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 19 20 // These fields are copied into the Reader 21 Comma rune 22 Comment rune 23 FieldsPerRecord int 24 LazyQuotes bool 25 TrailingComma bool 26 TrimLeadingSpace bool 27 ReuseRecord bool 28 29 Error string 30 Line int // Expected error line if != 0 31 Column int // Expected error column if line != 0 32 }{ 33 { 34 Name: "Simple", 35 Input: "a,b,c\n", 36 Output: [][]string{{"a", "b", "c"}}, 37 }, 38 { 39 Name: "CRLF", 40 Input: "a,b\r\nc,d\r\n", 41 Output: [][]string{{"a", "b"}, {"c", "d"}}, 42 }, 43 { 44 Name: "BareCR", 45 Input: "a,b\rc,d\r\n", 46 Output: [][]string{{"a", "b\rc", "d"}}, 47 }, 48 { 49 Name: "RFC4180test", 50 UseFieldsPerRecord: true, 51 Input: `#field1,field2,field3 52 "aaa","bb 53 b","ccc" 54 "a,a","b""bb","ccc" 55 zzz,yyy,xxx 56 `, 57 Output: [][]string{ 58 {"#field1", "field2", "field3"}, 59 {"aaa", "bb\nb", "ccc"}, 60 {"a,a", `b"bb`, "ccc"}, 61 {"zzz", "yyy", "xxx"}, 62 }, 63 }, 64 { 65 Name: "NoEOLTest", 66 Input: "a,b,c", 67 Output: [][]string{{"a", "b", "c"}}, 68 }, 69 { 70 Name: "Semicolon", 71 Comma: ';', 72 Input: "a;b;c\n", 73 Output: [][]string{{"a", "b", "c"}}, 74 }, 75 { 76 Name: "MultiLine", 77 Input: `"two 78 line","one line","three 79 line 80 field"`, 81 Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, 82 }, 83 { 84 Name: "BlankLine", 85 Input: "a,b,c\n\nd,e,f\n\n", 86 Output: [][]string{ 87 {"a", "b", "c"}, 88 {"d", "e", "f"}, 89 }, 90 }, 91 { 92 Name: "BlankLineFieldCount", 93 Input: "a,b,c\n\nd,e,f\n\n", 94 UseFieldsPerRecord: true, 95 Output: [][]string{ 96 {"a", "b", "c"}, 97 {"d", "e", "f"}, 98 }, 99 }, 100 { 101 Name: "TrimSpace", 102 Input: " a, b, c\n", 103 TrimLeadingSpace: true, 104 Output: [][]string{{"a", "b", "c"}}, 105 }, 106 { 107 Name: "LeadingSpace", 108 Input: " a, b, c\n", 109 Output: [][]string{{" a", " b", " c"}}, 110 }, 111 { 112 Name: "Comment", 113 Comment: '#', 114 Input: "#1,2,3\na,b,c\n#comment", 115 Output: [][]string{{"a", "b", "c"}}, 116 }, 117 { 118 Name: "NoComment", 119 Input: "#1,2,3\na,b,c", 120 Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, 121 }, 122 { 123 Name: "LazyQuotes", 124 LazyQuotes: true, 125 Input: `a "word","1"2",a","b`, 126 Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, 127 }, 128 { 129 Name: "BareQuotes", 130 LazyQuotes: true, 131 Input: `a "word","1"2",a"`, 132 Output: [][]string{{`a "word"`, `1"2`, `a"`}}, 133 }, 134 { 135 Name: "BareDoubleQuotes", 136 LazyQuotes: true, 137 Input: `a""b,c`, 138 Output: [][]string{{`a""b`, `c`}}, 139 }, 140 { 141 Name: "BadDoubleQuotes", 142 Input: `a""b,c`, 143 Error: `bare " in non-quoted-field`, Line: 1, Column: 1, 144 }, 145 { 146 Name: "TrimQuote", 147 Input: ` "a"," b",c`, 148 TrimLeadingSpace: true, 149 Output: [][]string{{"a", " b", "c"}}, 150 }, 151 { 152 Name: "BadBareQuote", 153 Input: `a "word","b"`, 154 Error: `bare " in non-quoted-field`, Line: 1, Column: 2, 155 }, 156 { 157 Name: "BadTrailingQuote", 158 Input: `"a word",b"`, 159 Error: `bare " in non-quoted-field`, Line: 1, Column: 10, 160 }, 161 { 162 Name: "ExtraneousQuote", 163 Input: `"a "word","b"`, 164 Error: `extraneous " in field`, Line: 1, Column: 3, 165 }, 166 { 167 Name: "BadFieldCount", 168 UseFieldsPerRecord: true, 169 Input: "a,b,c\nd,e", 170 Error: "wrong number of fields", Line: 2, 171 }, 172 { 173 Name: "BadFieldCount1", 174 UseFieldsPerRecord: true, 175 FieldsPerRecord: 2, 176 Input: `a,b,c`, 177 Error: "wrong number of fields", Line: 1, 178 }, 179 { 180 Name: "FieldCount", 181 Input: "a,b,c\nd,e", 182 Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, 183 }, 184 { 185 Name: "TrailingCommaEOF", 186 Input: "a,b,c,", 187 Output: [][]string{{"a", "b", "c", ""}}, 188 }, 189 { 190 Name: "TrailingCommaEOL", 191 Input: "a,b,c,\n", 192 Output: [][]string{{"a", "b", "c", ""}}, 193 }, 194 { 195 Name: "TrailingCommaSpaceEOF", 196 TrimLeadingSpace: true, 197 Input: "a,b,c, ", 198 Output: [][]string{{"a", "b", "c", ""}}, 199 }, 200 { 201 Name: "TrailingCommaSpaceEOL", 202 TrimLeadingSpace: true, 203 Input: "a,b,c, \n", 204 Output: [][]string{{"a", "b", "c", ""}}, 205 }, 206 { 207 Name: "TrailingCommaLine3", 208 TrimLeadingSpace: true, 209 Input: "a,b,c\nd,e,f\ng,hi,", 210 Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, 211 }, 212 { 213 Name: "NotTrailingComma3", 214 Input: "a,b,c, \n", 215 Output: [][]string{{"a", "b", "c", " "}}, 216 }, 217 { 218 Name: "CommaFieldTest", 219 TrailingComma: true, 220 Input: `x,y,z,w 221 x,y,z, 222 x,y,, 223 x,,, 224 ,,, 225 "x","y","z","w" 226 "x","y","z","" 227 "x","y","","" 228 "x","","","" 229 "","","","" 230 `, 231 Output: [][]string{ 232 {"x", "y", "z", "w"}, 233 {"x", "y", "z", ""}, 234 {"x", "y", "", ""}, 235 {"x", "", "", ""}, 236 {"", "", "", ""}, 237 {"x", "y", "z", "w"}, 238 {"x", "y", "z", ""}, 239 {"x", "y", "", ""}, 240 {"x", "", "", ""}, 241 {"", "", "", ""}, 242 }, 243 }, 244 { 245 Name: "TrailingCommaIneffective1", 246 TrailingComma: true, 247 TrimLeadingSpace: true, 248 Input: "a,b,\nc,d,e", 249 Output: [][]string{ 250 {"a", "b", ""}, 251 {"c", "d", "e"}, 252 }, 253 }, 254 { 255 Name: "TrailingCommaIneffective2", 256 TrailingComma: false, 257 TrimLeadingSpace: true, 258 Input: "a,b,\nc,d,e", 259 Output: [][]string{ 260 {"a", "b", ""}, 261 {"c", "d", "e"}, 262 }, 263 }, 264 { 265 Name: "ReadAllReuseRecord", 266 ReuseRecord: true, 267 Input: "a,b\nc,d", 268 Output: [][]string{ 269 {"a", "b"}, 270 {"c", "d"}, 271 }, 272 }, 273 } 274 275 func TestRead(t *testing.T) { 276 for _, tt := range readTests { 277 r := NewReader(strings.NewReader(tt.Input)) 278 r.Comment = tt.Comment 279 if tt.UseFieldsPerRecord { 280 r.FieldsPerRecord = tt.FieldsPerRecord 281 } else { 282 r.FieldsPerRecord = -1 283 } 284 r.LazyQuotes = tt.LazyQuotes 285 r.TrailingComma = tt.TrailingComma 286 r.TrimLeadingSpace = tt.TrimLeadingSpace 287 r.ReuseRecord = tt.ReuseRecord 288 if tt.Comma != 0 { 289 r.Comma = tt.Comma 290 } 291 out, err := r.ReadAll() 292 perr, _ := err.(*ParseError) 293 if tt.Error != "" { 294 if err == nil || !strings.Contains(err.Error(), tt.Error) { 295 t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) 296 } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { 297 t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) 298 } 299 } else if err != nil { 300 t.Errorf("%s: unexpected error %v", tt.Name, err) 301 } else if !reflect.DeepEqual(out, tt.Output) { 302 t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) 303 } 304 } 305 } 306 307 // nTimes is an io.Reader which yields the string s n times. 308 type nTimes struct { 309 s string 310 n int 311 off int 312 } 313 314 func (r *nTimes) Read(p []byte) (n int, err error) { 315 for { 316 if r.n <= 0 || r.s == "" { 317 return n, io.EOF 318 } 319 n0 := copy(p, r.s[r.off:]) 320 p = p[n0:] 321 n += n0 322 r.off += n0 323 if r.off == len(r.s) { 324 r.off = 0 325 r.n-- 326 } 327 if len(p) == 0 { 328 return 329 } 330 } 331 } 332 333 // benchmarkRead measures reading the provided CSV rows data. 334 // initReader, if non-nil, modifies the Reader before it's used. 335 func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { 336 b.ReportAllocs() 337 r := NewReader(&nTimes{s: rows, n: b.N}) 338 if initReader != nil { 339 initReader(r) 340 } 341 for { 342 _, err := r.Read() 343 if err == io.EOF { 344 break 345 } 346 if err != nil { 347 b.Fatal(err) 348 } 349 } 350 } 351 352 const benchmarkCSVData = `x,y,z,w 353 x,y,z, 354 x,y,, 355 x,,, 356 ,,, 357 "x","y","z","w" 358 "x","y","z","" 359 "x","y","","" 360 "x","","","" 361 "","","","" 362 ` 363 364 func BenchmarkRead(b *testing.B) { 365 benchmarkRead(b, nil, benchmarkCSVData) 366 } 367 368 func BenchmarkReadWithFieldsPerRecord(b *testing.B) { 369 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) 370 } 371 372 func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { 373 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) 374 } 375 376 func BenchmarkReadLargeFields(b *testing.B) { 377 benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 378 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 379 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 380 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 381 `, 3)) 382 } 383 384 func BenchmarkReadReuseRecord(b *testing.B) { 385 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) 386 } 387 388 func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { 389 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) 390 } 391 392 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { 393 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) 394 } 395 396 func BenchmarkReadReuseRecordLargeFields(b *testing.B) { 397 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 398 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv 399 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 400 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 401 `, 3)) 402 }