github.com/grailbio/base@v0.0.11/tsv/reader_test.go (about) 1 package tsv_test 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "testing" 8 9 "github.com/grailbio/base/tsv" 10 "github.com/grailbio/testutil/assert" 11 "github.com/grailbio/testutil/expect" 12 ) 13 14 func TestReadBool(t *testing.T) { 15 read := func(data string) bool { 16 type row struct { 17 Col0 bool 18 } 19 r := tsv.NewReader(bytes.NewReader([]byte("col0\n" + data))) 20 r.HasHeaderRow = true 21 var v row 22 expect.NoError(t, r.Read(&v)) 23 return v.Col0 24 } 25 26 expect.True(t, read("true")) 27 expect.False(t, read("false")) 28 expect.True(t, read("Y")) 29 expect.True(t, read("yes")) 30 expect.False(t, read("N")) 31 expect.False(t, read("no")) 32 } 33 34 func TestReadInt(t *testing.T) { 35 newReader := func() *tsv.Reader { 36 r := tsv.NewReader(bytes.NewReader([]byte(`col0 col1 37 0 0.5 38 `))) 39 r.HasHeaderRow = true 40 return r 41 } 42 43 { 44 type row struct { 45 Col0 int8 46 Col1 float32 47 } 48 r := newReader() 49 var v row 50 expect.NoError(t, r.Read(&v)) 51 expect.EQ(t, v, row{0, 0.5}) 52 } 53 54 { 55 type row struct { 56 Col0 int16 57 Col1 float64 58 } 59 r := newReader() 60 var v row 61 expect.NoError(t, r.Read(&v)) 62 expect.EQ(t, v, row{0, 0.5}) 63 } 64 65 { 66 type row struct { 67 Col0 int32 68 Col1 float64 69 } 70 r := newReader() 71 var v row 72 expect.NoError(t, r.Read(&v)) 73 expect.EQ(t, v, row{0, 0.5}) 74 } 75 { 76 type row struct { 77 Col0 int64 78 Col1 float64 79 } 80 r := newReader() 81 var v row 82 expect.NoError(t, r.Read(&v)) 83 expect.EQ(t, v, row{0, 0.5}) 84 } 85 { 86 type row struct { 87 Col0 int 88 Col1 float64 89 } 90 r := newReader() 91 var v row 92 expect.NoError(t, r.Read(&v)) 93 expect.EQ(t, v, row{0, 0.5}) 94 } 95 { 96 type row struct { 97 Col0 uint8 98 Col1 float32 99 } 100 r := newReader() 101 var v row 102 expect.NoError(t, r.Read(&v)) 103 expect.EQ(t, v, row{0, 0.5}) 104 } 105 106 { 107 type row struct { 108 Col0 uint16 109 Col1 float64 110 } 111 r := newReader() 112 var v row 113 expect.NoError(t, r.Read(&v)) 114 expect.EQ(t, v, row{0, 0.5}) 115 } 116 117 { 118 type row struct { 119 Col0 uint32 120 Col1 float64 121 } 122 r := newReader() 123 var v row 124 expect.NoError(t, r.Read(&v)) 125 expect.EQ(t, v, row{0, 0.5}) 126 } 127 } 128 129 func TestReadFmt(t *testing.T) { 130 r := tsv.NewReader(bytes.NewReader([]byte(`"""helloworld""" 05.20 true 0a`))) 131 type row struct { 132 ColA string `tsv:",fmt=q"` 133 ColB float64 `tsv:",fmt=1.2f"` 134 ColC bool `tsv:",fmt=t"` 135 ColD int `tsv:",fmt=x"` 136 } 137 var v row 138 assert.NoError(t, r.Read(&v)) 139 assert.EQ(t, v, row{`helloworld`, 5.2, true, 10}) 140 } 141 142 func TestReadFmtWithSpace(t *testing.T) { 143 r := tsv.NewReader(bytes.NewReader([]byte(`"hello world"`))) 144 type row struct { 145 ColA string `tsv:",fmt=s"` 146 } 147 var v row 148 expect.Regexp(t, r.Read(&v), "value with fmt option can not have whitespace") 149 } 150 151 func TestReadWithoutHeader(t *testing.T) { 152 type row struct { 153 ColA string 154 ColB int 155 } 156 r := tsv.NewReader(bytes.NewReader([]byte(`key1 2 157 key2 3 158 `))) 159 var v row 160 assert.NoError(t, r.Read(&v)) 161 expect.EQ(t, v, row{"key1", 2}) 162 assert.NoError(t, r.Read(&v)) 163 expect.EQ(t, v, row{"key2", 3}) 164 assert.EQ(t, r.Read(&v), io.EOF) 165 } 166 167 func TestReadSkipUnexportedFields(t *testing.T) { 168 type row struct { 169 colA string 170 colB int 171 ColC int `tsv:"col0"` 172 } 173 r := tsv.NewReader(bytes.NewReader([]byte(`key col0 col1 174 key0 1 0.5 175 key1 2 1.5 176 `))) 177 r.HasHeaderRow = true 178 r.UseHeaderNames = true 179 var v row 180 assert.NoError(t, r.Read(&v)) 181 expect.EQ(t, v, row{"", 0, 1}) 182 assert.NoError(t, r.Read(&v)) 183 expect.EQ(t, v, row{"", 0, 2}) 184 assert.EQ(t, r.Read(&v), io.EOF) 185 } 186 187 func TestReadEmbeddedStruct(t *testing.T) { 188 type embedded1 struct { 189 Col1 int `tsv:"col1"` 190 Col2 float64 `tsv:"col2_2,fmt=0.3f"` 191 } 192 type embedded2 struct { 193 Col2 float32 `tsv:"col2_1"` 194 } 195 type row struct { 196 Key string `tsv:"key"` 197 embedded1 198 embedded2 199 } 200 r := tsv.NewReader(bytes.NewReader([]byte(`key col2_1 col1 col2_2 201 key0 0.5 1 0.123 202 key1 1.5 2 0.789 203 `))) 204 r.HasHeaderRow = true 205 r.UseHeaderNames = true 206 var v row 207 assert.NoError(t, r.Read(&v)) 208 expect.EQ(t, v, row{"key0", embedded1{1, 0.123}, embedded2{0.5}}) 209 assert.NoError(t, r.Read(&v)) 210 expect.EQ(t, v, row{"key1", embedded1{2, 0.789}, embedded2{1.5}}) 211 assert.EQ(t, r.Read(&v), io.EOF) 212 } 213 214 func TestReadExtraColumns(t *testing.T) { 215 type row struct { 216 ColA string 217 ColB int 218 } 219 r := tsv.NewReader(bytes.NewReader([]byte(`key1 2 22 220 key2 3 33 221 `))) 222 r.RequireParseAllColumns = true 223 var v row 224 expect.Regexp(t, r.Read(&v), "extra columns found") 225 } 226 227 func TestReadDisallowExtraNamedColumns(t *testing.T) { 228 type row struct { 229 ColA string 230 ColB int 231 } 232 r := tsv.NewReader(bytes.NewReader([]byte(`ColA ColB ColC 233 key1 2 22 234 key2 3 33 235 `))) 236 r.HasHeaderRow = true 237 r.UseHeaderNames = true 238 r.RequireParseAllColumns = true 239 var v row 240 expect.Regexp(t, r.Read(&v), "number of columns found") 241 } 242 243 func TestReadMissingColumns(t *testing.T) { 244 type row struct { 245 ColA string 246 ColB int 247 } 248 r := tsv.NewReader(bytes.NewReader([]byte(`ColA 249 key1 250 key2 251 `))) 252 r.HasHeaderRow = true 253 r.UseHeaderNames = true 254 r.RequireParseAllColumns = true 255 var v row 256 expect.Regexp(t, r.Read(&v), "number of columns found") 257 } 258 259 func TestReadMismatchedColumns(t *testing.T) { 260 type row struct { 261 ColA string 262 ColB int 263 } 264 r := tsv.NewReader(bytes.NewReader([]byte(`ColA ColC 265 key1 2 266 key2 3 267 `))) 268 r.HasHeaderRow = true 269 r.UseHeaderNames = true 270 r.RequireParseAllColumns = true 271 var v row 272 expect.Regexp(t, r.Read(&v), "does not appear in the header") 273 } 274 275 func TestReadPartialStruct(t *testing.T) { 276 type row struct { 277 ColA string 278 ColB int 279 } 280 r := tsv.NewReader(bytes.NewReader([]byte(`ColA 281 key1 282 key2 283 `))) 284 r.HasHeaderRow = true 285 r.UseHeaderNames = true 286 r.RequireParseAllColumns = true 287 r.IgnoreMissingColumns = true 288 var v row 289 assert.NoError(t, r.Read(&v)) 290 expect.EQ(t, v, row{"key1", 0}) 291 assert.NoError(t, r.Read(&v)) 292 expect.EQ(t, v, row{"key2", 0}) 293 assert.EQ(t, r.Read(&v), io.EOF) 294 } 295 296 func TestReadAllowExtraNamedColumns(t *testing.T) { 297 type row struct { 298 ColB int 299 ColA string 300 } 301 r := tsv.NewReader(bytes.NewReader([]byte(`ColA ColB ColC 302 key1 2 22 303 key2 3 33 304 `))) 305 r.HasHeaderRow = true 306 r.UseHeaderNames = true 307 var v row 308 expect.NoError(t, r.Read(&v)) 309 expect.EQ(t, v, row{2, "key1"}) 310 expect.NoError(t, r.Read(&v)) 311 expect.EQ(t, v, row{3, "key2"}) 312 } 313 314 func TestReadParseError(t *testing.T) { 315 type row struct { 316 ColA int `tsv:"cola"` 317 ColB string `tsv:"colb"` 318 } 319 r := tsv.NewReader(bytes.NewReader([]byte(`key1 2 320 `))) 321 var v row 322 expect.Regexp(t, r.Read(&v), `line 1, column 0, 'cola' \(Go field 'ColA'\):`) 323 } 324 325 func TestReadValueError(t *testing.T) { 326 type row struct { 327 ColA string 328 ColB int 329 } 330 r := tsv.NewReader(bytes.NewReader([]byte(`key1 2 331 key2 3 332 `))) 333 var v int 334 expect.Regexp(t, r.Read(&v), `destination must be a pointer to struct, but found \*int`) 335 expect.Regexp(t, r.Read(v), `destination must be a pointer to struct, but found int`) 336 } 337 338 func TestReadMultipleRowTypes(t *testing.T) { 339 r := tsv.NewReader(bytes.NewReader([]byte(`key1 2 340 3 key2 341 `))) 342 { 343 type row struct { 344 ColA string 345 ColB int 346 } 347 var v row 348 assert.NoError(t, r.Read(&v)) 349 expect.EQ(t, v, row{"key1", 2}) 350 } 351 { 352 type row struct { 353 ColA int 354 ColB string 355 } 356 var v row 357 assert.NoError(t, r.Read(&v)) 358 expect.EQ(t, v, row{3, "key2"}) 359 } 360 } 361 362 func ExampleReader() { 363 type row struct { 364 Key string 365 Col0 uint 366 Col1 float64 367 } 368 369 readRow := func(r *tsv.Reader) row { 370 var v row 371 if err := r.Read(&v); err != nil { 372 panic(err) 373 } 374 return v 375 } 376 377 r := tsv.NewReader(bytes.NewReader([]byte(`Key Col0 Col1 378 key0 0 0.5 379 key1 1 1.5 380 `))) 381 r.HasHeaderRow = true 382 r.UseHeaderNames = true 383 fmt.Printf("%+v\n", readRow(r)) 384 fmt.Printf("%+v\n", readRow(r)) 385 386 var v row 387 if err := r.Read(&v); err != io.EOF { 388 panic(err) 389 } 390 // Output: 391 // {Key:key0 Col0:0 Col1:0.5} 392 // {Key:key1 Col0:1 Col1:1.5} 393 } 394 395 func ExampleReader_withTag() { 396 type row struct { 397 ColA string `tsv:"key"` 398 ColB float64 `tsv:"col1"` 399 Skipped int `tsv:"-"` 400 ColC int `tsv:"col0,fmt=d"` 401 Hex int `tsv:",fmt=x"` 402 Hyphen int `tsv:"-,"` 403 } 404 readRow := func(r *tsv.Reader) row { 405 var v row 406 if err := r.Read(&v); err != nil { 407 panic(err) 408 } 409 return v 410 } 411 412 r := tsv.NewReader(bytes.NewReader([]byte(`key col0 col1 Hex - 413 key0 0 0.5 a 1 414 key1 1 1.5 f 2 415 `))) 416 r.HasHeaderRow = true 417 r.UseHeaderNames = true 418 fmt.Printf("%+v\n", readRow(r)) 419 fmt.Printf("%+v\n", readRow(r)) 420 421 var v row 422 if err := r.Read(&v); err != io.EOF { 423 panic(err) 424 } 425 // Output: 426 // {ColA:key0 ColB:0.5 Skipped:0 ColC:0 Hex:10 Hyphen:1} 427 // {ColA:key1 ColB:1.5 Skipped:0 ColC:1 Hex:15 Hyphen:2} 428 } 429 430 func BenchmarkReader(b *testing.B) { 431 b.StopTimer() 432 const nRow = 10000 433 data := bytes.Buffer{} 434 for i := 0; i < nRow; i++ { 435 data.WriteString(fmt.Sprintf("key%d\t%d\t%f\n", i, i, float64(i)+0.5)) 436 } 437 b.StartTimer() 438 439 type row struct { 440 Key string 441 Int int 442 Float float64 443 } 444 for i := 0; i < b.N; i++ { 445 r := tsv.NewReader(bytes.NewReader(data.Bytes())) 446 var ( 447 val row 448 n int 449 ) 450 for { 451 err := r.Read(&val) 452 if err != nil { 453 if err == io.EOF { 454 break 455 } 456 panic(err) 457 } 458 n++ 459 } 460 assert.EQ(b, n, nRow) 461 } 462 }