github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/tsv/reader_test.go (about)

     1  package tsv_test
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"testing"
     8  
     9  	"github.com/Schaudge/grailbase/tsv"
    10  	"github.com/grailbio/testutil/assert"
    11  	"github.com/grailbio/testutil/expect"
    12  )
    13  
    14  func TestReadBool(t *testing.T) {
    15  	read := func(data string) bool {
    16  		type row struct {
    17  			Col0 bool
    18  		}
    19  		r := tsv.NewReader(bytes.NewReader([]byte("col0\n" + data)))
    20  		r.HasHeaderRow = true
    21  		var v row
    22  		expect.NoError(t, r.Read(&v))
    23  		return v.Col0
    24  	}
    25  
    26  	expect.True(t, read("true"))
    27  	expect.False(t, read("false"))
    28  	expect.True(t, read("Y"))
    29  	expect.True(t, read("yes"))
    30  	expect.False(t, read("N"))
    31  	expect.False(t, read("no"))
    32  }
    33  
    34  func TestReadInt(t *testing.T) {
    35  	newReader := func() *tsv.Reader {
    36  		r := tsv.NewReader(bytes.NewReader([]byte(`col0	col1
    37  0	0.5
    38  `)))
    39  		r.HasHeaderRow = true
    40  		return r
    41  	}
    42  
    43  	{
    44  		type row struct {
    45  			Col0 int8
    46  			Col1 float32
    47  		}
    48  		r := newReader()
    49  		var v row
    50  		expect.NoError(t, r.Read(&v))
    51  		expect.EQ(t, v, row{0, 0.5})
    52  	}
    53  
    54  	{
    55  		type row struct {
    56  			Col0 int16
    57  			Col1 float64
    58  		}
    59  		r := newReader()
    60  		var v row
    61  		expect.NoError(t, r.Read(&v))
    62  		expect.EQ(t, v, row{0, 0.5})
    63  	}
    64  
    65  	{
    66  		type row struct {
    67  			Col0 int32
    68  			Col1 float64
    69  		}
    70  		r := newReader()
    71  		var v row
    72  		expect.NoError(t, r.Read(&v))
    73  		expect.EQ(t, v, row{0, 0.5})
    74  	}
    75  	{
    76  		type row struct {
    77  			Col0 int64
    78  			Col1 float64
    79  		}
    80  		r := newReader()
    81  		var v row
    82  		expect.NoError(t, r.Read(&v))
    83  		expect.EQ(t, v, row{0, 0.5})
    84  	}
    85  	{
    86  		type row struct {
    87  			Col0 int
    88  			Col1 float64
    89  		}
    90  		r := newReader()
    91  		var v row
    92  		expect.NoError(t, r.Read(&v))
    93  		expect.EQ(t, v, row{0, 0.5})
    94  	}
    95  	{
    96  		type row struct {
    97  			Col0 uint8
    98  			Col1 float32
    99  		}
   100  		r := newReader()
   101  		var v row
   102  		expect.NoError(t, r.Read(&v))
   103  		expect.EQ(t, v, row{0, 0.5})
   104  	}
   105  
   106  	{
   107  		type row struct {
   108  			Col0 uint16
   109  			Col1 float64
   110  		}
   111  		r := newReader()
   112  		var v row
   113  		expect.NoError(t, r.Read(&v))
   114  		expect.EQ(t, v, row{0, 0.5})
   115  	}
   116  
   117  	{
   118  		type row struct {
   119  			Col0 uint32
   120  			Col1 float64
   121  		}
   122  		r := newReader()
   123  		var v row
   124  		expect.NoError(t, r.Read(&v))
   125  		expect.EQ(t, v, row{0, 0.5})
   126  	}
   127  }
   128  
   129  func TestReadFmt(t *testing.T) {
   130  	r := tsv.NewReader(bytes.NewReader([]byte(`"""helloworld"""	05.20	true	0a`)))
   131  	type row struct {
   132  		ColA string  `tsv:",fmt=q"`
   133  		ColB float64 `tsv:",fmt=1.2f"`
   134  		ColC bool    `tsv:",fmt=t"`
   135  		ColD int     `tsv:",fmt=x"`
   136  	}
   137  	var v row
   138  	assert.NoError(t, r.Read(&v))
   139  	assert.EQ(t, v, row{`helloworld`, 5.2, true, 10})
   140  }
   141  
   142  func TestReadFmtWithSpace(t *testing.T) {
   143  	r := tsv.NewReader(bytes.NewReader([]byte(`"hello world"`)))
   144  	type row struct {
   145  		ColA string `tsv:",fmt=s"`
   146  	}
   147  	var v row
   148  	expect.Regexp(t, r.Read(&v), "value with fmt option can not have whitespace")
   149  }
   150  
   151  func TestReadWithoutHeader(t *testing.T) {
   152  	type row struct {
   153  		ColA string
   154  		ColB int
   155  	}
   156  	r := tsv.NewReader(bytes.NewReader([]byte(`key1	2
   157  key2	3
   158  `)))
   159  	var v row
   160  	assert.NoError(t, r.Read(&v))
   161  	expect.EQ(t, v, row{"key1", 2})
   162  	assert.NoError(t, r.Read(&v))
   163  	expect.EQ(t, v, row{"key2", 3})
   164  	assert.EQ(t, r.Read(&v), io.EOF)
   165  }
   166  
   167  func TestReadSkipUnexportedFields(t *testing.T) {
   168  	type row struct {
   169  		colA string
   170  		colB int
   171  		ColC int `tsv:"col0"`
   172  	}
   173  	r := tsv.NewReader(bytes.NewReader([]byte(`key	col0	col1
   174  key0	1	0.5
   175  key1	2	1.5
   176  `)))
   177  	r.HasHeaderRow = true
   178  	r.UseHeaderNames = true
   179  	var v row
   180  	assert.NoError(t, r.Read(&v))
   181  	expect.EQ(t, v, row{"", 0, 1})
   182  	assert.NoError(t, r.Read(&v))
   183  	expect.EQ(t, v, row{"", 0, 2})
   184  	assert.EQ(t, r.Read(&v), io.EOF)
   185  }
   186  
   187  func TestReadEmbeddedStruct(t *testing.T) {
   188  	type embedded1 struct {
   189  		Col1 int     `tsv:"col1"`
   190  		Col2 float64 `tsv:"col2_2,fmt=0.3f"`
   191  	}
   192  	type embedded2 struct {
   193  		Col2 float32 `tsv:"col2_1"`
   194  	}
   195  	type row struct {
   196  		Key string `tsv:"key"`
   197  		embedded1
   198  		embedded2
   199  	}
   200  	r := tsv.NewReader(bytes.NewReader([]byte(`key	col2_1	col1	col2_2
   201  key0	0.5	1	0.123
   202  key1	1.5	2	0.789
   203  `)))
   204  	r.HasHeaderRow = true
   205  	r.UseHeaderNames = true
   206  	var v row
   207  	assert.NoError(t, r.Read(&v))
   208  	expect.EQ(t, v, row{"key0", embedded1{1, 0.123}, embedded2{0.5}})
   209  	assert.NoError(t, r.Read(&v))
   210  	expect.EQ(t, v, row{"key1", embedded1{2, 0.789}, embedded2{1.5}})
   211  	assert.EQ(t, r.Read(&v), io.EOF)
   212  }
   213  
   214  func TestReadExtraColumns(t *testing.T) {
   215  	type row struct {
   216  		ColA string
   217  		ColB int
   218  	}
   219  	r := tsv.NewReader(bytes.NewReader([]byte(`key1	2	22
   220  key2	3	33
   221  `)))
   222  	r.RequireParseAllColumns = true
   223  	var v row
   224  	expect.Regexp(t, r.Read(&v), "extra columns found")
   225  }
   226  
   227  func TestReadDisallowExtraNamedColumns(t *testing.T) {
   228  	type row struct {
   229  		ColA string
   230  		ColB int
   231  	}
   232  	r := tsv.NewReader(bytes.NewReader([]byte(`ColA	ColB	ColC
   233  key1	2	22
   234  key2	3	33
   235  `)))
   236  	r.HasHeaderRow = true
   237  	r.UseHeaderNames = true
   238  	r.RequireParseAllColumns = true
   239  	var v row
   240  	expect.Regexp(t, r.Read(&v), "number of columns found")
   241  }
   242  
   243  func TestReadMissingColumns(t *testing.T) {
   244  	type row struct {
   245  		ColA string
   246  		ColB int
   247  	}
   248  	r := tsv.NewReader(bytes.NewReader([]byte(`ColA
   249  key1
   250  key2
   251  `)))
   252  	r.HasHeaderRow = true
   253  	r.UseHeaderNames = true
   254  	r.RequireParseAllColumns = true
   255  	var v row
   256  	expect.Regexp(t, r.Read(&v), "number of columns found")
   257  }
   258  
   259  func TestReadMismatchedColumns(t *testing.T) {
   260  	type row struct {
   261  		ColA string
   262  		ColB int
   263  	}
   264  	r := tsv.NewReader(bytes.NewReader([]byte(`ColA	ColC
   265  key1	2
   266  key2	3
   267  `)))
   268  	r.HasHeaderRow = true
   269  	r.UseHeaderNames = true
   270  	r.RequireParseAllColumns = true
   271  	var v row
   272  	expect.Regexp(t, r.Read(&v), "does not appear in the header")
   273  }
   274  
   275  func TestReadPartialStruct(t *testing.T) {
   276  	type row struct {
   277  		ColA string
   278  		ColB int
   279  	}
   280  	r := tsv.NewReader(bytes.NewReader([]byte(`ColA
   281  key1
   282  key2
   283  `)))
   284  	r.HasHeaderRow = true
   285  	r.UseHeaderNames = true
   286  	r.RequireParseAllColumns = true
   287  	r.IgnoreMissingColumns = true
   288  	var v row
   289  	assert.NoError(t, r.Read(&v))
   290  	expect.EQ(t, v, row{"key1", 0})
   291  	assert.NoError(t, r.Read(&v))
   292  	expect.EQ(t, v, row{"key2", 0})
   293  	assert.EQ(t, r.Read(&v), io.EOF)
   294  }
   295  
   296  func TestReadAllowExtraNamedColumns(t *testing.T) {
   297  	type row struct {
   298  		ColB int
   299  		ColA string
   300  	}
   301  	r := tsv.NewReader(bytes.NewReader([]byte(`ColA	ColB	ColC
   302  key1	2	22
   303  key2	3	33
   304  `)))
   305  	r.HasHeaderRow = true
   306  	r.UseHeaderNames = true
   307  	var v row
   308  	expect.NoError(t, r.Read(&v))
   309  	expect.EQ(t, v, row{2, "key1"})
   310  	expect.NoError(t, r.Read(&v))
   311  	expect.EQ(t, v, row{3, "key2"})
   312  }
   313  
   314  func TestReadParseError(t *testing.T) {
   315  	type row struct {
   316  		ColA int    `tsv:"cola"`
   317  		ColB string `tsv:"colb"`
   318  	}
   319  	r := tsv.NewReader(bytes.NewReader([]byte(`key1	2
   320  `)))
   321  	var v row
   322  	expect.Regexp(t, r.Read(&v), `line 1, column 0, 'cola' \(Go field 'ColA'\):`)
   323  }
   324  
   325  func TestReadValueError(t *testing.T) {
   326  	type row struct {
   327  		ColA string
   328  		ColB int
   329  	}
   330  	r := tsv.NewReader(bytes.NewReader([]byte(`key1	2
   331  key2	3
   332  `)))
   333  	var v int
   334  	expect.Regexp(t, r.Read(&v), `destination must be a pointer to struct, but found \*int`)
   335  	expect.Regexp(t, r.Read(v), `destination must be a pointer to struct, but found int`)
   336  }
   337  
   338  func TestReadMultipleRowTypes(t *testing.T) {
   339  	r := tsv.NewReader(bytes.NewReader([]byte(`key1	2
   340  3	key2
   341  `)))
   342  	{
   343  		type row struct {
   344  			ColA string
   345  			ColB int
   346  		}
   347  		var v row
   348  		assert.NoError(t, r.Read(&v))
   349  		expect.EQ(t, v, row{"key1", 2})
   350  	}
   351  	{
   352  		type row struct {
   353  			ColA int
   354  			ColB string
   355  		}
   356  		var v row
   357  		assert.NoError(t, r.Read(&v))
   358  		expect.EQ(t, v, row{3, "key2"})
   359  	}
   360  }
   361  
   362  func ExampleReader() {
   363  	type row struct {
   364  		Key  string
   365  		Col0 uint
   366  		Col1 float64
   367  	}
   368  
   369  	readRow := func(r *tsv.Reader) row {
   370  		var v row
   371  		if err := r.Read(&v); err != nil {
   372  			panic(err)
   373  		}
   374  		return v
   375  	}
   376  
   377  	r := tsv.NewReader(bytes.NewReader([]byte(`Key	Col0	Col1
   378  key0	0	0.5
   379  key1	1	1.5
   380  `)))
   381  	r.HasHeaderRow = true
   382  	r.UseHeaderNames = true
   383  	fmt.Printf("%+v\n", readRow(r))
   384  	fmt.Printf("%+v\n", readRow(r))
   385  
   386  	var v row
   387  	if err := r.Read(&v); err != io.EOF {
   388  		panic(err)
   389  	}
   390  	// Output:
   391  	// {Key:key0 Col0:0 Col1:0.5}
   392  	// {Key:key1 Col0:1 Col1:1.5}
   393  }
   394  
   395  func ExampleReader_withTag() {
   396  	type row struct {
   397  		ColA    string  `tsv:"key"`
   398  		ColB    float64 `tsv:"col1"`
   399  		Skipped int     `tsv:"-"`
   400  		ColC    int     `tsv:"col0,fmt=d"`
   401  		Hex     int     `tsv:",fmt=x"`
   402  		Hyphen  int     `tsv:"-,"`
   403  	}
   404  	readRow := func(r *tsv.Reader) row {
   405  		var v row
   406  		if err := r.Read(&v); err != nil {
   407  			panic(err)
   408  		}
   409  		return v
   410  	}
   411  
   412  	r := tsv.NewReader(bytes.NewReader([]byte(`key	col0	col1	Hex	-
   413  key0	0	0.5	a	1
   414  key1	1	1.5	f	2
   415  `)))
   416  	r.HasHeaderRow = true
   417  	r.UseHeaderNames = true
   418  	fmt.Printf("%+v\n", readRow(r))
   419  	fmt.Printf("%+v\n", readRow(r))
   420  
   421  	var v row
   422  	if err := r.Read(&v); err != io.EOF {
   423  		panic(err)
   424  	}
   425  	// Output:
   426  	// {ColA:key0 ColB:0.5 Skipped:0 ColC:0 Hex:10 Hyphen:1}
   427  	// {ColA:key1 ColB:1.5 Skipped:0 ColC:1 Hex:15 Hyphen:2}
   428  }
   429  
   430  func BenchmarkReader(b *testing.B) {
   431  	b.StopTimer()
   432  	const nRow = 10000
   433  	data := bytes.Buffer{}
   434  	for i := 0; i < nRow; i++ {
   435  		data.WriteString(fmt.Sprintf("key%d\t%d\t%f\n", i, i, float64(i)+0.5))
   436  	}
   437  	b.StartTimer()
   438  
   439  	type row struct {
   440  		Key   string
   441  		Int   int
   442  		Float float64
   443  	}
   444  	for i := 0; i < b.N; i++ {
   445  		r := tsv.NewReader(bytes.NewReader(data.Bytes()))
   446  		var (
   447  			val row
   448  			n   int
   449  		)
   450  		for {
   451  			err := r.Read(&val)
   452  			if err != nil {
   453  				if err == io.EOF {
   454  					break
   455  				}
   456  				panic(err)
   457  			}
   458  			n++
   459  		}
   460  		assert.EQ(b, n, nRow)
   461  	}
   462  }