github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-import/importer_test.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"testing"
    14  
    15  	"github.com/attic-labs/noms/go/d"
    16  	"github.com/attic-labs/noms/go/datas"
    17  	"github.com/attic-labs/noms/go/nbs"
    18  	"github.com/attic-labs/noms/go/spec"
    19  	"github.com/attic-labs/noms/go/types"
    20  	"github.com/attic-labs/noms/go/util/clienttest"
    21  	"github.com/stretchr/testify/suite"
    22  )
    23  
// Shared fixture parameters for the CSV importer tests.
//
// TEST_DATA_SIZE is the number of data rows writeCSVWithHeader writes after
// the header line. TEST_YEAR is the base of the first column, which is written
// as TEST_YEAR + j%3. TEST_FIELDS is the value the tests pass to the
// --column-types flag for the four-column fixture.
const (
	TEST_DATA_SIZE = 100
	TEST_YEAR      = 2012
	TEST_FIELDS    = "Number,String,Number,Number"
)
    29  
    30  func TestCSVImporter(t *testing.T) {
    31  	suite.Run(t, &testSuite{})
    32  }
    33  
// testSuite is the testify suite for the CSV importer. It embeds
// clienttest.ClientTestSuite, whose TempDir, DBDir, Run and MustRun members
// are used by the tests in this file.
type testSuite struct {
	clienttest.ClientTestSuite
	// tmpFileName is the path of the fixture CSV file that SetupTest creates
	// and TearDownTest removes.
	tmpFileName string
}
    38  
    39  func (s *testSuite) SetupTest() {
    40  	input, err := ioutil.TempFile(s.TempDir, "")
    41  	d.Chk.NoError(err)
    42  	defer input.Close()
    43  	s.tmpFileName = input.Name()
    44  	writeCSV(input)
    45  }
    46  
    47  func (s *testSuite) TearDownTest() {
    48  	os.Remove(s.tmpFileName)
    49  }
    50  
    51  func writeCSV(w io.Writer) {
    52  	writeCSVWithHeader(w, "year,a,b,c\n", 0)
    53  }
    54  
    55  func writeCSVWithHeader(w io.Writer, header string, startingValue int) {
    56  	_, err := io.WriteString(w, header)
    57  	d.Chk.NoError(err)
    58  	for i := 0; i < TEST_DATA_SIZE; i++ {
    59  		j := i + startingValue
    60  		_, err = io.WriteString(w, fmt.Sprintf("%d,a%d,%d,%d\n", TEST_YEAR+j%3, j, j, j*2))
    61  		d.Chk.NoError(err)
    62  	}
    63  }
    64  
    65  func (s *testSuite) validateList(l types.List) {
    66  	s.Equal(uint64(TEST_DATA_SIZE), l.Len())
    67  
    68  	i := uint64(0)
    69  	l.IterAll(func(v types.Value, j uint64) {
    70  		s.Equal(i, j)
    71  		st := v.(types.Struct)
    72  		s.Equal(types.Number(TEST_YEAR+i%3), st.Get("year"))
    73  		s.Equal(types.String(fmt.Sprintf("a%d", i)), st.Get("a"))
    74  		s.Equal(types.Number(i), st.Get("b"))
    75  		s.Equal(types.Number(i*2), st.Get("c"))
    76  		i++
    77  	})
    78  }
    79  
    80  func (s *testSuite) validateMap(vrw types.ValueReadWriter, m types.Map) {
    81  	// --dest-type=map:1 so key is field "a"
    82  	s.Equal(uint64(TEST_DATA_SIZE), m.Len())
    83  
    84  	for i := 0; i < TEST_DATA_SIZE; i++ {
    85  		v := m.Get(types.String(fmt.Sprintf("a%d", i))).(types.Struct)
    86  		s.True(v.Equals(
    87  			types.NewStruct("Row", types.StructData{
    88  				"year": types.Number(TEST_YEAR + i%3),
    89  				"a":    types.String(fmt.Sprintf("a%d", i)),
    90  				"b":    types.Number(i),
    91  				"c":    types.Number(i * 2),
    92  			})))
    93  	}
    94  }
    95  
    96  func (s *testSuite) validateNestedMap(vrw types.ValueReadWriter, m types.Map) {
    97  	// --dest-type=map:0,1 so keys are fields "year", then field "a"
    98  	s.Equal(uint64(3), m.Len())
    99  
   100  	for i := 0; i < TEST_DATA_SIZE; i++ {
   101  		n := m.Get(types.Number(TEST_YEAR + i%3)).(types.Map)
   102  		o := n.Get(types.String(fmt.Sprintf("a%d", i))).(types.Struct)
   103  		s.True(o.Equals(types.NewStruct("Row", types.StructData{
   104  			"year": types.Number(TEST_YEAR + i%3),
   105  			"a":    types.String(fmt.Sprintf("a%d", i)),
   106  			"b":    types.Number(i),
   107  			"c":    types.Number(i * 2),
   108  		})))
   109  	}
   110  }
   111  
   112  func (s *testSuite) validateColumnar(vrw types.ValueReadWriter, str types.Struct, reps int) {
   113  	s.Equal("Columnar", str.Name())
   114  
   115  	lists := map[string]types.List{}
   116  	for _, nm := range []string{"year", "a", "b", "c"} {
   117  		l := str.Get(nm).(types.Ref).TargetValue(vrw).(types.List)
   118  		s.Equal(uint64(reps*TEST_DATA_SIZE), l.Len())
   119  		lists[nm] = l
   120  	}
   121  
   122  	for i := 0; i < reps*TEST_DATA_SIZE; i++ {
   123  		s.Equal(types.Number(TEST_YEAR+i%3), lists["year"].Get(uint64(i)))
   124  		s.Equal(types.String(fmt.Sprintf("a%d", i)), lists["a"].Get(uint64(i)))
   125  		s.Equal(types.Number(i), lists["b"].Get(uint64(i)))
   126  		s.Equal(types.Number(i*2), lists["c"].Get(uint64(i)))
   127  	}
   128  }
   129  
   130  func (s *testSuite) TestCSVImporter() {
   131  	setName := "csv"
   132  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   133  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName})
   134  	s.Equal("", stdout)
   135  	s.Equal("", stderr)
   136  
   137  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   138  	defer os.RemoveAll(s.DBDir)
   139  	defer db.Close()
   140  	ds := db.GetDataset(setName)
   141  
   142  	s.validateList(ds.HeadValue().(types.List))
   143  }
   144  
   145  func (s *testSuite) TestCSVImporterLowercase() {
   146  	input, err := ioutil.TempFile(s.TempDir, "")
   147  	d.Chk.NoError(err)
   148  	defer input.Close()
   149  	writeCSVWithHeader(input, "YeAr,a,B,c\n", 0)
   150  	defer os.Remove(input.Name())
   151  
   152  	setName := "csv"
   153  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   154  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--lowercase", "--column-types", TEST_FIELDS, dataspec, input.Name()})
   155  	s.Equal("", stdout)
   156  	s.Equal("", stderr)
   157  
   158  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   159  	defer os.RemoveAll(s.DBDir)
   160  	defer db.Close()
   161  	ds := db.GetDataset(setName)
   162  
   163  	s.validateList(ds.HeadValue().(types.List))
   164  }
   165  
   166  func (s *testSuite) TestCSVImporterLowercaseDuplicate() {
   167  	input, err := ioutil.TempFile(s.TempDir, "")
   168  	d.Chk.NoError(err)
   169  	defer input.Close()
   170  	writeCSVWithHeader(input, "YeAr,a,B,year\n", 0)
   171  	defer os.Remove(input.Name())
   172  
   173  	setName := "csv"
   174  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   175  	_, stderr, _ := s.Run(main, []string{"--no-progress", "--lowercase", "--column-types", TEST_FIELDS, dataspec, input.Name()})
   176  	s.Contains(stderr, "must be unique")
   177  }
   178  
   179  func (s *testSuite) TestCSVImporterFromBlob() {
   180  	test := func(pathFlag string) {
   181  		defer os.RemoveAll(s.DBDir)
   182  
   183  		newDB := func() datas.Database {
   184  			os.Mkdir(s.DBDir, 0777)
   185  			cs := nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)
   186  			return datas.NewDatabase(cs)
   187  		}
   188  
   189  		db := newDB()
   190  		rawDS := db.GetDataset("raw")
   191  		csv := &bytes.Buffer{}
   192  		writeCSV(csv)
   193  		db.CommitValue(rawDS, types.NewBlob(db, csv))
   194  		db.Close()
   195  
   196  		stdout, stderr := s.MustRun(main, []string{
   197  			"--no-progress", "--column-types", TEST_FIELDS,
   198  			pathFlag, spec.CreateValueSpecString("nbs", s.DBDir, "raw.value"),
   199  			spec.CreateValueSpecString("nbs", s.DBDir, "csv"),
   200  		})
   201  		s.Equal("", stdout)
   202  		s.Equal("", stderr)
   203  
   204  		db = newDB()
   205  		defer db.Close()
   206  		csvDS := db.GetDataset("csv")
   207  		s.validateList(csvDS.HeadValue().(types.List))
   208  	}
   209  	test("--path")
   210  	test("-p")
   211  }
   212  
   213  func (s *testSuite) TestCSVImporterToMap() {
   214  	setName := "csv"
   215  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   216  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:1", dataspec, s.tmpFileName})
   217  	s.Equal("", stdout)
   218  	s.Equal("", stderr)
   219  
   220  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   221  	defer os.RemoveAll(s.DBDir)
   222  	defer db.Close()
   223  	ds := db.GetDataset(setName)
   224  
   225  	m := ds.HeadValue().(types.Map)
   226  	s.validateMap(db, m)
   227  }
   228  
   229  func (s *testSuite) TestCSVImporterToNestedMap() {
   230  	setName := "csv"
   231  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   232  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:0,1", dataspec, s.tmpFileName})
   233  	s.Equal("", stdout)
   234  	s.Equal("", stderr)
   235  
   236  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   237  	defer os.RemoveAll(s.DBDir)
   238  	defer db.Close()
   239  	ds := db.GetDataset(setName)
   240  
   241  	m := ds.HeadValue().(types.Map)
   242  	s.validateNestedMap(db, m)
   243  }
   244  
   245  func (s *testSuite) TestCSVImporterToNestedMapByName() {
   246  	setName := "csv"
   247  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   248  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:year,a", dataspec, s.tmpFileName})
   249  	s.Equal("", stdout)
   250  	s.Equal("", stderr)
   251  
   252  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   253  	defer os.RemoveAll(s.DBDir)
   254  	defer db.Close()
   255  	ds := db.GetDataset(setName)
   256  
   257  	m := ds.HeadValue().(types.Map)
   258  	s.validateNestedMap(db, m)
   259  }
   260  
   261  func (s *testSuite) TestCSVImporterToColumnar() {
   262  	setName := "csv"
   263  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   264  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--invert", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName})
   265  	s.Equal("", stdout)
   266  	s.Equal("", stderr)
   267  
   268  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   269  	defer os.RemoveAll(s.DBDir)
   270  	defer db.Close()
   271  	ds := db.GetDataset(setName)
   272  
   273  	str := ds.HeadValue().(types.Struct)
   274  	s.validateColumnar(db, str, 1)
   275  }
   276  
   277  func (s *testSuite) TestCSVImporterToColumnarAppend() {
   278  	setName := "csv"
   279  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   280  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--invert", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName})
   281  	s.Equal("", stdout)
   282  	s.Equal("", stderr)
   283  
   284  	input, err := ioutil.TempFile(s.TempDir, "")
   285  	d.Chk.NoError(err)
   286  	defer input.Close()
   287  	writeCSVWithHeader(input, "year,a,b,c\n", 100)
   288  	defer os.Remove(input.Name())
   289  
   290  	stdout, stderr = s.MustRun(main, []string{"--no-progress", "--invert", "--append", "--column-types", TEST_FIELDS, dataspec, input.Name()})
   291  	s.Equal("", stdout)
   292  	s.Equal("", stderr)
   293  
   294  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   295  	defer os.RemoveAll(s.DBDir)
   296  	defer db.Close()
   297  	ds := db.GetDataset(setName)
   298  
   299  	str := ds.HeadValue().(types.Struct)
   300  	s.validateColumnar(db, str, 2)
   301  }
   302  
   303  func (s *testSuite) TestCSVImporterWithPipe() {
   304  	input, err := ioutil.TempFile(s.TempDir, "")
   305  	d.Chk.NoError(err)
   306  	defer input.Close()
   307  	defer os.Remove(input.Name())
   308  
   309  	_, err = input.WriteString("a|b\n1|2\n")
   310  	d.Chk.NoError(err)
   311  
   312  	setName := "csv"
   313  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   314  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", "String,Number", "--delimiter", "|", dataspec, input.Name()})
   315  	s.Equal("", stdout)
   316  	s.Equal("", stderr)
   317  
   318  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   319  	defer os.RemoveAll(s.DBDir)
   320  	defer db.Close()
   321  	ds := db.GetDataset(setName)
   322  
   323  	l := ds.HeadValue().(types.List)
   324  	s.Equal(uint64(1), l.Len())
   325  	v := l.Get(0)
   326  	st := v.(types.Struct)
   327  	s.Equal(types.String("1"), st.Get("a"))
   328  	s.Equal(types.Number(2), st.Get("b"))
   329  }
   330  
   331  func (s *testSuite) TestCSVImporterWithExternalHeader() {
   332  	input, err := ioutil.TempFile(s.TempDir, "")
   333  	d.Chk.NoError(err)
   334  	defer input.Close()
   335  	defer os.Remove(input.Name())
   336  
   337  	_, err = input.WriteString("7,8\n")
   338  	d.Chk.NoError(err)
   339  
   340  	setName := "csv"
   341  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   342  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", "String,Number", "--header", "x,y", dataspec, input.Name()})
   343  	s.Equal("", stdout)
   344  	s.Equal("", stderr)
   345  
   346  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   347  	defer os.RemoveAll(s.DBDir)
   348  	defer db.Close()
   349  	ds := db.GetDataset(setName)
   350  
   351  	l := ds.HeadValue().(types.List)
   352  	s.Equal(uint64(1), l.Len())
   353  	v := l.Get(0)
   354  	st := v.(types.Struct)
   355  	s.Equal(types.String("7"), st.Get("x"))
   356  	s.Equal(types.Number(8), st.Get("y"))
   357  }
   358  
   359  func (s *testSuite) TestCSVImporterWithInvalidExternalHeader() {
   360  	input, err := ioutil.TempFile(s.TempDir, "")
   361  	d.Chk.NoError(err)
   362  	defer input.Close()
   363  	defer os.Remove(input.Name())
   364  
   365  	_, err = input.WriteString("7#8\n")
   366  	d.Chk.NoError(err)
   367  
   368  	setName := "csv"
   369  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   370  	stdout, stderr, exitErr := s.Run(main, []string{"--no-progress", "--column-types", "String,Number", "--header", "x,x", dataspec, input.Name()})
   371  	s.Equal("", stdout)
   372  	s.Equal("error: Invalid headers specified, headers must be unique\n", stderr)
   373  	s.Equal(clienttest.ExitError{Code: 1}, exitErr)
   374  }
   375  
   376  func (s *testSuite) TestCSVImporterWithInvalidNumColumnTypeSpec() {
   377  	input, err := ioutil.TempFile(s.TempDir, "")
   378  	d.Chk.NoError(err)
   379  	defer input.Close()
   380  	defer os.Remove(input.Name())
   381  
   382  	_, err = input.WriteString("7,8\n")
   383  	d.Chk.NoError(err)
   384  
   385  	setName := "csv"
   386  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   387  	stdout, stderr, exitErr := s.Run(main, []string{"--no-progress", "--column-types", "String", "--header", "x,y", dataspec, input.Name()})
   388  	s.Equal("", stdout)
   389  	s.Equal("error: Invalid column-types specified, column types do not correspond to number of headers\n", stderr)
   390  	s.Equal(clienttest.ExitError{Code: 1}, exitErr)
   391  }
   392  
   393  func (s *testSuite) TestCSVImportSkipRecords() {
   394  	input, err := ioutil.TempFile(s.TempDir, "")
   395  	d.Chk.NoError(err)
   396  	defer input.Close()
   397  	defer os.Remove(input.Name())
   398  
   399  	_, err = input.WriteString("garbage foo\n")
   400  	d.Chk.NoError(err)
   401  
   402  	_, err = input.WriteString("garbage bar\n")
   403  	d.Chk.NoError(err)
   404  
   405  	_, err = input.WriteString("a,b\n")
   406  	d.Chk.NoError(err)
   407  
   408  	_, err = input.WriteString("7,8\n")
   409  	d.Chk.NoError(err)
   410  
   411  	setName := "csv"
   412  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   413  
   414  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--skip-records", "2", dataspec, input.Name()})
   415  	s.Equal("", stdout)
   416  	s.Equal("", stderr)
   417  
   418  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   419  	defer os.RemoveAll(s.DBDir)
   420  	defer db.Close()
   421  	ds := db.GetDataset(setName)
   422  
   423  	l := ds.HeadValue().(types.List)
   424  	s.Equal(uint64(1), l.Len())
   425  	v := l.Get(0)
   426  	st := v.(types.Struct)
   427  	s.Equal(types.String("7"), st.Get("a"))
   428  	s.Equal(types.String("8"), st.Get("b"))
   429  }
   430  
   431  func (s *testSuite) TestCSVImportSkipRecordsTooMany() {
   432  	input, err := ioutil.TempFile(s.TempDir, "")
   433  	d.Chk.NoError(err)
   434  	defer input.Close()
   435  	defer os.Remove(input.Name())
   436  
   437  	_, err = input.WriteString("a,b\n")
   438  	d.Chk.NoError(err)
   439  
   440  	setName := "csv"
   441  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   442  
   443  	stdout, stderr, recoveredErr := s.Run(main, []string{"--no-progress", "--skip-records", "100", dataspec, input.Name()})
   444  	s.Equal("", stdout)
   445  	s.Equal("error: skip-records skipped past EOF\n", stderr)
   446  	s.Equal(clienttest.ExitError{Code: 1}, recoveredErr)
   447  }
   448  
   449  func (s *testSuite) TestCSVImportSkipRecordsCustomHeader() {
   450  	input, err := ioutil.TempFile(s.TempDir, "")
   451  	d.Chk.NoError(err)
   452  	defer input.Close()
   453  	defer os.Remove(input.Name())
   454  
   455  	_, err = input.WriteString("a,b\n")
   456  	d.Chk.NoError(err)
   457  
   458  	_, err = input.WriteString("7,8\n")
   459  	d.Chk.NoError(err)
   460  
   461  	setName := "csv"
   462  	dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName)
   463  	stdout, stderr := s.MustRun(main, []string{"--no-progress", "--skip-records", "1", "--header", "x,y", dataspec, input.Name()})
   464  	s.Equal("", stdout)
   465  	s.Equal("", stderr)
   466  
   467  	db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize))
   468  	defer os.RemoveAll(s.DBDir)
   469  	defer db.Close()
   470  	ds := db.GetDataset(setName)
   471  
   472  	l := ds.HeadValue().(types.List)
   473  	s.Equal(uint64(1), l.Len())
   474  	v := l.Get(0)
   475  	st := v.(types.Struct)
   476  	s.Equal(types.String("7"), st.Get("x"))
   477  	s.Equal(types.String("8"), st.Get("y"))
   478  }