github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-import/importer_test.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "testing" 14 15 "github.com/attic-labs/noms/go/d" 16 "github.com/attic-labs/noms/go/datas" 17 "github.com/attic-labs/noms/go/nbs" 18 "github.com/attic-labs/noms/go/spec" 19 "github.com/attic-labs/noms/go/types" 20 "github.com/attic-labs/noms/go/util/clienttest" 21 "github.com/stretchr/testify/suite" 22 ) 23 24 const ( 25 TEST_DATA_SIZE = 100 26 TEST_YEAR = 2012 27 TEST_FIELDS = "Number,String,Number,Number" 28 ) 29 30 func TestCSVImporter(t *testing.T) { 31 suite.Run(t, &testSuite{}) 32 } 33 34 type testSuite struct { 35 clienttest.ClientTestSuite 36 tmpFileName string 37 } 38 39 func (s *testSuite) SetupTest() { 40 input, err := ioutil.TempFile(s.TempDir, "") 41 d.Chk.NoError(err) 42 defer input.Close() 43 s.tmpFileName = input.Name() 44 writeCSV(input) 45 } 46 47 func (s *testSuite) TearDownTest() { 48 os.Remove(s.tmpFileName) 49 } 50 51 func writeCSV(w io.Writer) { 52 writeCSVWithHeader(w, "year,a,b,c\n", 0) 53 } 54 55 func writeCSVWithHeader(w io.Writer, header string, startingValue int) { 56 _, err := io.WriteString(w, header) 57 d.Chk.NoError(err) 58 for i := 0; i < TEST_DATA_SIZE; i++ { 59 j := i + startingValue 60 _, err = io.WriteString(w, fmt.Sprintf("%d,a%d,%d,%d\n", TEST_YEAR+j%3, j, j, j*2)) 61 d.Chk.NoError(err) 62 } 63 } 64 65 func (s *testSuite) validateList(l types.List) { 66 s.Equal(uint64(TEST_DATA_SIZE), l.Len()) 67 68 i := uint64(0) 69 l.IterAll(func(v types.Value, j uint64) { 70 s.Equal(i, j) 71 st := v.(types.Struct) 72 s.Equal(types.Number(TEST_YEAR+i%3), st.Get("year")) 73 s.Equal(types.String(fmt.Sprintf("a%d", i)), st.Get("a")) 74 s.Equal(types.Number(i), st.Get("b")) 75 s.Equal(types.Number(i*2), st.Get("c")) 76 i++ 77 }) 78 } 79 80 func (s *testSuite) validateMap(vrw types.ValueReadWriter, m types.Map) { 81 // --dest-type=map:1 so key is field "a" 82 s.Equal(uint64(TEST_DATA_SIZE), m.Len()) 83 84 for i := 0; i < TEST_DATA_SIZE; i++ { 85 v := m.Get(types.String(fmt.Sprintf("a%d", i))).(types.Struct) 86 s.True(v.Equals( 87 types.NewStruct("Row", types.StructData{ 88 "year": types.Number(TEST_YEAR + i%3), 89 "a": types.String(fmt.Sprintf("a%d", i)), 90 "b": types.Number(i), 91 "c": types.Number(i * 2), 92 }))) 93 } 94 } 95 96 func (s *testSuite) validateNestedMap(vrw types.ValueReadWriter, m types.Map) { 97 // --dest-type=map:0,1 so keys are fields "year", then field "a" 98 s.Equal(uint64(3), m.Len()) 99 100 for i := 0; i < TEST_DATA_SIZE; i++ { 101 n := m.Get(types.Number(TEST_YEAR + i%3)).(types.Map) 102 o := n.Get(types.String(fmt.Sprintf("a%d", i))).(types.Struct) 103 s.True(o.Equals(types.NewStruct("Row", types.StructData{ 104 "year": types.Number(TEST_YEAR + i%3), 105 "a": types.String(fmt.Sprintf("a%d", i)), 106 "b": types.Number(i), 107 "c": types.Number(i * 2), 108 }))) 109 } 110 } 111 112 func (s *testSuite) validateColumnar(vrw types.ValueReadWriter, str types.Struct, reps int) { 113 s.Equal("Columnar", str.Name()) 114 115 lists := map[string]types.List{} 116 for _, nm := range []string{"year", "a", "b", "c"} { 117 l := str.Get(nm).(types.Ref).TargetValue(vrw).(types.List) 118 s.Equal(uint64(reps*TEST_DATA_SIZE), l.Len()) 119 lists[nm] = l 120 } 121 122 for i := 0; i < reps*TEST_DATA_SIZE; i++ { 123 s.Equal(types.Number(TEST_YEAR+i%3), lists["year"].Get(uint64(i))) 124 s.Equal(types.String(fmt.Sprintf("a%d", i)), lists["a"].Get(uint64(i))) 125 s.Equal(types.Number(i), lists["b"].Get(uint64(i))) 126 s.Equal(types.Number(i*2), lists["c"].Get(uint64(i))) 127 } 128 } 129 130 func (s *testSuite) TestCSVImporter() { 131 setName := "csv" 132 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 133 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName}) 134 s.Equal("", stdout) 135 s.Equal("", stderr) 136 137 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 138 defer os.RemoveAll(s.DBDir) 139 defer db.Close() 140 ds := db.GetDataset(setName) 141 142 s.validateList(ds.HeadValue().(types.List)) 143 } 144 145 func (s *testSuite) TestCSVImporterLowercase() { 146 input, err := ioutil.TempFile(s.TempDir, "") 147 d.Chk.NoError(err) 148 defer input.Close() 149 writeCSVWithHeader(input, "YeAr,a,B,c\n", 0) 150 defer os.Remove(input.Name()) 151 152 setName := "csv" 153 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 154 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--lowercase", "--column-types", TEST_FIELDS, dataspec, input.Name()}) 155 s.Equal("", stdout) 156 s.Equal("", stderr) 157 158 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 159 defer os.RemoveAll(s.DBDir) 160 defer db.Close() 161 ds := db.GetDataset(setName) 162 163 s.validateList(ds.HeadValue().(types.List)) 164 } 165 166 func (s *testSuite) TestCSVImporterLowercaseDuplicate() { 167 input, err := ioutil.TempFile(s.TempDir, "") 168 d.Chk.NoError(err) 169 defer input.Close() 170 writeCSVWithHeader(input, "YeAr,a,B,year\n", 0) 171 defer os.Remove(input.Name()) 172 173 setName := "csv" 174 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 175 _, stderr, _ := s.Run(main, []string{"--no-progress", "--lowercase", "--column-types", TEST_FIELDS, dataspec, input.Name()}) 176 s.Contains(stderr, "must be unique") 177 } 178 179 func (s *testSuite) TestCSVImporterFromBlob() { 180 test := func(pathFlag string) { 181 defer os.RemoveAll(s.DBDir) 182 183 newDB := func() datas.Database { 184 os.Mkdir(s.DBDir, 0777) 185 cs := nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize) 186 return datas.NewDatabase(cs) 187 } 188 189 db := newDB() 190 rawDS := db.GetDataset("raw") 191 csv := &bytes.Buffer{} 192 writeCSV(csv) 193 db.CommitValue(rawDS, types.NewBlob(db, csv)) 194 db.Close() 195 196 stdout, stderr := s.MustRun(main, []string{ 197 "--no-progress", "--column-types", TEST_FIELDS, 198 pathFlag, spec.CreateValueSpecString("nbs", s.DBDir, "raw.value"), 199 spec.CreateValueSpecString("nbs", s.DBDir, "csv"), 200 }) 201 s.Equal("", stdout) 202 s.Equal("", stderr) 203 204 db = newDB() 205 defer db.Close() 206 csvDS := db.GetDataset("csv") 207 s.validateList(csvDS.HeadValue().(types.List)) 208 } 209 test("--path") 210 test("-p") 211 } 212 213 func (s *testSuite) TestCSVImporterToMap() { 214 setName := "csv" 215 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 216 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:1", dataspec, s.tmpFileName}) 217 s.Equal("", stdout) 218 s.Equal("", stderr) 219 220 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 221 defer os.RemoveAll(s.DBDir) 222 defer db.Close() 223 ds := db.GetDataset(setName) 224 225 m := ds.HeadValue().(types.Map) 226 s.validateMap(db, m) 227 } 228 229 func (s *testSuite) TestCSVImporterToNestedMap() { 230 setName := "csv" 231 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 232 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:0,1", dataspec, s.tmpFileName}) 233 s.Equal("", stdout) 234 s.Equal("", stderr) 235 236 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 237 defer os.RemoveAll(s.DBDir) 238 defer db.Close() 239 ds := db.GetDataset(setName) 240 241 m := ds.HeadValue().(types.Map) 242 s.validateNestedMap(db, m) 243 } 244 245 func (s *testSuite) TestCSVImporterToNestedMapByName() { 246 setName := "csv" 247 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 248 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", TEST_FIELDS, "--dest-type", "map:year,a", dataspec, s.tmpFileName}) 249 s.Equal("", stdout) 250 s.Equal("", stderr) 251 252 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 253 defer os.RemoveAll(s.DBDir) 254 defer db.Close() 255 ds := db.GetDataset(setName) 256 257 m := ds.HeadValue().(types.Map) 258 s.validateNestedMap(db, m) 259 } 260 261 func (s *testSuite) TestCSVImporterToColumnar() { 262 setName := "csv" 263 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 264 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--invert", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName}) 265 s.Equal("", stdout) 266 s.Equal("", stderr) 267 268 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 269 defer os.RemoveAll(s.DBDir) 270 defer db.Close() 271 ds := db.GetDataset(setName) 272 273 str := ds.HeadValue().(types.Struct) 274 s.validateColumnar(db, str, 1) 275 } 276 277 func (s *testSuite) TestCSVImporterToColumnarAppend() { 278 setName := "csv" 279 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 280 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--invert", "--column-types", TEST_FIELDS, dataspec, s.tmpFileName}) 281 s.Equal("", stdout) 282 s.Equal("", stderr) 283 284 input, err := ioutil.TempFile(s.TempDir, "") 285 d.Chk.NoError(err) 286 defer input.Close() 287 writeCSVWithHeader(input, "year,a,b,c\n", 100) 288 defer os.Remove(input.Name()) 289 290 stdout, stderr = s.MustRun(main, []string{"--no-progress", "--invert", "--append", "--column-types", TEST_FIELDS, dataspec, input.Name()}) 291 s.Equal("", stdout) 292 s.Equal("", stderr) 293 294 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 295 defer os.RemoveAll(s.DBDir) 296 defer db.Close() 297 ds := db.GetDataset(setName) 298 299 str := ds.HeadValue().(types.Struct) 300 s.validateColumnar(db, str, 2) 301 } 302 303 func (s *testSuite) TestCSVImporterWithPipe() { 304 input, err := ioutil.TempFile(s.TempDir, "") 305 d.Chk.NoError(err) 306 defer input.Close() 307 defer os.Remove(input.Name()) 308 309 _, err = input.WriteString("a|b\n1|2\n") 310 d.Chk.NoError(err) 311 312 setName := "csv" 313 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 314 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", "String,Number", "--delimiter", "|", dataspec, input.Name()}) 315 s.Equal("", stdout) 316 s.Equal("", stderr) 317 318 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 319 defer os.RemoveAll(s.DBDir) 320 defer db.Close() 321 ds := db.GetDataset(setName) 322 323 l := ds.HeadValue().(types.List) 324 s.Equal(uint64(1), l.Len()) 325 v := l.Get(0) 326 st := v.(types.Struct) 327 s.Equal(types.String("1"), st.Get("a")) 328 s.Equal(types.Number(2), st.Get("b")) 329 } 330 331 func (s *testSuite) TestCSVImporterWithExternalHeader() { 332 input, err := ioutil.TempFile(s.TempDir, "") 333 d.Chk.NoError(err) 334 defer input.Close() 335 defer os.Remove(input.Name()) 336 337 _, err = input.WriteString("7,8\n") 338 d.Chk.NoError(err) 339 340 setName := "csv" 341 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 342 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--column-types", "String,Number", "--header", "x,y", dataspec, input.Name()}) 343 s.Equal("", stdout) 344 s.Equal("", stderr) 345 346 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 347 defer os.RemoveAll(s.DBDir) 348 defer db.Close() 349 ds := db.GetDataset(setName) 350 351 l := ds.HeadValue().(types.List) 352 s.Equal(uint64(1), l.Len()) 353 v := l.Get(0) 354 st := v.(types.Struct) 355 s.Equal(types.String("7"), st.Get("x")) 356 s.Equal(types.Number(8), st.Get("y")) 357 } 358 359 func (s *testSuite) TestCSVImporterWithInvalidExternalHeader() { 360 input, err := ioutil.TempFile(s.TempDir, "") 361 d.Chk.NoError(err) 362 defer input.Close() 363 defer os.Remove(input.Name()) 364 365 _, err = input.WriteString("7#8\n") 366 d.Chk.NoError(err) 367 368 setName := "csv" 369 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 370 stdout, stderr, exitErr := s.Run(main, []string{"--no-progress", "--column-types", "String,Number", "--header", "x,x", dataspec, input.Name()}) 371 s.Equal("", stdout) 372 s.Equal("error: Invalid headers specified, headers must be unique\n", stderr) 373 s.Equal(clienttest.ExitError{Code: 1}, exitErr) 374 } 375 376 func (s *testSuite) TestCSVImporterWithInvalidNumColumnTypeSpec() { 377 input, err := ioutil.TempFile(s.TempDir, "") 378 d.Chk.NoError(err) 379 defer input.Close() 380 defer os.Remove(input.Name()) 381 382 _, err = input.WriteString("7,8\n") 383 d.Chk.NoError(err) 384 385 setName := "csv" 386 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 387 stdout, stderr, exitErr := s.Run(main, []string{"--no-progress", "--column-types", "String", "--header", "x,y", dataspec, input.Name()}) 388 s.Equal("", stdout) 389 s.Equal("error: Invalid column-types specified, column types do not correspond to number of headers\n", stderr) 390 s.Equal(clienttest.ExitError{Code: 1}, exitErr) 391 } 392 393 func (s *testSuite) TestCSVImportSkipRecords() { 394 input, err := ioutil.TempFile(s.TempDir, "") 395 d.Chk.NoError(err) 396 defer input.Close() 397 defer os.Remove(input.Name()) 398 399 _, err = input.WriteString("garbage foo\n") 400 d.Chk.NoError(err) 401 402 _, err = input.WriteString("garbage bar\n") 403 d.Chk.NoError(err) 404 405 _, err = input.WriteString("a,b\n") 406 d.Chk.NoError(err) 407 408 _, err = input.WriteString("7,8\n") 409 d.Chk.NoError(err) 410 411 setName := "csv" 412 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 413 414 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--skip-records", "2", dataspec, input.Name()}) 415 s.Equal("", stdout) 416 s.Equal("", stderr) 417 418 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 419 defer os.RemoveAll(s.DBDir) 420 defer db.Close() 421 ds := db.GetDataset(setName) 422 423 l := ds.HeadValue().(types.List) 424 s.Equal(uint64(1), l.Len()) 425 v := l.Get(0) 426 st := v.(types.Struct) 427 s.Equal(types.String("7"), st.Get("a")) 428 s.Equal(types.String("8"), st.Get("b")) 429 } 430 431 func (s *testSuite) TestCSVImportSkipRecordsTooMany() { 432 input, err := ioutil.TempFile(s.TempDir, "") 433 d.Chk.NoError(err) 434 defer input.Close() 435 defer os.Remove(input.Name()) 436 437 _, err = input.WriteString("a,b\n") 438 d.Chk.NoError(err) 439 440 setName := "csv" 441 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 442 443 stdout, stderr, recoveredErr := s.Run(main, []string{"--no-progress", "--skip-records", "100", dataspec, input.Name()}) 444 s.Equal("", stdout) 445 s.Equal("error: skip-records skipped past EOF\n", stderr) 446 s.Equal(clienttest.ExitError{Code: 1}, recoveredErr) 447 } 448 449 func (s *testSuite) TestCSVImportSkipRecordsCustomHeader() { 450 input, err := ioutil.TempFile(s.TempDir, "") 451 d.Chk.NoError(err) 452 defer input.Close() 453 defer os.Remove(input.Name()) 454 455 _, err = input.WriteString("a,b\n") 456 d.Chk.NoError(err) 457 458 _, err = input.WriteString("7,8\n") 459 d.Chk.NoError(err) 460 461 setName := "csv" 462 dataspec := spec.CreateValueSpecString("nbs", s.DBDir, setName) 463 stdout, stderr := s.MustRun(main, []string{"--no-progress", "--skip-records", "1", "--header", "x,y", dataspec, input.Name()}) 464 s.Equal("", stdout) 465 s.Equal("", stderr) 466 467 db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) 468 defer os.RemoveAll(s.DBDir) 469 defer db.Close() 470 ds := db.GetDataset(setName) 471 472 l := ds.HeadValue().(types.List) 473 s.Equal(uint64(1), l.Len()) 474 v := l.Get(0) 475 st := v.(types.Struct) 476 s.Equal(types.String("7"), st.Get("x")) 477 s.Equal(types.String("8"), st.Get("y")) 478 }