github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/read_test.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package csv
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/csv"
    10  	"math"
    11  	"testing"
    12  
    13  	"github.com/attic-labs/noms/go/chunks"
    14  	"github.com/attic-labs/noms/go/datas"
    15  	"github.com/attic-labs/noms/go/types"
    16  	"github.com/stretchr/testify/assert"
    17  )
    18  
    19  var LIMIT = uint64(math.MaxUint64)
    20  
    21  func TestReadToList(t *testing.T) {
    22  	assert := assert.New(t)
    23  	storage := &chunks.MemoryStorage{}
    24  	db := datas.NewDatabase(storage.NewView())
    25  
    26  	dataString := `a,1,true
    27  b,2,false
    28  `
    29  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
    30  
    31  	headers := []string{"A", "B", "C"}
    32  	kinds := KindSlice{types.StringKind, types.NumberKind, types.BoolKind}
    33  	l := ReadToList(r, "test", headers, kinds, db, LIMIT)
    34  
    35  	assert.Equal(uint64(2), l.Len())
    36  
    37  	assert.True(l.Get(0).(types.Struct).Get("A").Equals(types.String("a")))
    38  	assert.True(l.Get(1).(types.Struct).Get("A").Equals(types.String("b")))
    39  
    40  	assert.True(l.Get(0).(types.Struct).Get("B").Equals(types.Number(1)))
    41  	assert.True(l.Get(1).(types.Struct).Get("B").Equals(types.Number(2)))
    42  
    43  	assert.True(l.Get(0).(types.Struct).Get("C").Equals(types.Bool(true)))
    44  	assert.True(l.Get(1).(types.Struct).Get("C").Equals(types.Bool(false)))
    45  }
    46  
    47  func TestReadToMap(t *testing.T) {
    48  	assert := assert.New(t)
    49  	storage := &chunks.MemoryStorage{}
    50  	db := datas.NewDatabase(storage.NewView())
    51  
    52  	dataString := `a,1,true
    53  b,2,false
    54  `
    55  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
    56  
    57  	headers := []string{"A", "B", "C"}
    58  	kinds := KindSlice{types.StringKind, types.NumberKind, types.BoolKind}
    59  	m := ReadToMap(r, "test", headers, []string{"0"}, kinds, db, LIMIT)
    60  
    61  	assert.Equal(uint64(2), m.Len())
    62  	assert.True(types.TypeOf(m).Equals(
    63  		types.MakeMapType(types.StringType, types.MakeStructType("test",
    64  			types.StructField{Name: "A", Type: types.StringType, Optional: false},
    65  			types.StructField{Name: "B", Type: types.NumberType, Optional: false},
    66  			types.StructField{Name: "C", Type: types.BoolType, Optional: false},
    67  		))))
    68  
    69  	assert.True(m.Get(types.String("a")).Equals(types.NewStruct("test", types.StructData{
    70  		"A": types.String("a"),
    71  		"B": types.Number(1),
    72  		"C": types.Bool(true),
    73  	})))
    74  	assert.True(m.Get(types.String("b")).Equals(types.NewStruct("test", types.StructData{
    75  		"A": types.String("b"),
    76  		"B": types.Number(2),
    77  		"C": types.Bool(false),
    78  	})))
    79  }
    80  
    81  func testTrailingHelper(t *testing.T, dataString string) {
    82  	assert := assert.New(t)
    83  	storage := &chunks.MemoryStorage{}
    84  	db1 := datas.NewDatabase(storage.NewView())
    85  	defer db1.Close()
    86  
    87  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
    88  
    89  	headers := []string{"A", "B"}
    90  	kinds := KindSlice{types.StringKind, types.StringKind}
    91  	l := ReadToList(r, "test", headers, kinds, db1, LIMIT)
    92  	assert.Equal(uint64(3), l.Len())
    93  
    94  	storage = &chunks.MemoryStorage{}
    95  	db2 := datas.NewDatabase(storage.NewView())
    96  	defer db2.Close()
    97  	r = NewCSVReader(bytes.NewBufferString(dataString), ',')
    98  	m := ReadToMap(r, "test", headers, []string{"0"}, kinds, db2, LIMIT)
    99  	assert.Equal(uint64(3), m.Len())
   100  }
   101  
   102  func TestReadTrailingHole(t *testing.T) {
   103  	dataString := `a,b,
   104  d,e,
   105  g,h,
   106  `
   107  	testTrailingHelper(t, dataString)
   108  }
   109  
   110  func TestReadTrailingHoles(t *testing.T) {
   111  	dataString := `a,b,,
   112  d,e
   113  g,h
   114  `
   115  	testTrailingHelper(t, dataString)
   116  }
   117  
   118  func TestReadTrailingValues(t *testing.T) {
   119  	dataString := `a,b
   120  d,e,f
   121  g,h,i,j
   122  `
   123  	testTrailingHelper(t, dataString)
   124  }
   125  
   126  func TestEscapeStructFieldFromCSV(t *testing.T) {
   127  	assert := assert.New(t)
   128  	cases := []string{
   129  		"a", "a",
   130  		"1a", "a",
   131  		"AaZz19_", "AaZz19_",
   132  		"Q", "Q",
   133  		"AQ", "AQ",
   134  		"_content", "content",
   135  		"Few ¢ents Short", "fewEntsShort",
   136  		"CAMEL💩case letTerS", "camelcaseLetters",
   137  		"https://picasaweb.google.com/data", "httpspicasawebgooglecomdata",
   138  		"💩", "",
   139  		"11 1💩", "",
   140  		"-- A B", "aB",
   141  		"-- A --", "a",
   142  		"-- A -- B", "aB",
   143  	}
   144  
   145  	for i := 0; i < len(cases); i += 2 {
   146  		orig, expected := cases[i], cases[i+1]
   147  		assert.Equal(expected, EscapeStructFieldFromCSV(orig))
   148  	}
   149  }
   150  
   151  func TestReadParseError(t *testing.T) {
   152  	assert := assert.New(t)
   153  	storage := &chunks.MemoryStorage{}
   154  	db := datas.NewDatabase(storage.NewView())
   155  
   156  	dataString := `a,"b`
   157  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
   158  
   159  	headers := []string{"A", "B"}
   160  	kinds := KindSlice{types.StringKind, types.StringKind}
   161  	func() {
   162  		defer func() {
   163  			r := recover()
   164  			assert.NotNil(r)
   165  			_, ok := r.(*csv.ParseError)
   166  			assert.True(ok, "Should be a ParseError")
   167  		}()
   168  		ReadToList(r, "test", headers, kinds, db, LIMIT)
   169  	}()
   170  }
   171  
   172  func TestDuplicateHeaderName(t *testing.T) {
   173  	assert := assert.New(t)
   174  	storage := &chunks.MemoryStorage{}
   175  	db := datas.NewDatabase(storage.NewView())
   176  	dataString := "1,2\n3,4\n"
   177  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
   178  	headers := []string{"A", "A"}
   179  	kinds := KindSlice{types.StringKind, types.StringKind}
   180  	assert.Panics(func() { ReadToList(r, "test", headers, kinds, db, LIMIT) })
   181  }
   182  
   183  func TestEscapeFieldNames(t *testing.T) {
   184  	assert := assert.New(t)
   185  	storage := &chunks.MemoryStorage{}
   186  	db := datas.NewDatabase(storage.NewView())
   187  	dataString := "1,2\n"
   188  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
   189  	headers := []string{"A A", "B"}
   190  	kinds := KindSlice{types.NumberKind, types.NumberKind}
   191  
   192  	l := ReadToList(r, "test", headers, kinds, db, LIMIT)
   193  	assert.Equal(uint64(1), l.Len())
   194  	assert.Equal(types.Number(1), l.Get(0).(types.Struct).Get(EscapeStructFieldFromCSV("A A")))
   195  
   196  	r = NewCSVReader(bytes.NewBufferString(dataString), ',')
   197  	m := ReadToMap(r, "test", headers, []string{"1"}, kinds, db, LIMIT)
   198  	assert.Equal(uint64(1), l.Len())
   199  	assert.Equal(types.Number(1), m.Get(types.Number(2)).(types.Struct).Get(EscapeStructFieldFromCSV("A A")))
   200  }
   201  
   202  func TestDefaults(t *testing.T) {
   203  	assert := assert.New(t)
   204  	storage := &chunks.MemoryStorage{}
   205  	db := datas.NewDatabase(storage.NewView())
   206  	dataString := "42,,,\n"
   207  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
   208  	headers := []string{"A", "B", "C", "D"}
   209  	kinds := KindSlice{types.NumberKind, types.NumberKind, types.BoolKind, types.StringKind}
   210  
   211  	l := ReadToList(r, "test", headers, kinds, db, LIMIT)
   212  	assert.Equal(uint64(1), l.Len())
   213  	row := l.Get(0).(types.Struct)
   214  	assert.Equal(types.Number(42), row.Get("A"))
   215  	assert.Equal(types.Number(0), row.Get("B"))
   216  	assert.Equal(types.Bool(false), row.Get("C"))
   217  	assert.Equal(types.String(""), row.Get("D"))
   218  }
   219  
   220  func TestBooleanStrings(t *testing.T) {
   221  	assert := assert.New(t)
   222  	storage := &chunks.MemoryStorage{}
   223  	db := datas.NewDatabase(storage.NewView())
   224  	dataString := "true,false\n1,0\ny,n\nY,N\nY,\n"
   225  	r := NewCSVReader(bytes.NewBufferString(dataString), ',')
   226  	headers := []string{"T", "F"}
   227  	kinds := KindSlice{types.BoolKind, types.BoolKind}
   228  
   229  	l := ReadToList(r, "test", headers, kinds, db, LIMIT)
   230  	assert.Equal(uint64(5), l.Len())
   231  	for i := uint64(0); i < l.Len(); i++ {
   232  		row := l.Get(i).(types.Struct)
   233  		assert.True(types.Bool(true).Equals(row.Get("T")))
   234  		assert.True(types.Bool(false).Equals(row.Get("F")))
   235  	}
   236  }