github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-invert/main.go (about) 1 // Copyright 2017 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "fmt" 9 "os" 10 "strings" 11 12 "github.com/attic-labs/kingpin" 13 14 "github.com/attic-labs/noms/go/config" 15 "github.com/attic-labs/noms/go/d" 16 "github.com/attic-labs/noms/go/datas" 17 "github.com/attic-labs/noms/go/types" 18 "github.com/attic-labs/noms/go/util/profile" 19 ) 20 21 func main() { 22 app := kingpin.New("csv-invert", "") 23 input := app.Arg("input-dataset", "dataset to invert").Required().String() 24 output := app.Arg("output-dataset", "dataset to write to").Required().String() 25 26 profile.RegisterProfileFlags(app) 27 kingpin.MustParse(app.Parse(os.Args[1:])) 28 29 cfg := config.NewResolver() 30 inDB, inDS, err := cfg.GetDataset(*input) 31 d.CheckError(err) 32 defer inDB.Close() 33 34 head, present := inDS.MaybeHead() 35 if !present { 36 d.CheckErrorNoUsage(fmt.Errorf("The dataset %s has no head", *input)) 37 } 38 v := head.Get(datas.ValueField) 39 l, isList := v.(types.List) 40 if !isList { 41 d.CheckErrorNoUsage(fmt.Errorf("The head value of %s is not a list, but rather %s", *input, types.TypeOf(v).Describe())) 42 } 43 44 outDB, outDS, err := cfg.GetDataset(*output) 45 defer outDB.Close() 46 47 // I don't want to allocate a new types.Value every time someone calls zeroVal(), so instead have a map of canned Values to reference. 48 zeroVals := map[types.NomsKind]types.Value{ 49 types.BoolKind: types.Bool(false), 50 types.NumberKind: types.Number(0), 51 types.StringKind: types.String(""), 52 } 53 54 zeroVal := func(t *types.Type) types.Value { 55 v, present := zeroVals[t.TargetKind()] 56 if !present { 57 d.CheckErrorNoUsage(fmt.Errorf("csv-invert doesn't support values of type %s", t.Describe())) 58 } 59 return v 60 } 61 62 defer profile.MaybeStartProfile().Stop() 63 type stream struct { 64 ch chan types.Value 65 zeroVal types.Value 66 } 67 streams := map[string]*stream{} 68 lists := map[string]<-chan types.List{} 69 lowers := map[string]string{} 70 71 sDesc := types.TypeOf(l).Desc.(types.CompoundDesc).ElemTypes[0].Desc.(types.StructDesc) 72 sDesc.IterFields(func(name string, t *types.Type, optional bool) { 73 lowerName := strings.ToLower(name) 74 if _, present := streams[lowerName]; !present { 75 s := &stream{make(chan types.Value, 1024), zeroVal(t)} 76 streams[lowerName] = s 77 lists[lowerName] = types.NewStreamingList(outDB, s.ch) 78 } 79 lowers[name] = lowerName 80 }) 81 82 filledCols := make(map[string]struct{}, len(streams)) 83 l.IterAll(func(v types.Value, index uint64) { 84 // First, iterate the fields that are present in |v| and append values to the correct lists 85 v.(types.Struct).IterFields(func(name string, value types.Value) bool { 86 ln := lowers[name] 87 filledCols[ln] = struct{}{} 88 streams[ln].ch <- value 89 90 return false 91 }) 92 // Second, iterate all the streams, skipping the ones we already sent a value for, and send an empty String for the remaining ones. 93 for lowerName, stream := range streams { 94 if _, present := filledCols[lowerName]; present { 95 delete(filledCols, lowerName) 96 continue 97 } 98 stream.ch <- stream.zeroVal 99 } 100 }) 101 102 invertedStructData := types.StructData{} 103 for lowerName, stream := range streams { 104 close(stream.ch) 105 invertedStructData[lowerName] = <-lists[lowerName] 106 } 107 str := types.NewStruct("Columnar", invertedStructData) 108 109 parents := types.NewSet(outDB) 110 if headRef, present := outDS.MaybeHeadRef(); present { 111 parents = types.NewSet(outDB, headRef) 112 } 113 114 _, err = outDB.Commit(outDS, str, datas.CommitOptions{Parents: parents, Meta: head.Get(datas.MetaField).(types.Struct)}) 115 d.PanicIfError(err) 116 }