github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-invert/main.go (about)

     1  // Copyright 2017 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"strings"
    11  
    12  	"github.com/attic-labs/kingpin"
    13  
    14  	"github.com/attic-labs/noms/go/config"
    15  	"github.com/attic-labs/noms/go/d"
    16  	"github.com/attic-labs/noms/go/datas"
    17  	"github.com/attic-labs/noms/go/types"
    18  	"github.com/attic-labs/noms/go/util/profile"
    19  )
    20  
    21  func main() {
    22  	app := kingpin.New("csv-invert", "")
    23  	input := app.Arg("input-dataset", "dataset to invert").Required().String()
    24  	output := app.Arg("output-dataset", "dataset to write to").Required().String()
    25  
    26  	profile.RegisterProfileFlags(app)
    27  	kingpin.MustParse(app.Parse(os.Args[1:]))
    28  
    29  	cfg := config.NewResolver()
    30  	inDB, inDS, err := cfg.GetDataset(*input)
    31  	d.CheckError(err)
    32  	defer inDB.Close()
    33  
    34  	head, present := inDS.MaybeHead()
    35  	if !present {
    36  		d.CheckErrorNoUsage(fmt.Errorf("The dataset %s has no head", *input))
    37  	}
    38  	v := head.Get(datas.ValueField)
    39  	l, isList := v.(types.List)
    40  	if !isList {
    41  		d.CheckErrorNoUsage(fmt.Errorf("The head value of %s is not a list, but rather %s", *input, types.TypeOf(v).Describe()))
    42  	}
    43  
    44  	outDB, outDS, err := cfg.GetDataset(*output)
    45  	defer outDB.Close()
    46  
    47  	// I don't want to allocate a new types.Value every time someone calls zeroVal(), so instead have a map of canned Values to reference.
    48  	zeroVals := map[types.NomsKind]types.Value{
    49  		types.BoolKind:   types.Bool(false),
    50  		types.NumberKind: types.Number(0),
    51  		types.StringKind: types.String(""),
    52  	}
    53  
    54  	zeroVal := func(t *types.Type) types.Value {
    55  		v, present := zeroVals[t.TargetKind()]
    56  		if !present {
    57  			d.CheckErrorNoUsage(fmt.Errorf("csv-invert doesn't support values of type %s", t.Describe()))
    58  		}
    59  		return v
    60  	}
    61  
    62  	defer profile.MaybeStartProfile().Stop()
    63  	type stream struct {
    64  		ch      chan types.Value
    65  		zeroVal types.Value
    66  	}
    67  	streams := map[string]*stream{}
    68  	lists := map[string]<-chan types.List{}
    69  	lowers := map[string]string{}
    70  
    71  	sDesc := types.TypeOf(l).Desc.(types.CompoundDesc).ElemTypes[0].Desc.(types.StructDesc)
    72  	sDesc.IterFields(func(name string, t *types.Type, optional bool) {
    73  		lowerName := strings.ToLower(name)
    74  		if _, present := streams[lowerName]; !present {
    75  			s := &stream{make(chan types.Value, 1024), zeroVal(t)}
    76  			streams[lowerName] = s
    77  			lists[lowerName] = types.NewStreamingList(outDB, s.ch)
    78  		}
    79  		lowers[name] = lowerName
    80  	})
    81  
    82  	filledCols := make(map[string]struct{}, len(streams))
    83  	l.IterAll(func(v types.Value, index uint64) {
    84  		// First, iterate the fields that are present in |v| and append values to the correct lists
    85  		v.(types.Struct).IterFields(func(name string, value types.Value) bool {
    86  			ln := lowers[name]
    87  			filledCols[ln] = struct{}{}
    88  			streams[ln].ch <- value
    89  
    90  			return false
    91  		})
    92  		// Second, iterate all the streams, skipping the ones we already sent a value for, and send an empty String for the remaining ones.
    93  		for lowerName, stream := range streams {
    94  			if _, present := filledCols[lowerName]; present {
    95  				delete(filledCols, lowerName)
    96  				continue
    97  			}
    98  			stream.ch <- stream.zeroVal
    99  		}
   100  	})
   101  
   102  	invertedStructData := types.StructData{}
   103  	for lowerName, stream := range streams {
   104  		close(stream.ch)
   105  		invertedStructData[lowerName] = <-lists[lowerName]
   106  	}
   107  	str := types.NewStruct("Columnar", invertedStructData)
   108  
   109  	parents := types.NewSet(outDB)
   110  	if headRef, present := outDS.MaybeHeadRef(); present {
   111  		parents = types.NewSet(outDB, headRef)
   112  	}
   113  
   114  	_, err = outDB.Commit(outDS, str, datas.CommitOptions{Parents: parents, Meta: head.Get(datas.MetaField).(types.Struct)})
   115  	d.PanicIfError(err)
   116  }