github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-analyze/analyze.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "fmt" 9 "os" 10 "strings" 11 12 "github.com/attic-labs/kingpin" 13 14 "github.com/attic-labs/noms/go/d" 15 "github.com/attic-labs/noms/go/types" 16 "github.com/attic-labs/noms/go/util/profile" 17 "github.com/attic-labs/noms/samples/go/csv" 18 ) 19 20 func main() { 21 app := kingpin.New("csv-analyze", "") 22 // Actually the delimiter uses runes, which can be multiple characters long. 23 // https://blog.golang.org/strings 24 delimiter := app.Flag("delimiter", "field delimiter for csv file, must be exactly one character long.").String() 25 header := app.Flag("header", "header row. If empty, we'll use the first row of the file").String() 26 skipRecords := app.Flag("skip-records", "number of records to skip at beginning of file").Uint() 27 detectColumnTypes := app.Flag("detect-column-types", "detect column types by analyzing a portion of csv file").Bool() 28 detectPrimaryKeys := app.Flag("detect-pk", "detect primary key candidates by analyzing a portion of csv file").Bool() 29 numSamples := app.Flag("num-samples", "number of records to use for samples").Default("1000000").Int() 30 numFieldsInPK := app.Flag("num-fields-pk", "maximum number of columns to consider when detecting PKs").Default("3").Int() 31 r := app.Arg("filepath", "csv file to analyze").Required().File() 32 33 profile.RegisterProfileFlags(app) 34 35 kingpin.MustParse(app.Parse(os.Args[1:])) 36 37 defer profile.MaybeStartProfile().Stop() 38 defer (*r).Close() 39 40 comma, err := csv.StringToRune(*delimiter) 41 d.CheckError(err) 42 43 cr := csv.NewCSVReader(*r, comma) 44 csv.SkipRecords(cr, *skipRecords) 45 46 var headers []string 47 if *header == "" { 48 headers, err = cr.Read() 49 d.PanicIfError(err) 50 } else { 51 headers = strings.Split(*header, string(comma)) 52 } 53 54 kinds := []types.NomsKind{} 55 if *detectColumnTypes { 56 kinds = csv.GetSchema(cr, *numSamples, len(headers)) 57 fmt.Fprintf(os.Stdout, "%s\n", strings.Join(csv.KindsToStrings(kinds), ",")) 58 } 59 60 if *detectPrimaryKeys { 61 pks := csv.FindPrimaryKeys(cr, *numSamples, *numFieldsInPK, len(headers)) 62 for _, pk := range pks { 63 fmt.Fprintf(os.Stdout, "%s\n", strings.Join(csv.GetFieldNamesFromIndices(headers, pk), ",")) 64 } 65 } 66 }