github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-analyze/analyze.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"strings"
    11  
    12  	"github.com/attic-labs/kingpin"
    13  
    14  	"github.com/attic-labs/noms/go/d"
    15  	"github.com/attic-labs/noms/go/types"
    16  	"github.com/attic-labs/noms/go/util/profile"
    17  	"github.com/attic-labs/noms/samples/go/csv"
    18  )
    19  
    20  func main() {
    21  	app := kingpin.New("csv-analyze", "")
    22  	// Actually the delimiter uses runes, which can be multiple characters long.
    23  	// https://blog.golang.org/strings
    24  	delimiter := app.Flag("delimiter", "field delimiter for csv file, must be exactly one character long.").String()
    25  	header := app.Flag("header", "header row. If empty, we'll use the first row of the file").String()
    26  	skipRecords := app.Flag("skip-records", "number of records to skip at beginning of file").Uint()
    27  	detectColumnTypes := app.Flag("detect-column-types", "detect column types by analyzing a portion of csv file").Bool()
    28  	detectPrimaryKeys := app.Flag("detect-pk", "detect primary key candidates by analyzing a portion of csv file").Bool()
    29  	numSamples := app.Flag("num-samples", "number of records to use for samples").Default("1000000").Int()
    30  	numFieldsInPK := app.Flag("num-fields-pk", "maximum number of columns to consider when detecting PKs").Default("3").Int()
    31  	r := app.Arg("filepath", "csv file to analyze").Required().File()
    32  
    33  	profile.RegisterProfileFlags(app)
    34  
    35  	kingpin.MustParse(app.Parse(os.Args[1:]))
    36  
    37  	defer profile.MaybeStartProfile().Stop()
    38  	defer (*r).Close()
    39  
    40  	comma, err := csv.StringToRune(*delimiter)
    41  	d.CheckError(err)
    42  
    43  	cr := csv.NewCSVReader(*r, comma)
    44  	csv.SkipRecords(cr, *skipRecords)
    45  
    46  	var headers []string
    47  	if *header == "" {
    48  		headers, err = cr.Read()
    49  		d.PanicIfError(err)
    50  	} else {
    51  		headers = strings.Split(*header, string(comma))
    52  	}
    53  
    54  	kinds := []types.NomsKind{}
    55  	if *detectColumnTypes {
    56  		kinds = csv.GetSchema(cr, *numSamples, len(headers))
    57  		fmt.Fprintf(os.Stdout, "%s\n", strings.Join(csv.KindsToStrings(kinds), ","))
    58  	}
    59  
    60  	if *detectPrimaryKeys {
    61  		pks := csv.FindPrimaryKeys(cr, *numSamples, *numFieldsInPK, len(headers))
    62  		for _, pk := range pks {
    63  			fmt.Fprintf(os.Stdout, "%s\n", strings.Join(csv.GetFieldNamesFromIndices(headers, pk), ","))
    64  		}
    65  	}
    66  }