github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/csv/csv-import/importer.go

// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package main

import (
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"strings"
	"time"

	"github.com/attic-labs/kingpin"
	humanize "github.com/dustin/go-humanize"

	"github.com/attic-labs/noms/go/config"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/datas"
	"github.com/attic-labs/noms/go/spec"
	"github.com/attic-labs/noms/go/types"
	"github.com/attic-labs/noms/go/util/profile"
	"github.com/attic-labs/noms/go/util/progressreader"
	"github.com/attic-labs/noms/go/util/status"
	"github.com/attic-labs/noms/go/util/verbose"
	"github.com/attic-labs/noms/samples/go/csv"
)

const (
	destList = iota
	destMap
)

func main() {
	app := kingpin.New("csv-importer", "")

	// The delimiter is actually parsed as a rune, which may be more than one
	// byte long: https://blog.golang.org/strings
	delimiter := app.Flag("delimiter", "field delimiter for csv file, must be exactly one character long.").Default(",").String()
	header := app.Flag("header", "header row. If empty, we'll use the first row of the file").String()
	lowercase := app.Flag("lowercase", "convert column names to lowercase (otherwise preserve the case in the resulting struct fields)").Bool()
	name := app.Flag("name", "struct name. The user-visible name to give to the struct type that will hold each row of data.").Default("Row").String()
	columnTypes := app.Flag("column-types", "a comma-separated list of types representing the desired type of each column. If absent, all types default to String").String()
	path := app.Flag("path", "noms path to blob to import").Short('p').String()
	noProgress := app.Flag("no-progress", "prevents progress from being output if true").Bool()
	destType := app.Flag("dest-type", "the destination type to import to. Can be 'list' or 'map:<pk>', where <pk> is a list of comma-delimited column headers or indexes (0-based) used to uniquely identify a row").Default("list").String()
	skipRecords := app.Flag("skip-records", "number of records to skip at beginning of file").Uint()
	limit := app.Flag("limit-records", "maximum number of records to process").Default(fmt.Sprintf("%d", math.MaxUint32)).Uint64()
	performCommit := app.Flag("commit", "commit the data to head of the dataset (otherwise only write the data to the dataset)").Default("true").Bool()
	appendFlag := app.Flag("append", "append new data to list at head of specified dataset.").Bool()
	invert := app.Flag("invert", "import rows in column-major format rather than row-major").Bool()
	dataset := app.Arg("dataset", "dataset to write to").Required().String()
	csvFile := app.Arg("csvfile", "csv file to import").String()

	verbose.RegisterVerboseFlags(app)
	profile.RegisterProfileFlags(app)

	kingpin.MustParse(app.Parse(os.Args[1:]))

	var err error
	switch {
	case *csvFile == "" && *path == "":
		err = errors.New("Either csvfile or path is required")
	case *csvFile != "" && *path != "":
		err = errors.New("Cannot specify both csvfile and path")
	case strings.HasPrefix(*destType, "map") && *appendFlag:
		err = errors.New("--append is only compatible with list imports")
	case strings.HasPrefix(*destType, "map") && *invert:
		err = errors.New("--invert is only compatible with list imports")
	}
	d.CheckError(err)

	defer profile.MaybeStartProfile().Stop()

	var r io.Reader
	var size uint64

	cfg := config.NewResolver()
	if *path != "" {
		db, val, err := cfg.GetPath(*path)
		d.CheckError(err)
		if val == nil {
			d.CheckError(fmt.Errorf("Path %s not found\n", *path))
		}
		blob, ok := val.(types.Blob)
		if !ok {
			d.CheckError(fmt.Errorf("Path %s not a Blob: %s\n", *path, types.EncodedValue(types.TypeOf(val))))
		}
		defer db.Close()
		// Stream the blob's contents through a pipe so it can be read like a file.
		preader, pwriter := io.Pipe()
		go func() {
			blob.Copy(pwriter)
			pwriter.Close()
		}()
		r = preader
		size = blob.Len()
	} else {
		res, err := os.Open(*csvFile)
		d.CheckError(err)
		defer res.Close()
		fi, err := res.Stat()
		d.CheckError(err)
		r = res
		size = uint64(fi.Size())
	}

	if !*noProgress {
		r = progressreader.New(r, getStatusPrinter(size))
	}

	delim, err := csv.StringToRune(*delimiter)
	d.CheckErrorNoUsage(err)

	var dest int
	var strPks []string
	if *destType == "list" {
		dest = destList
	} else if strings.HasPrefix(*destType, "map:") {
		dest = destMap
		strPks = strings.Split(strings.TrimPrefix(*destType, "map:"), ",")
		if len(strPks) == 0 {
			fmt.Println("Invalid dest-type map: ", *destType)
			return
		}
	} else {
		fmt.Println("Invalid dest-type: ", *destType)
		return
	}

	cr := csv.NewCSVReader(r, delim)
	err = csv.SkipRecords(cr, *skipRecords)

	if err == io.EOF {
		err = fmt.Errorf("skip-records skipped past EOF")
	}
	d.CheckErrorNoUsage(err)

	var headers []string
	if *header == "" {
		headers, err = cr.Read()
		d.PanicIfError(err)
	} else {
		headers = strings.Split(*header, ",")
	}
	if *lowercase {
		for i := range headers {
			headers[i] = strings.ToLower(headers[i])
		}
	}

	uniqueHeaders := make(map[string]bool)
	for _, header := range headers {
		uniqueHeaders[header] = true
	}
	if len(uniqueHeaders) != len(headers) {
		d.CheckErrorNoUsage(fmt.Errorf("Invalid headers specified, headers must be unique"))
	}

	kinds := []types.NomsKind{}
	if *columnTypes != "" {
		kinds = csv.StringsToKinds(strings.Split(*columnTypes, ","))
		if len(kinds) != len(uniqueHeaders) {
			d.CheckErrorNoUsage(fmt.Errorf("Invalid column-types specified, column types do not correspond to number of headers"))
		}
	}

	db, ds, err := cfg.GetDataset(*dataset)
	d.CheckError(err)
	defer db.Close()

	var value types.Value
	if dest == destMap {
		value = csv.ReadToMap(cr, *name, headers, strPks, kinds, db, *limit)
	} else if *invert {
		value = csv.ReadToColumnar(cr, *name, headers, kinds, db, *limit)
	} else {
		value = csv.ReadToList(cr, *name, headers, kinds, db, *limit)
	}

	if *performCommit {
		meta, err := spec.CreateCommitMetaStruct(ds.Database(), "", "", additionalMetaInfo(*csvFile, *path), nil)
		d.CheckErrorNoUsage(err)
		if *appendFlag {
			if headVal, present := ds.MaybeHeadValue(); present {
				switch headVal.Kind() {
				case types.ListKind:
					// Append to an existing List head by concatenating the
					// newly imported list onto it.
					l, isList := headVal.(types.List)
					d.PanicIfFalse(isList)
					ref := db.WriteValue(value)
					value = l.Concat(ref.TargetValue(db).(types.List))
				case types.StructKind:
					// Append to an existing Columnar head by concatenating
					// each column's list individually.
					hstr, isStruct := headVal.(types.Struct)
					d.PanicIfFalse(isStruct)
					d.PanicIfFalse(hstr.Name() == "Columnar")
					str := value.(types.Struct)
					hstr.IterFields(func(fieldname string, v types.Value) bool {
						hl := v.(types.Ref).TargetValue(db).(types.List)
						nl := str.Get(fieldname).(types.Ref).TargetValue(db).(types.List)
						l := hl.Concat(nl)
						r := db.WriteValue(l)
						str = str.Set(fieldname, r)

						return false
					})
					value = str
				default:
					d.Panic("append can only be used with list or columnar")
				}
			}
		}
		_, err = db.Commit(ds, value, datas.CommitOptions{Meta: meta})
		if !*noProgress {
			status.Clear()
		}
		d.PanicIfError(err)
	} else {
		ref := db.WriteValue(value)
		if !*noProgress {
			status.Clear()
		}
		fmt.Fprintf(os.Stdout, "#%s\n", ref.TargetHash().String())
	}
}

func additionalMetaInfo(filePath, nomsPath string) map[string]string {
	fileOrNomsPath := "inputPath"
	path := nomsPath
	if path == "" {
		path = filePath
		fileOrNomsPath = "inputFile"
	}
	return map[string]string{fileOrNomsPath: path}
}

func getStatusPrinter(expected uint64) progressreader.Callback {
	startTime := time.Now()
	return func(seen uint64) {
		percent := float64(seen) / float64(expected) * 100
		elapsed := time.Since(startTime)
		rate := float64(seen) / elapsed.Seconds()

		status.Printf("%.2f%% of %s (%s/s)...",
			percent,
			humanize.Bytes(expected),
			humanize.Bytes(uint64(rate)))
	}
}
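
// Example invocations, to illustrate how the flags above combine. This is a
// sketch: the database path, dataset name, column names, and file name below
// are hypothetical and not part of this repository.
//
//	# Import rows of people.csv as a List at the head of the "people" dataset:
//	csv-importer /tmp/noms::people people.csv
//
//	# Import into a Map keyed on the "id" column, lowercasing header names and
//	# typing the first two columns explicitly:
//	csv-importer --lowercase --dest-type=map:id --column-types=String,Number /tmp/noms::people people.csv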