github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/xml-import/xml_importer.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "fmt" 9 "log" 10 "os" 11 "path/filepath" 12 "runtime" 13 "sort" 14 "sync" 15 16 "github.com/attic-labs/kingpin" 17 "github.com/attic-labs/noms/go/config" 18 "github.com/attic-labs/noms/go/d" 19 "github.com/attic-labs/noms/go/datas" 20 "github.com/attic-labs/noms/go/spec" 21 "github.com/attic-labs/noms/go/types" 22 jsontonoms "github.com/attic-labs/noms/go/util/json" 23 "github.com/attic-labs/noms/go/util/profile" 24 "github.com/attic-labs/noms/go/util/verbose" 25 "github.com/clbanning/mxj" 26 ) 27 28 var ( 29 noIO = kingpin.Flag("benchmark", "Run in 'benchmark' mode: walk directories and parse XML files but do not write to Noms").Bool() 30 performCommit = kingpin.Flag("commit", "commit the data to head of the dataset (otherwise only write the data to the dataset)").Default("true").Bool() 31 rootDir = kingpin.Arg("dir", "directory to find for xml files in").Required().String() 32 dataset = kingpin.Arg("dataset", "dataset to write to").Required().String() 33 ) 34 35 type fileIndex struct { 36 path string 37 index int 38 } 39 40 type refIndex struct { 41 ref types.Ref 42 index int 43 } 44 45 type refIndexList []refIndex 46 47 func (a refIndexList) Len() int { return len(a) } 48 func (a refIndexList) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 49 func (a refIndexList) Less(i, j int) bool { return a[i].index < a[j].index } 50 51 func main() { 52 err := d.Try(func() { 53 verbose.RegisterVerboseFlags(kingpin.CommandLine) 54 profile.RegisterProfileFlags(kingpin.CommandLine) 55 kingpin.Parse() 56 57 cfg := config.NewResolver() 58 db, ds, err := cfg.GetDataset(*dataset) 59 d.CheckError(err) 60 defer db.Close() 61 62 defer profile.MaybeStartProfile().Stop() 63 64 cpuCount := runtime.NumCPU() 65 66 filesChan := make(chan fileIndex, 1024) 67 refsChan := make(chan refIndex, 1024) 68 69 getFilePaths := func() { 70 index := 0 71 err := filepath.Walk(*rootDir, func(path string, info os.FileInfo, err error) error { 72 if err != nil { 73 d.Panic("Cannot traverse directories") 74 } 75 if !info.IsDir() && filepath.Ext(path) == ".xml" { 76 filesChan <- fileIndex{path, index} 77 index++ 78 } 79 80 return nil 81 }) 82 d.PanicIfError(err) 83 close(filesChan) 84 } 85 86 wg := sync.WaitGroup{} 87 importXML := func() { 88 expectedType := types.NewMap(db) 89 for f := range filesChan { 90 file, err := os.Open(f.path) 91 if err != nil { 92 d.Panic("Error getting XML") 93 } 94 95 xmlObject, err := mxj.NewMapXmlReader(file) 96 if err != nil { 97 d.Panic("Error decoding XML") 98 } 99 object := xmlObject.Old() 100 file.Close() 101 102 nomsObj := jsontonoms.NomsValueFromDecodedJSON(db, object, false) 103 d.Chk.IsType(expectedType, nomsObj) 104 105 var r types.Ref 106 if !*noIO { 107 r = ds.Database().WriteValue(nomsObj) 108 } 109 110 refsChan <- refIndex{r, f.index} 111 } 112 113 wg.Done() 114 } 115 116 go getFilePaths() 117 for i := 0; i < cpuCount*8; i++ { 118 wg.Add(1) 119 go importXML() 120 } 121 go func() { 122 wg.Wait() 123 close(refsChan) // done converting xml to noms 124 }() 125 126 refList := refIndexList{} 127 for r := range refsChan { 128 refList = append(refList, r) 129 } 130 sort.Sort(refList) 131 132 refs := make([]types.Value, len(refList)) 133 for idx, r := range refList { 134 refs[idx] = r.ref 135 } 136 137 rl := types.NewList(db, refs...) 138 139 if !*noIO { 140 if *performCommit { 141 additionalMetaInfo := map[string]string{"inputDir": *rootDir} 142 meta, err := spec.CreateCommitMetaStruct(ds.Database(), "", "", additionalMetaInfo, nil) 143 d.CheckErrorNoUsage(err) 144 _, err = db.Commit(ds, rl, datas.CommitOptions{Meta: meta}) 145 d.PanicIfError(err) 146 } else { 147 ref := db.WriteValue(rl) 148 fmt.Fprintf(os.Stdout, "#%s\n", ref.TargetHash().String()) 149 } 150 } 151 }) 152 if err != nil { 153 log.Fatal(err) 154 } 155 }