github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/samples/go/xml-import/xml_importer.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"sort"
    14  	"sync"
    15  
    16  	"github.com/attic-labs/kingpin"
    17  	"github.com/attic-labs/noms/go/config"
    18  	"github.com/attic-labs/noms/go/d"
    19  	"github.com/attic-labs/noms/go/datas"
    20  	"github.com/attic-labs/noms/go/spec"
    21  	"github.com/attic-labs/noms/go/types"
    22  	jsontonoms "github.com/attic-labs/noms/go/util/json"
    23  	"github.com/attic-labs/noms/go/util/profile"
    24  	"github.com/attic-labs/noms/go/util/verbose"
    25  	"github.com/clbanning/mxj"
    26  )
    27  
    28  var (
    29  	noIO          = kingpin.Flag("benchmark", "Run in 'benchmark' mode: walk directories and parse XML files but do not write to Noms").Bool()
    30  	performCommit = kingpin.Flag("commit", "commit the data to head of the dataset (otherwise only write the data to the dataset)").Default("true").Bool()
    31  	rootDir       = kingpin.Arg("dir", "directory to find for xml files in").Required().String()
    32  	dataset       = kingpin.Arg("dataset", "dataset to write to").Required().String()
    33  )
    34  
    35  type fileIndex struct {
    36  	path  string
    37  	index int
    38  }
    39  
    40  type refIndex struct {
    41  	ref   types.Ref
    42  	index int
    43  }
    44  
    45  type refIndexList []refIndex
    46  
    47  func (a refIndexList) Len() int           { return len(a) }
    48  func (a refIndexList) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
    49  func (a refIndexList) Less(i, j int) bool { return a[i].index < a[j].index }
    50  
    51  func main() {
    52  	err := d.Try(func() {
    53  		verbose.RegisterVerboseFlags(kingpin.CommandLine)
    54  		profile.RegisterProfileFlags(kingpin.CommandLine)
    55  		kingpin.Parse()
    56  
    57  		cfg := config.NewResolver()
    58  		db, ds, err := cfg.GetDataset(*dataset)
    59  		d.CheckError(err)
    60  		defer db.Close()
    61  
    62  		defer profile.MaybeStartProfile().Stop()
    63  
    64  		cpuCount := runtime.NumCPU()
    65  
    66  		filesChan := make(chan fileIndex, 1024)
    67  		refsChan := make(chan refIndex, 1024)
    68  
    69  		getFilePaths := func() {
    70  			index := 0
    71  			err := filepath.Walk(*rootDir, func(path string, info os.FileInfo, err error) error {
    72  				if err != nil {
    73  					d.Panic("Cannot traverse directories")
    74  				}
    75  				if !info.IsDir() && filepath.Ext(path) == ".xml" {
    76  					filesChan <- fileIndex{path, index}
    77  					index++
    78  				}
    79  
    80  				return nil
    81  			})
    82  			d.PanicIfError(err)
    83  			close(filesChan)
    84  		}
    85  
    86  		wg := sync.WaitGroup{}
    87  		importXML := func() {
    88  			expectedType := types.NewMap(db)
    89  			for f := range filesChan {
    90  				file, err := os.Open(f.path)
    91  				if err != nil {
    92  					d.Panic("Error getting XML")
    93  				}
    94  
    95  				xmlObject, err := mxj.NewMapXmlReader(file)
    96  				if err != nil {
    97  					d.Panic("Error decoding XML")
    98  				}
    99  				object := xmlObject.Old()
   100  				file.Close()
   101  
   102  				nomsObj := jsontonoms.NomsValueFromDecodedJSON(db, object, false)
   103  				d.Chk.IsType(expectedType, nomsObj)
   104  
   105  				var r types.Ref
   106  				if !*noIO {
   107  					r = ds.Database().WriteValue(nomsObj)
   108  				}
   109  
   110  				refsChan <- refIndex{r, f.index}
   111  			}
   112  
   113  			wg.Done()
   114  		}
   115  
   116  		go getFilePaths()
   117  		for i := 0; i < cpuCount*8; i++ {
   118  			wg.Add(1)
   119  			go importXML()
   120  		}
   121  		go func() {
   122  			wg.Wait()
   123  			close(refsChan) // done converting xml to noms
   124  		}()
   125  
   126  		refList := refIndexList{}
   127  		for r := range refsChan {
   128  			refList = append(refList, r)
   129  		}
   130  		sort.Sort(refList)
   131  
   132  		refs := make([]types.Value, len(refList))
   133  		for idx, r := range refList {
   134  			refs[idx] = r.ref
   135  		}
   136  
   137  		rl := types.NewList(db, refs...)
   138  
   139  		if !*noIO {
   140  			if *performCommit {
   141  				additionalMetaInfo := map[string]string{"inputDir": *rootDir}
   142  				meta, err := spec.CreateCommitMetaStruct(ds.Database(), "", "", additionalMetaInfo, nil)
   143  				d.CheckErrorNoUsage(err)
   144  				_, err = db.Commit(ds, rl, datas.CommitOptions{Meta: meta})
   145  				d.PanicIfError(err)
   146  			} else {
   147  				ref := db.WriteValue(rl)
   148  				fmt.Fprintf(os.Stdout, "#%s\n", ref.TargetHash().String())
   149  			}
   150  		}
   151  	})
   152  	if err != nil {
   153  		log.Fatal(err)
   154  	}
   155  }