go-hep.org/x/hep@v0.38.1/cmd/root2arrow/main.go (about)

     1  // Copyright ©2019 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // root2arrow converts the content of a ROOT TTree to an ARROW file.
     6  //
     7  //	Usage of root2arrow:
     8  //	  -o string
     9  //	    	path to output ARROW file name (default "output.data")
    10  //	  -stream
    11  //	    	enable ARROW stream (default is to create an ARROW file)
    12  //	  -t string
    13  //	    	name of the tree to convert (default "tree")
    14  //
    15  //
    16  //	$> root2arrow -o foo.data -t tree ../../groot/testdata/simple.root
    17  //	$> arrow-ls ./foo.data
    18  //	version: V4
    19  //	schema:
    20  //	  fields: 3
    21  //	    - one: type=int32
    22  //	    - two: type=float32
    23  //	    - three: type=utf8
    24  //	records: 1
    25  //	$> arrow-cat ./foo.data
    26  //	version: V4
    27  //	record 1/1...
    28  //	  col[0] "one": [1 2 3 4]
    29  //	  col[1] "two": [1.1 2.2 3.3 4.4]
    30  //	  col[2] "three": ["uno" "dos" "tres" "quatro"]
    31  package main // import "go-hep.org/x/hep/cmd/root2arrow"
    32  
    33  import (
    34  	"flag"
    35  	"fmt"
    36  	"io"
    37  	"log"
    38  	"os"
    39  
    40  	"git.sr.ht/~sbinet/go-arrow/array"
    41  	"git.sr.ht/~sbinet/go-arrow/ipc"
    42  	"git.sr.ht/~sbinet/go-arrow/memory"
    43  	"go-hep.org/x/hep/groot"
    44  	"go-hep.org/x/hep/groot/rarrow"
    45  	"go-hep.org/x/hep/groot/riofs"
    46  	_ "go-hep.org/x/hep/groot/riofs/plugin/http"
    47  	_ "go-hep.org/x/hep/groot/riofs/plugin/xrootd"
    48  	"go-hep.org/x/hep/groot/rtree"
    49  )
    50  
    51  func main() {
    52  	log.SetPrefix("root2arrow: ")
    53  	log.SetFlags(0)
    54  
    55  	oname := flag.String("o", "output.data", "path to output ARROW file name")
    56  	tname := flag.String("t", "tree", "name of the tree to convert")
    57  	stream := flag.Bool("stream", false, "enable ARROW stream (default is to create an ARROW file)")
    58  
    59  	flag.Parse()
    60  
    61  	if flag.NArg() != 1 {
    62  		flag.Usage()
    63  		log.Fatalf("missing input ROOT filename argument")
    64  	}
    65  	fname := flag.Arg(0)
    66  
    67  	err := process(*oname, fname, *tname, *stream)
    68  	if err != nil {
    69  		log.Fatal(err)
    70  	}
    71  }
    72  
    73  func process(oname, fname, tname string, stream bool) error {
    74  	f, err := groot.Open(fname)
    75  	if err != nil {
    76  		return err
    77  	}
    78  	defer f.Close()
    79  
    80  	obj, err := riofs.Dir(f).Get(tname)
    81  	if err != nil {
    82  		return err
    83  	}
    84  
    85  	tree, ok := obj.(rtree.Tree)
    86  	if !ok {
    87  		return fmt.Errorf("object %q in file %q is not a rtree.Tree", tname, fname)
    88  	}
    89  
    90  	mem := memory.NewGoAllocator()
    91  
    92  	r := rarrow.NewRecordReader(tree, rarrow.WithAllocator(mem))
    93  	defer r.Release()
    94  
    95  	var o *os.File
    96  
    97  	switch oname {
    98  	case "":
    99  		o = os.Stdout
   100  	default:
   101  		o, err = os.Create(oname)
   102  		if err != nil {
   103  			return err
   104  		}
   105  		defer o.Close()
   106  	}
   107  
   108  	switch {
   109  	case stream:
   110  		err = processStream(o, r, mem)
   111  	default:
   112  		err = processFile(o, r, mem)
   113  	}
   114  
   115  	return err
   116  }
   117  
   118  func processStream(o io.Writer, r array.RecordReader, mem memory.Allocator) error {
   119  	var err error
   120  	w := ipc.NewWriter(o, ipc.WithSchema(r.Schema()), ipc.WithAllocator(mem))
   121  	defer w.Close()
   122  
   123  	i := 0
   124  	for r.Next() {
   125  		rec := r.Record()
   126  		err = w.Write(rec)
   127  		if err != nil {
   128  			return fmt.Errorf("could not write record[%d]: %w", i, err)
   129  		}
   130  		i++
   131  	}
   132  
   133  	err = w.Close()
   134  	if err != nil {
   135  		return fmt.Errorf("could not close Arrow stream writer: %w", err)
   136  	}
   137  
   138  	return nil
   139  }
   140  
   141  func processFile(o *os.File, r array.RecordReader, mem memory.Allocator) error {
   142  	w, err := ipc.NewFileWriter(o, ipc.WithSchema(r.Schema()), ipc.WithAllocator(mem))
   143  	if err != nil {
   144  		return fmt.Errorf("could not create Arrow file writer: %w", err)
   145  	}
   146  	defer w.Close()
   147  
   148  	i := 0
   149  	for r.Next() {
   150  		rec := r.Record()
   151  		err = w.Write(rec)
   152  		if err != nil {
   153  			return fmt.Errorf("could not write record[%d]: %w", i, err)
   154  		}
   155  		i++
   156  	}
   157  
   158  	err = w.Close()
   159  	if err != nil {
   160  		return fmt.Errorf("could not close Arrow file writer: %w", err)
   161  	}
   162  
   163  	err = o.Sync()
   164  	if err != nil {
   165  		return fmt.Errorf("could not sync data to disk: %w", err)
   166  	}
   167  
   168  	err = o.Close()
   169  	if err != nil {
   170  		return fmt.Errorf("could not close output file: %w", err)
   171  	}
   172  
   173  	return nil
   174  }