go-hep.org/x/hep@v0.38.1/cmd/root2npy/main.go (about)

     1  // Copyright ©2017 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // root2npy converts the content of a ROOT TTree to a NumPy data file.
     6  //
     7  //	Usage of root2npy:
     8  //	 -f string
     9  //	   	path to input ROOT file name
    10  //	 -o string
    11  //	   	path to output npz file name (default "output.npz")
    12  //	 -t string
    13  //	   	name of the tree to convert (default "tree")
    14  //
    15  // The NumPy data file format is described here:
    16  //
    17  //	https://numpy.org/neps/nep-0001-npy-format.html
    18  //
    19  // Example:
    20  //
    21  //	$> root2npy -f $GOPATH/src/go-hep.org/x/hep/groot/testdata/simple.root -t tree -o output.npz
    22  //	$> python2 -c 'import sys, numpy as np; print(dict(np.load(sys.argv[1])))' ./output.npz
    23  //	{'one':   array([1, 2, 3, 4], dtype=int32),
    24  //	 'two':   array([ 1.10000002,  2.20000005,  3.29999995,  4.4000001 ], dtype=float32),
    25  //	 'three': array([u'uno', u'dos', u'tres', u'quatro'], dtype='<U6')}
    26  //
    27  //	$> python3 -c 'import sys, numpy as np; print(dict(np.load(sys.argv[1])))' ./output.npz
    28  //	{'one':   array([1, 2, 3, 4], dtype=int32),
    29  //	 'two':   array([ 1.10000002,  2.20000005,  3.29999995,  4.4000001 ], dtype=float32),
    30  //	 'three': array(['uno', 'dos', 'tres', 'quatro'], dtype='<U6')}
    31  //
    32  //	$> go install codeberg.org/sbinet/npyio/cmd/npyio-ls@latest
    33  //	$> npyio-ls ./output.npz
    34  //	================================================================================
    35  //	file: ./output.npz
    36  //	entry: one
    37  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<i4, Fortran:false, Shape:[4]}}
    38  //	data = [1 2 3 4]
    39  //
    40  //	entry: two
    41  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<f4, Fortran:false, Shape:[4]}}
    42  //	data = [1.1 2.2 3.3 4.4]
    43  //
    44  //	entry: three
    45  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<U6, Fortran:false, Shape:[4]}}
    46  //	data = [uno dos tres quatro]
    47  //
    48  //	$> root-ls -t $GOPATH/src/go-hep.org/x/hep/groot/testdata/simple.root
    49  //	=== [$GOPATH/src/go-hep.org/x/hep/groot/testdata/simple.root] ===
    50  //	version: 60600
    51  //	TTree   tree      fake data (entries=4)
    52  //	  one   "one/I"   TBranch
    53  //	  two   "two/F"   TBranch
    54  //	  three "three/C" TBranch
    55  //
    56  // If you have a 10-events tree with a branch "doubles" containing an array of 3 float64,
    57  // root2npy will convert it to a NumPy data file containing a NumPy array with a shape (10,3).
    58  //
    59  // Example:
    60  //
    61  //	$> root-ls -t $GOPATH/src/go-hep.org/x/hep/groot/testdata/small-flat-tree.root
    62  //	=== [$GOPATH/src/go-hep.org/x/hep/groot/testdata/small-flat-tree.root] ===
    63  //	version: 60806
    64  //	TTree          tree                 my tree title (entries=100)
    65  //	  Int32        "Int32/I"            TBranch
    66  //	  Int64        "Int64/L"            TBranch
    67  //	  UInt32       "UInt32/i"           TBranch
    68  //	  UInt64       "UInt64/l"           TBranch
    69  //	  Float32      "Float32/F"          TBranch
    70  //	  Float64      "Float64/D"          TBranch
    71  //	  Str          "Str/C"              TBranch
    72  //	  ArrayInt32   "ArrayInt32[10]/I"   TBranch
    73  //	  ArrayInt64   "ArrayInt64[10]/L"   TBranch
    74  //	  ArrayUInt32  "ArrayInt32[10]/i"   TBranch
    75  //	  ArrayUInt64  "ArrayInt64[10]/l"   TBranch
    76  //	  ArrayFloat32 "ArrayFloat32[10]/F" TBranch
    77  //	  ArrayFloat64 "ArrayFloat64[10]/D" TBranch
    78  //	  N            "N/I"                TBranch
    79  //	  SliceInt32   "SliceInt32[N]/I"    TBranch
    80  //	  SliceInt64   "SliceInt64[N]/L"    TBranch
    81  //	  SliceUInt32  "SliceInt32[N]/i"    TBranch
    82  //	  SliceUInt64  "SliceInt64[N]/l"    TBranch
    83  //	  SliceFloat32 "SliceFloat32[N]/F"  TBranch
    84  //	  SliceFloat64 "SliceFloat64[N]/D"  TBranch
    85  //
    86  //	$> root2npy -f $GOPATH/src/go-hep.org/x/hep/groot/testdata/small-flat-tree.root
    87  //	root2npy: scanning leaves...
    88  //	root2npy: >>> "SliceInt32" []int32 not supported
    89  //	root2npy: >>> "SliceInt64" []int64 not supported
    90  //	root2npy: >>> "SliceInt32" []int32 not supported
    91  //	root2npy: >>> "SliceInt64" []int64 not supported
    92  //	root2npy: >>> "SliceFloat32" []float32 not supported
    93  //	root2npy: >>> "SliceFloat64" []float64 not supported
    94  //	root2npy: scanning leaves... [done]
    95  //
    96  //	$> npyio-ls ./output.npz
    97  //	================================================================================
    98  //	file: ./output.npz
    99  //	entry: Int32
   100  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<i4, Fortran:false, Shape:[100]}}
   101  //	data = [0 1 2 3 4 5 6 7 8 9 10 11 ... 90 91 92 93 94 95 96 97 98 99]
   102  //
   103  //	entry: Int64
   104  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<i8, Fortran:false, Shape:[100]}}
   105  //	data = [0 1 2 3 4 5 6 7 8 9 10 11 ... 90 91 92 93 94 95 96 97 98 99]
   106  //
   107  //	[...]
   108  //
   109  //	entry: Float64
   110  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<f8, Fortran:false, Shape:[100]}}
   111  //	data = [0 1 2 3 4 5 6 7 8 9 10 11 ... 90 91 92 93 94 95 96 97 98 99]
   112  //
   113  //	entry: Str
   114  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<U7, Fortran:false, Shape:[100]}}
   115  //	data = [evt-000 evt-001 evt-002 evt-003 evt-004 evt-005 evt-006 evt-007 ...
   116  //	evt-092 evt-093 evt-094 evt-095 evt-096 evt-097 evt-098 evt-099]
   117  //
   118  //	entry: ArrayInt32
   119  //	npy-header: Header{Major:2, Minor:0, Descr:{Type:<i4, Fortran:false, Shape:[100 10]}}
   120  //	data = [0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 ...
   121  //	... 97 98 98 98 98 98 98 98 98 98 98 99 99 99 99 99 99 99 99 99 99]
   122  //
   123  //	[...]
   124  package main
   125  
   126  import (
   127  	"archive/zip"
   128  	"bytes"
   129  	"flag"
   130  	"fmt"
   131  	"io"
   132  	"log"
   133  	"os"
   134  
   135  	"codeberg.org/sbinet/npyio"
   136  
   137  	"go-hep.org/x/hep/groot"
   138  	"go-hep.org/x/hep/groot/riofs"
   139  	_ "go-hep.org/x/hep/groot/riofs/plugin/http"
   140  	_ "go-hep.org/x/hep/groot/riofs/plugin/xrootd"
   141  	"go-hep.org/x/hep/groot/rnpy"
   142  	"go-hep.org/x/hep/groot/rtree"
   143  )
   144  
   145  func main() {
   146  	log.SetPrefix("root2npy: ")
   147  	log.SetFlags(0)
   148  
   149  	fname := flag.String("f", "", "path to input ROOT file name")
   150  	oname := flag.String("o", "output.npz", "path to output npz file name")
   151  	tname := flag.String("t", "tree", "name of the tree to convert")
   152  
   153  	flag.Parse()
   154  
   155  	if *fname == "" {
   156  		flag.Usage()
   157  		log.Fatalf("missing input ROOT filename argument")
   158  	}
   159  
   160  	err := process(*oname, *fname, *tname)
   161  	if err != nil {
   162  		log.Fatalf("%+v", err)
   163  	}
   164  }
   165  
   166  func process(oname, fname, tname string) error {
   167  	f, err := groot.Open(fname)
   168  	if err != nil {
   169  		return fmt.Errorf("could not open ROOT file: %w", err)
   170  	}
   171  	defer f.Close()
   172  
   173  	obj, err := riofs.Dir(f).Get(tname)
   174  	if err != nil {
   175  		return fmt.Errorf("%w", err)
   176  	}
   177  
   178  	tree, ok := obj.(rtree.Tree)
   179  	if !ok {
   180  		return fmt.Errorf("object %q in file %q is not a rtree.Tree", tname, fname)
   181  	}
   182  
   183  	cols := rnpy.NewColumns(tree)
   184  
   185  	out, err := os.Create(oname)
   186  	if err != nil {
   187  		return fmt.Errorf("could not create NumPy file: %w", err)
   188  	}
   189  	defer out.Close()
   190  
   191  	npz := zip.NewWriter(out)
   192  	defer npz.Close()
   193  
   194  	wrk := make([]byte, 1*1024*1024)
   195  	buf := new(bytes.Buffer)
   196  	for _, col := range cols {
   197  		buf.Reset()
   198  
   199  		sli, err := col.Slice()
   200  		if err != nil {
   201  			return fmt.Errorf("could not read %q: %w", col.Name(), err)
   202  		}
   203  
   204  		err = npyio.Write(buf, sli)
   205  		if err != nil {
   206  			return fmt.Errorf("could not write %q: %w", col.Name(), err)
   207  		}
   208  
   209  		if err != nil {
   210  			return fmt.Errorf("could not process column %q: %w", col.Name(), err)
   211  		}
   212  
   213  		wz, err := npz.Create(col.Name())
   214  		if err != nil {
   215  			return fmt.Errorf("could not create column %q: %w", col.Name(), err)
   216  		}
   217  
   218  		_, err = io.CopyBuffer(wz, buf, wrk)
   219  		if err != nil {
   220  			return fmt.Errorf("could not save column %q: %w", col.Name(), err)
   221  		}
   222  	}
   223  
   224  	err = npz.Flush()
   225  	if err != nil {
   226  		return fmt.Errorf("could not flush NumPy zip-file: %w", err)
   227  	}
   228  
   229  	err = npz.Close()
   230  	if err != nil {
   231  		return fmt.Errorf("could not close NumPy zip-file: %w", err)
   232  	}
   233  
   234  	err = out.Close()
   235  	if err != nil {
   236  		return fmt.Errorf("could not close NumPy file: %w", err)
   237  	}
   238  
   239  	return nil
   240  }