go-hep.org/x/hep@v0.38.1/groot/rtree/reader.go (about)

     1  // Copyright ©2020 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rtree
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"reflect"
    11  	"strings"
    12  
    13  	"go-hep.org/x/hep/groot/rtree/rfunc"
    14  )
    15  
    16  // Reader reads data from a Tree.
    17  type Reader struct {
    18  	r    reader
    19  	beg  int64
    20  	end  int64
    21  	nrab int // number of read-ahead baskets
    22  
    23  	tree  Tree
    24  	rvars []ReadVar
    25  
    26  	evals []rfunc.Formula
    27  	dirty bool // whether we need to re-create scanner (if formula needed new branches)
    28  }
    29  
    30  // ReadOption configures how a ROOT tree should be traversed.
    31  type ReadOption func(r *Reader) error
    32  
    33  // WithRange specifies the half-open interval [beg, end) of entries
    34  // a Tree reader will read through.
    35  func WithRange(beg, end int64) ReadOption {
    36  	return func(r *Reader) error {
    37  		r.beg = beg
    38  		r.end = end
    39  		return nil
    40  	}
    41  }
    42  
    43  // WithPrefetchBaskets specifies the number of baskets to read-ahead, per branch.
    44  // The default is 2.
    45  // The number of prefetch baskets is cap'ed by the number of baskets, per branch.
    46  func WithPrefetchBaskets(n int) ReadOption {
    47  	return func(r *Reader) error {
    48  		r.nrab = n
    49  		return nil
    50  	}
    51  }
    52  
    53  // NewReader creates a new Tree Reader from the provided ROOT Tree and
    54  // the set of read-variables into which data will be read.
    55  func NewReader(t Tree, rvars []ReadVar, opts ...ReadOption) (*Reader, error) {
    56  	r := Reader{tree: t}
    57  
    58  	err := r.setup(t, opts)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  
    63  	rvars, err = sanitizeRVars(t, rvars)
    64  	if err != nil {
    65  		return nil, fmt.Errorf("rtree: could not create reader: %w", err)
    66  	}
    67  
    68  	r.r = newReader(t, rvars, r.nrab, r.beg, r.end)
    69  	r.rvars = r.r.rvars()
    70  
    71  	return &r, nil
    72  }
    73  
    74  func (r *Reader) setup(t Tree, opts []ReadOption) error {
    75  	r.beg = 0
    76  	r.end = -1
    77  	r.nrab = 2
    78  
    79  	for i, opt := range opts {
    80  		err := opt(r)
    81  		if err != nil {
    82  			return fmt.Errorf(
    83  				"rtree: could not set reader option %d: %w",
    84  				i, err,
    85  			)
    86  		}
    87  	}
    88  
    89  	if r.end < 0 {
    90  		r.end = t.Entries()
    91  	}
    92  
    93  	if r.beg < 0 {
    94  		return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d < 0)",
    95  			r.beg, r.end, r.beg,
    96  		)
    97  	}
    98  
    99  	if r.beg > r.end {
   100  		return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d > end=%d)",
   101  			r.beg, r.end, r.beg, r.end,
   102  		)
   103  	}
   104  
   105  	if r.beg > t.Entries() {
   106  		return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d > tree-entries=%d)",
   107  			r.beg, r.end, r.beg, t.Entries(),
   108  		)
   109  	}
   110  
   111  	if r.end > t.Entries() {
   112  		return fmt.Errorf("rtree: invalid event reader range [%d, %d) (end=%d > tree-entries=%d)",
   113  			r.beg, r.end, r.end, t.Entries(),
   114  		)
   115  	}
   116  
   117  	return nil
   118  }
   119  
   120  // Close closes the Reader.
   121  func (r *Reader) Close() error {
   122  	if r.r == nil {
   123  		return nil
   124  	}
   125  	err := r.r.Close()
   126  	r.r = nil
   127  	r.evals = nil
   128  	return err
   129  }
   130  
   131  // RCtx is the entry-wise local context a tree Reader passes to the user function for each entry read.
   132  type RCtx struct {
   133  	Entry int64 // Index of the tree entry currently being processed.
   134  }
   135  
   136  // Read will read data from the underlying tree over the whole specified range.
   137  // Read calls the provided user function f for each entry successfully read.
   138  func (r *Reader) Read(f func(ctx RCtx) error) error {
   139  	if r.dirty {
   140  		r.dirty = false
   141  		_ = r.r.Close()
   142  		r.r = newReader(r.tree, r.rvars, r.nrab, r.beg, r.end)
   143  	}
   144  	r.r.reset()
   145  
   146  	const eoff = 0 // entry offset
   147  	return r.r.run(eoff, r.beg, r.end, f)
   148  }
   149  
   150  // Reset resets the current Reader with the provided options.
   151  func (r *Reader) Reset(opts ...ReadOption) error {
   152  	if r.r != nil {
   153  		err := r.r.Close()
   154  		if err != nil {
   155  			return fmt.Errorf("rtree: could not reset internal reader: %w", err)
   156  		}
   157  	}
   158  
   159  	err := r.setup(r.tree, opts)
   160  	if err != nil {
   161  		return fmt.Errorf("rtree: could not reset reader options: %w", err)
   162  	}
   163  
   164  	r.r = newReader(r.tree, r.rvars, r.nrab, r.beg, r.end)
   165  	r.rvars = r.r.rvars()
   166  
   167  	return nil
   168  }
   169  
   170  // FormulaFunc creates a new formula based on the provided function and
   171  // the list of branches as inputs.
   172  func (r *Reader) FormulaFunc(branches []string, fct any) (rfunc.Formula, error) {
   173  	f, err := rfunc.NewGenericFormula(branches, fct)
   174  	if err != nil {
   175  		return nil, fmt.Errorf("rtree: could not create formula: %w", err)
   176  	}
   177  	return r.Formula(f)
   178  }
   179  
   180  // Formula creates a new formula based on the provided user provided formula.
   181  // Formula binds the provided function with the requested list of leaves.
   182  func (r *Reader) Formula(f rfunc.Formula) (rfunc.Formula, error) {
   183  	n := len(r.rvars)
   184  	f, err := newFormula(r, f)
   185  	if err != nil {
   186  		return nil, fmt.Errorf("rtree: could not create formula: %w", err)
   187  	}
   188  
   189  	r.evals = append(r.evals, f)
   190  
   191  	if n != len(r.rvars) {
   192  		// formula needed to auto-load new branches.
   193  		// mark reader as dirty to re-create its internal scanner
   194  		// before the event-loop.
   195  		r.dirty = true
   196  	}
   197  	return f, nil
   198  }
   199  
   200  func sanitizeRVars(t Tree, rvars []ReadVar) ([]ReadVar, error) {
   201  	rvs := make([]ReadVar, 0, len(rvars))
   202  	for i := range rvars {
   203  		rvar := &rvars[i]
   204  		if rvar.Leaf == "" {
   205  			rvar.Leaf = rvar.Name
   206  		}
   207  		if rvar.count != "" {
   208  			rvs = append(rvs, *rvar)
   209  			continue
   210  		}
   211  		br := t.Branch(rvar.Name)
   212  		if br == nil {
   213  			return nil, fmt.Errorf("rtree: tree %q has no branch named %q", t.Name(), rvar.Name)
   214  		}
   215  		leaf := br.Leaf(rvar.Leaf)
   216  		switch {
   217  		case leaf == nil:
   218  			kind := reflect.ValueOf(rvar.Value).Elem().Kind()
   219  			switch {
   220  			case kind == reflect.Struct && strings.Contains(br.Title(), ":"):
   221  				// maybe a branch with a contiguous buffer of bytes.
   222  				subs := ReadVarsFromStruct(rvar.Value)
   223  				for i := range subs {
   224  					sub := &subs[i]
   225  					sub.Name = rvar.Name
   226  					sub.leaf = br.Leaf(sub.Leaf)
   227  				}
   228  				rvs = append(rvs, subs...)
   229  			default:
   230  				rvs = append(rvs, *rvar)
   231  			}
   232  		default:
   233  			lfc := leaf.LeafCount()
   234  			if lfc != nil {
   235  				rvar.count = lfc.Name()
   236  			}
   237  			rvs = append(rvs, *rvar)
   238  		}
   239  	}
   240  	return rvs, nil
   241  }
   242  
   243  type reader interface {
   244  	Close() error
   245  	rvars() []ReadVar
   246  
   247  	run(off, beg, end int64, f func(RCtx) error) error
   248  	start() error
   249  	stop()
   250  	reset()
   251  }
   252  
   253  // rtree reads a tree: it is the reader implementation newReader creates for ttree-backed trees.
   254  type rtree struct {
   255  	tree *ttree    // tree being read
   256  	rvs  []ReadVar // read-variables, starting with the automatically added leaf counts
   257  	brs  []rbranch // branch readers, one per branch holding leaves to read
   258  	lvs  []rleaf   // leaf readers, one per read-variable of rvs
   259  }
   260  
   261  var (
   262  	_ reader = (*rtree)(nil)
   263  )
   264  
   265  func (r *rtree) rvars() []ReadVar { return r.rvs }
   266  
   267  func newReader(t Tree, rvars []ReadVar, n int, beg, end int64) reader {
   268  	rvars, err := sanitizeRVars(t, rvars)
   269  	if err != nil {
   270  		panic(err)
   271  	}
   272  
   273  	switch t := t.(type) {
   274  	case *ttree:
   275  		return newRTree(t, rvars, n, beg, end)
   276  	case *tntuple:
   277  		return newRTree(&t.ttree, rvars, n, beg, end)
   278  	case *tntupleD:
   279  		return newRTree(&t.ttree, rvars, n, beg, end)
   280  	case *chain:
   281  		return newRChain(t, rvars, n, beg, end)
   282  	case *join:
   283  		return newRJoin(t, rvars, n, beg, end)
   284  	default:
   285  		panic(fmt.Errorf("rtree: unknown Tree implementation %T", t))
   286  	}
   287  }
   288  
   289  func newRTree(t *ttree, rvars []ReadVar, n int, beg, end int64) *rtree {
   290  	r := &rtree{
   291  		tree: t,
   292  		rvs:  rvars,
   293  	}
   294  	usr := make(map[string]struct{}, len(rvars))
   295  	for _, rvar := range rvars {
   296  		usr[rvar.Name+"."+rvar.Leaf] = struct{}{}
   297  	}
   298  
   299  	var rcounts []ReadVar
   300  	for _, rvar := range rvars {
   301  		if rvar.count == "" {
   302  			continue
   303  		}
   304  		leaf := t.Branch(rvar.Name).Leaf(rvar.Leaf).LeafCount()
   305  		name := leaf.Branch().Name() + "." + leaf.Name()
   306  		if _, ok := usr[name]; !ok {
   307  			var ptr any
   308  			switch leaf := leaf.(type) {
   309  			case *LeafB:
   310  				ptr = new(int8)
   311  			case *LeafS:
   312  				ptr = new(int16)
   313  			case *LeafI:
   314  				ptr = new(int32)
   315  			case *LeafL:
   316  				ptr = new(int64)
   317  			default:
   318  				panic(fmt.Errorf("unknown Leaf count type %T", leaf))
   319  			}
   320  			rcounts = append(rcounts, ReadVar{
   321  				Name:  leaf.Branch().Name(),
   322  				Leaf:  leaf.Name(),
   323  				Value: ptr,
   324  				leaf:  leaf,
   325  			})
   326  		}
   327  	}
   328  	r.rvs = append(rcounts, r.rvs...)
   329  	r.rvs = bindRVarsTo(t, r.rvs)
   330  
   331  	r.lvs = make([]rleaf, 0, len(r.rvs))
   332  	for i := range r.rvs {
   333  		rv := r.rvs[i]
   334  		r.lvs = append(r.lvs, rleafFrom(rv.leaf, rv, r))
   335  	}
   336  
   337  	// regroup leaves by holding branch
   338  	set := make(map[string]int)
   339  	brs := make([][]rleaf, 0, len(r.lvs))
   340  	for _, leaf := range r.lvs {
   341  		br := leaf.Leaf().Branch().Name()
   342  		if _, ok := set[br]; !ok {
   343  			set[br] = len(brs)
   344  			brs = append(brs, []rleaf{})
   345  		}
   346  		id := set[br]
   347  		brs[id] = append(brs[id], leaf)
   348  	}
   349  
   350  	r.brs = make([]rbranch, len(brs))
   351  	for i, leaves := range brs {
   352  		branch := leaves[0].Leaf().Branch()
   353  		r.brs[i] = newRBranch(branch, n, beg, end, leaves, r)
   354  	}
   355  
   356  	return r
   357  }
   358  func (r *rtree) Close() error {
   359  	for i := range r.brs {
   360  		rb := &r.brs[i]
   361  		rb.rb.close()
   362  	}
   363  	return nil
   364  }
   365  
   366  func (r *rtree) start() error {
   367  	for i := range r.brs {
   368  		rb := &r.brs[i]
   369  		err := rb.start()
   370  		if err != nil {
   371  			if err == io.EOF {
   372  				// empty range.
   373  				return nil
   374  			}
   375  			return err
   376  		}
   377  	}
   378  	return nil
   379  }
   380  
   381  func (r *rtree) stop() {
   382  	for i := range r.brs {
   383  		rb := &r.brs[i]
   384  		_ = rb.stop()
   385  	}
   386  }
   387  
   388  func (r *rtree) reset() {
   389  	for i := range r.brs {
   390  		rb := &r.brs[i]
   391  		rb.reset()
   392  	}
   393  }
   394  
   395  func (r *rtree) rcountFunc(name string) func() int {
   396  	for _, leaf := range r.lvs {
   397  		n := leaf.Leaf().Name()
   398  		if n != name {
   399  			continue
   400  		}
   401  		switch leaf := leaf.(type) {
   402  		case *rleafValI8:
   403  			return leaf.ivalue
   404  		case *rleafValI16:
   405  			return leaf.ivalue
   406  		case *rleafValI32:
   407  			return leaf.ivalue
   408  		case *rleafValI64:
   409  			return leaf.ivalue
   410  		case *rleafValU8:
   411  			return leaf.ivalue
   412  		case *rleafValU16:
   413  			return leaf.ivalue
   414  		case *rleafValU32:
   415  			return leaf.ivalue
   416  		case *rleafValU64:
   417  			return leaf.ivalue
   418  		case *rleafElem:
   419  			leaf.bindCount()
   420  			return leaf.ivalue
   421  
   422  		default:
   423  			panic(fmt.Errorf("rleaf %T not implemented", leaf))
   424  		}
   425  	}
   426  	panic(fmt.Errorf("impossible: no leaf for %s", name))
   427  }
   428  
   429  func (r *rtree) rcountLeaf(name string) leafCount {
   430  	for _, leaf := range r.lvs {
   431  		n := leaf.Leaf().Name()
   432  		if n != name {
   433  			continue
   434  		}
   435  		return &rleafCount{
   436  			Leaf: leaf.Leaf(),
   437  			n:    r.rcountFunc(name),
   438  			leaf: leaf,
   439  		}
   440  	}
   441  	panic(fmt.Errorf("impossible: no leaf for %s", name))
   442  }
   443  
   444  func (r *rtree) run(off, beg, end int64, f func(RCtx) error) error {
   445  	var (
   446  		err  error
   447  		rctx RCtx
   448  	)
   449  
   450  	defer r.Close()
   451  
   452  	err = r.start()
   453  	if err != nil {
   454  		return err
   455  	}
   456  	defer r.stop()
   457  
   458  	for i := beg; i < end; i++ {
   459  		err = r.read(i)
   460  		if err != nil {
   461  			return fmt.Errorf("rtree: could not read entry %d: %w", i, err)
   462  		}
   463  		rctx.Entry = i + off
   464  		err = f(rctx)
   465  		if err != nil {
   466  			return fmt.Errorf("rtree: could not process entry %d: %w", i, err)
   467  		}
   468  	}
   469  
   470  	return err
   471  }
   472  
   473  func (r *rtree) read(ievt int64) error {
   474  	for i := range r.brs {
   475  		rb := &r.brs[i]
   476  		err := rb.read(ievt)
   477  		if err != nil {
   478  			return err
   479  		}
   480  	}
   481  	return nil
   482  }
   483  
   484  var (
   485  	_ rleafCtx = (*rtree)(nil)
   486  )