go-hep.org/x/hep@v0.38.1/groot/rarrow/reader.go (about)

     1  // Copyright ©2019 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rarrow // import "go-hep.org/x/hep/groot/rarrow"
     6  
     7  import (
     8  	"fmt"
     9  	"sync/atomic"
    10  
    11  	"git.sr.ht/~sbinet/go-arrow"
    12  	"git.sr.ht/~sbinet/go-arrow/array"
    13  	"git.sr.ht/~sbinet/go-arrow/memory"
    14  	"go-hep.org/x/hep/groot/rtree"
    15  )
    16  
    17  // Record is an in-memory Arrow Record backed by a ROOT Tree.
    18  type Record struct {
    19  	refs int64
    20  
    21  	mem  memory.Allocator
    22  	tree rtree.Tree
    23  
    24  	schema *arrow.Schema
    25  	nrows  int64
    26  	ncols  int64
    27  	offset int64 // entries offset
    28  
    29  	cols []array.Interface
    30  }
    31  
    32  // NewRecord creates a new in-memory Arrow Record from the provided ROOT Tree.
    33  func NewRecord(t rtree.Tree, opts ...Option) *Record {
    34  	cfg := newConfig(opts)
    35  
    36  	if cfg.end < 0 {
    37  		cfg.end = t.Entries()
    38  	}
    39  
    40  	if cfg.beg <= 0 {
    41  		cfg.beg = 0
    42  	}
    43  
    44  	if cfg.beg > cfg.end {
    45  		panic("rarrow: invalid entry slice")
    46  	}
    47  
    48  	rec := &Record{
    49  		mem:    cfg.mem,
    50  		tree:   t,
    51  		refs:   1,
    52  		schema: SchemaFrom(t),
    53  		offset: cfg.beg,
    54  		nrows:  cfg.end - cfg.beg,
    55  		ncols:  int64(len(t.Branches())),
    56  		cols:   make([]array.Interface, len(t.Branches())),
    57  	}
    58  
    59  	rec.load(cfg.beg, cfg.end)
    60  
    61  	return rec
    62  }
    63  
    64  func (rec *Record) load(beg, end int64) {
    65  	var (
    66  		rvars  = rtree.NewReadVars(rec.tree)
    67  		r, err = rtree.NewReader(rec.tree, rvars, rtree.WithRange(beg, end))
    68  	)
    69  	if err != nil {
    70  		panic(fmt.Errorf("could not create reader from read-vars %#v: %+v", rvars, err))
    71  	}
    72  	defer r.Close()
    73  
    74  	blds := make([]array.Builder, rec.ncols)
    75  	for i, field := range rec.schema.Fields() {
    76  		blds[i] = builderFrom(rec.mem, field.Type, rec.nrows)
    77  		defer blds[i].Release()
    78  	}
    79  
    80  	err = r.Read(func(ctx rtree.RCtx) error {
    81  		for i, field := range rec.schema.Fields() {
    82  			appendData(blds[i], rvars[i], field.Type)
    83  		}
    84  		return nil
    85  	})
    86  	if err != nil {
    87  		panic(fmt.Errorf("could not read tree: %+v", err))
    88  	}
    89  
    90  	for i, bldr := range blds {
    91  		rec.cols[i] = bldr.NewArray()
    92  	}
    93  }
    94  
    95  // Retain increases the reference count by 1.
    96  // Retain may be called simultaneously from multiple goroutines.
    97  func (rec *Record) Retain() {
    98  	atomic.AddInt64(&rec.refs, 1)
    99  }
   100  
   101  // Release decreases the reference count by 1.
   102  // When the reference count goes to zero, the memory is freed.
   103  // Release may be called simultaneously from multiple goroutines.
   104  func (rec *Record) Release() {
   105  	if atomic.LoadInt64(&rec.refs) <= 0 {
   106  		panic("groot/rarrow: too many releases")
   107  	}
   108  
   109  	if atomic.AddInt64(&rec.refs, -1) == 0 {
   110  		for i := range rec.cols {
   111  			rec.cols[i].Release()
   112  		}
   113  		rec.cols = nil
   114  	}
   115  }
   116  
   117  func (rec *Record) Schema() *arrow.Schema        { return rec.schema }
   118  func (rec *Record) NumRows() int64               { return rec.nrows }
   119  func (rec *Record) NumCols() int64               { return rec.ncols }
   120  func (rec *Record) Columns() []array.Interface   { return rec.cols }
   121  func (rec *Record) Column(i int) array.Interface { return rec.cols[i] }
   122  func (rec *Record) ColumnName(i int) string      { return rec.schema.Field(i).Name }
   123  
   124  // NewSlice constructs a zero-copy slice of the record with the indicated
   125  // indices i and j, corresponding to array[i:j].
   126  // The returned record must be Release()'d after use.
   127  //
   128  // NewSlice panics if the slice is outside the valid range of the record array.
   129  // NewSlice panics if j < i.
   130  func (rec *Record) NewSlice(i, j int64) array.Record {
   131  	return NewRecord(rec.tree, WithStart(rec.offset+i), WithEnd(rec.offset+j))
   132  }
   133  
   134  // RecordReader is an ARROW RecordReader for ROOT Trees.
   135  //
   136  // RecordReader does not materialize more than one record at a time.
   137  // The number of rows (or entries, in ROOT speak) that record loads can be configured
   138  // at creation time with the WithChunk function.
   139  // The default is one entry per record.
   140  // One can pass -1 to WithChunk to create a record with all entries of the Tree or Chain.
   141  type RecordReader struct {
   142  	refs int64
   143  
   144  	mem    memory.Allocator
   145  	schema *arrow.Schema
   146  	tree   rtree.Tree
   147  
   148  	beg   int64 // first entry to read
   149  	end   int64 // last entry to read
   150  	cur   int64 // current entry
   151  	chunk int64 // number of entries to read for each record
   152  
   153  	rec *Record
   154  }
   155  
   156  // NewRecordReader creates a new ARROW RecordReader from the provided ROOT Tree.
   157  func NewRecordReader(tree rtree.Tree, opts ...Option) *RecordReader {
   158  	cfg := newConfig(opts)
   159  
   160  	r := &RecordReader{
   161  		refs:   1,
   162  		mem:    cfg.mem,
   163  		schema: SchemaFrom(tree),
   164  		tree:   tree,
   165  		beg:    cfg.beg,
   166  		end:    cfg.end,
   167  		chunk:  cfg.chunks,
   168  	}
   169  
   170  	if r.beg <= 0 {
   171  		r.beg = 0
   172  	}
   173  
   174  	if r.end <= 0 {
   175  		r.end = tree.Entries()
   176  	}
   177  
   178  	switch {
   179  	case r.chunk == 0:
   180  		r.chunk = 1
   181  	case r.chunk < 0:
   182  		r.chunk = tree.Entries()
   183  	}
   184  	r.cur = r.beg
   185  
   186  	return r
   187  }
   188  
   189  // Retain increases the reference count by 1.
   190  // Retain may be called simultaneously from multiple goroutines.
   191  func (r *RecordReader) Retain() {
   192  	atomic.AddInt64(&r.refs, 1)
   193  }
   194  
   195  // Release decreases the reference count by 1.
   196  // When the reference count goes to zero, the memory is freed.
   197  // Release may be called simultaneously from multiple goroutines.
   198  func (r *RecordReader) Release() {
   199  	if atomic.LoadInt64(&r.refs) <= 0 {
   200  		panic("groot/rarrow: too many releases")
   201  	}
   202  
   203  	if atomic.AddInt64(&r.refs, -1) == 0 {
   204  		if r.rec != nil {
   205  			r.rec.Release()
   206  		}
   207  	}
   208  }
   209  
   210  func (r *RecordReader) Schema() *arrow.Schema { return r.schema }
   211  func (r *RecordReader) Record() array.Record  { return r.rec }
   212  
   213  func (r *RecordReader) Next() bool {
   214  	if r.cur >= r.end {
   215  		return false
   216  	}
   217  
   218  	if r.rec != nil {
   219  		r.rec.Release()
   220  	}
   221  
   222  	end := minI64(r.cur+r.chunk, r.end)
   223  	r.load(r.cur, end)
   224  	r.cur += r.chunk
   225  	return true
   226  }
   227  
   228  func (r *RecordReader) load(beg, end int64) {
   229  	r.rec = NewRecord(r.tree, WithStart(beg), WithEnd(end), WithAllocator(r.mem))
   230  }
   231  
   232  var (
   233  	_ array.Record       = (*Record)(nil)
   234  	_ array.RecordReader = (*RecordReader)(nil)
   235  )
   236  
   237  func minI64(a, b int64) int64 {
   238  	if a < b {
   239  		return a
   240  	}
   241  	return b
   242  }