go-hep.org/x/hep@v0.38.1/groot/rarrow/reader.go (about) 1 // Copyright ©2019 The go-hep Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rarrow // import "go-hep.org/x/hep/groot/rarrow" 6 7 import ( 8 "fmt" 9 "sync/atomic" 10 11 "git.sr.ht/~sbinet/go-arrow" 12 "git.sr.ht/~sbinet/go-arrow/array" 13 "git.sr.ht/~sbinet/go-arrow/memory" 14 "go-hep.org/x/hep/groot/rtree" 15 ) 16 17 // Record is an in-memory Arrow Record backed by a ROOT Tree. 18 type Record struct { 19 refs int64 20 21 mem memory.Allocator 22 tree rtree.Tree 23 24 schema *arrow.Schema 25 nrows int64 26 ncols int64 27 offset int64 // entries offset 28 29 cols []array.Interface 30 } 31 32 // NewRecord creates a new in-memory Arrow Record from the provided ROOT Tree. 33 func NewRecord(t rtree.Tree, opts ...Option) *Record { 34 cfg := newConfig(opts) 35 36 if cfg.end < 0 { 37 cfg.end = t.Entries() 38 } 39 40 if cfg.beg <= 0 { 41 cfg.beg = 0 42 } 43 44 if cfg.beg > cfg.end { 45 panic("rarrow: invalid entry slice") 46 } 47 48 rec := &Record{ 49 mem: cfg.mem, 50 tree: t, 51 refs: 1, 52 schema: SchemaFrom(t), 53 offset: cfg.beg, 54 nrows: cfg.end - cfg.beg, 55 ncols: int64(len(t.Branches())), 56 cols: make([]array.Interface, len(t.Branches())), 57 } 58 59 rec.load(cfg.beg, cfg.end) 60 61 return rec 62 } 63 64 func (rec *Record) load(beg, end int64) { 65 var ( 66 rvars = rtree.NewReadVars(rec.tree) 67 r, err = rtree.NewReader(rec.tree, rvars, rtree.WithRange(beg, end)) 68 ) 69 if err != nil { 70 panic(fmt.Errorf("could not create reader from read-vars %#v: %+v", rvars, err)) 71 } 72 defer r.Close() 73 74 blds := make([]array.Builder, rec.ncols) 75 for i, field := range rec.schema.Fields() { 76 blds[i] = builderFrom(rec.mem, field.Type, rec.nrows) 77 defer blds[i].Release() 78 } 79 80 err = r.Read(func(ctx rtree.RCtx) error { 81 for i, field := range rec.schema.Fields() { 82 appendData(blds[i], rvars[i], field.Type) 83 } 84 return nil 85 }) 86 if err != nil { 87 panic(fmt.Errorf("could not read tree: %+v", err)) 88 } 89 90 for i, bldr := range blds { 91 rec.cols[i] = bldr.NewArray() 92 } 93 } 94 95 // Retain increases the reference count by 1. 96 // Retain may be called simultaneously from multiple goroutines. 97 func (rec *Record) Retain() { 98 atomic.AddInt64(&rec.refs, 1) 99 } 100 101 // Release decreases the reference count by 1. 102 // When the reference count goes to zero, the memory is freed. 103 // Release may be called simultaneously from multiple goroutines. 104 func (rec *Record) Release() { 105 if atomic.LoadInt64(&rec.refs) <= 0 { 106 panic("groot/rarrow: too many releases") 107 } 108 109 if atomic.AddInt64(&rec.refs, -1) == 0 { 110 for i := range rec.cols { 111 rec.cols[i].Release() 112 } 113 rec.cols = nil 114 } 115 } 116 117 func (rec *Record) Schema() *arrow.Schema { return rec.schema } 118 func (rec *Record) NumRows() int64 { return rec.nrows } 119 func (rec *Record) NumCols() int64 { return rec.ncols } 120 func (rec *Record) Columns() []array.Interface { return rec.cols } 121 func (rec *Record) Column(i int) array.Interface { return rec.cols[i] } 122 func (rec *Record) ColumnName(i int) string { return rec.schema.Field(i).Name } 123 124 // NewSlice constructs a zero-copy slice of the record with the indicated 125 // indices i and j, corresponding to array[i:j]. 126 // The returned record must be Release()'d after use. 127 // 128 // NewSlice panics if the slice is outside the valid range of the record array. 129 // NewSlice panics if j < i. 130 func (rec *Record) NewSlice(i, j int64) array.Record { 131 return NewRecord(rec.tree, WithStart(rec.offset+i), WithEnd(rec.offset+j)) 132 } 133 134 // RecordReader is an ARROW RecordReader for ROOT Trees. 135 // 136 // RecordReader does not materialize more than one record at a time. 137 // The number of rows (or entries, in ROOT speak) that record loads can be configured 138 // at creation time with the WithChunk function. 139 // The default is one entry per record. 140 // One can pass -1 to WithChunk to create a record with all entries of the Tree or Chain. 141 type RecordReader struct { 142 refs int64 143 144 mem memory.Allocator 145 schema *arrow.Schema 146 tree rtree.Tree 147 148 beg int64 // first entry to read 149 end int64 // last entry to read 150 cur int64 // current entry 151 chunk int64 // number of entries to read for each record 152 153 rec *Record 154 } 155 156 // NewRecordReader creates a new ARROW RecordReader from the provided ROOT Tree. 157 func NewRecordReader(tree rtree.Tree, opts ...Option) *RecordReader { 158 cfg := newConfig(opts) 159 160 r := &RecordReader{ 161 refs: 1, 162 mem: cfg.mem, 163 schema: SchemaFrom(tree), 164 tree: tree, 165 beg: cfg.beg, 166 end: cfg.end, 167 chunk: cfg.chunks, 168 } 169 170 if r.beg <= 0 { 171 r.beg = 0 172 } 173 174 if r.end <= 0 { 175 r.end = tree.Entries() 176 } 177 178 switch { 179 case r.chunk == 0: 180 r.chunk = 1 181 case r.chunk < 0: 182 r.chunk = tree.Entries() 183 } 184 r.cur = r.beg 185 186 return r 187 } 188 189 // Retain increases the reference count by 1. 190 // Retain may be called simultaneously from multiple goroutines. 191 func (r *RecordReader) Retain() { 192 atomic.AddInt64(&r.refs, 1) 193 } 194 195 // Release decreases the reference count by 1. 196 // When the reference count goes to zero, the memory is freed. 197 // Release may be called simultaneously from multiple goroutines. 198 func (r *RecordReader) Release() { 199 if atomic.LoadInt64(&r.refs) <= 0 { 200 panic("groot/rarrow: too many releases") 201 } 202 203 if atomic.AddInt64(&r.refs, -1) == 0 { 204 if r.rec != nil { 205 r.rec.Release() 206 } 207 } 208 } 209 210 func (r *RecordReader) Schema() *arrow.Schema { return r.schema } 211 func (r *RecordReader) Record() array.Record { return r.rec } 212 213 func (r *RecordReader) Next() bool { 214 if r.cur >= r.end { 215 return false 216 } 217 218 if r.rec != nil { 219 r.rec.Release() 220 } 221 222 end := minI64(r.cur+r.chunk, r.end) 223 r.load(r.cur, end) 224 r.cur += r.chunk 225 return true 226 } 227 228 func (r *RecordReader) load(beg, end int64) { 229 r.rec = NewRecord(r.tree, WithStart(beg), WithEnd(end), WithAllocator(r.mem)) 230 } 231 232 var ( 233 _ array.Record = (*Record)(nil) 234 _ array.RecordReader = (*RecordReader)(nil) 235 ) 236 237 func minI64(a, b int64) int64 { 238 if a < b { 239 return a 240 } 241 return b 242 }