go-hep.org/x/hep@v0.38.1/groot/rtree/reader.go (about) 1 // Copyright ©2020 The go-hep Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rtree 6 7 import ( 8 "fmt" 9 "io" 10 "reflect" 11 "strings" 12 13 "go-hep.org/x/hep/groot/rtree/rfunc" 14 ) 15 16 // Reader reads data from a Tree. 17 type Reader struct { 18 r reader 19 beg int64 20 end int64 21 nrab int // number of read-ahead baskets 22 23 tree Tree 24 rvars []ReadVar 25 26 evals []rfunc.Formula 27 dirty bool // whether we need to re-create scanner (if formula needed new branches) 28 } 29 30 // ReadOption configures how a ROOT tree should be traversed. 31 type ReadOption func(r *Reader) error 32 33 // WithRange specifies the half-open interval [beg, end) of entries 34 // a Tree reader will read through. 35 func WithRange(beg, end int64) ReadOption { 36 return func(r *Reader) error { 37 r.beg = beg 38 r.end = end 39 return nil 40 } 41 } 42 43 // WithPrefetchBaskets specifies the number of baskets to read-ahead, per branch. 44 // The default is 2. 45 // The number of prefetch baskets is cap'ed by the number of baskets, per branch. 46 func WithPrefetchBaskets(n int) ReadOption { 47 return func(r *Reader) error { 48 r.nrab = n 49 return nil 50 } 51 } 52 53 // NewReader creates a new Tree Reader from the provided ROOT Tree and 54 // the set of read-variables into which data will be read. 55 func NewReader(t Tree, rvars []ReadVar, opts ...ReadOption) (*Reader, error) { 56 r := Reader{tree: t} 57 58 err := r.setup(t, opts) 59 if err != nil { 60 return nil, err 61 } 62 63 rvars, err = sanitizeRVars(t, rvars) 64 if err != nil { 65 return nil, fmt.Errorf("rtree: could not create reader: %w", err) 66 } 67 68 r.r = newReader(t, rvars, r.nrab, r.beg, r.end) 69 r.rvars = r.r.rvars() 70 71 return &r, nil 72 } 73 74 func (r *Reader) setup(t Tree, opts []ReadOption) error { 75 r.beg = 0 76 r.end = -1 77 r.nrab = 2 78 79 for i, opt := range opts { 80 err := opt(r) 81 if err != nil { 82 return fmt.Errorf( 83 "rtree: could not set reader option %d: %w", 84 i, err, 85 ) 86 } 87 } 88 89 if r.end < 0 { 90 r.end = t.Entries() 91 } 92 93 if r.beg < 0 { 94 return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d < 0)", 95 r.beg, r.end, r.beg, 96 ) 97 } 98 99 if r.beg > r.end { 100 return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d > end=%d)", 101 r.beg, r.end, r.beg, r.end, 102 ) 103 } 104 105 if r.beg > t.Entries() { 106 return fmt.Errorf("rtree: invalid event reader range [%d, %d) (start=%d > tree-entries=%d)", 107 r.beg, r.end, r.beg, t.Entries(), 108 ) 109 } 110 111 if r.end > t.Entries() { 112 return fmt.Errorf("rtree: invalid event reader range [%d, %d) (end=%d > tree-entries=%d)", 113 r.beg, r.end, r.end, t.Entries(), 114 ) 115 } 116 117 return nil 118 } 119 120 // Close closes the Reader. 121 func (r *Reader) Close() error { 122 if r.r == nil { 123 return nil 124 } 125 err := r.r.Close() 126 r.r = nil 127 r.evals = nil 128 return err 129 } 130 131 // RCtx provides an entry-wise local context to the tree Reader. 132 type RCtx struct { 133 Entry int64 // Current tree entry. 134 } 135 136 // Read will read data from the underlying tree over the whole specified range. 137 // Read calls the provided user function f for each entry successfully read. 138 func (r *Reader) Read(f func(ctx RCtx) error) error { 139 if r.dirty { 140 r.dirty = false 141 _ = r.r.Close() 142 r.r = newReader(r.tree, r.rvars, r.nrab, r.beg, r.end) 143 } 144 r.r.reset() 145 146 const eoff = 0 // entry offset 147 return r.r.run(eoff, r.beg, r.end, f) 148 } 149 150 // Reset resets the current Reader with the provided options. 151 func (r *Reader) Reset(opts ...ReadOption) error { 152 if r.r != nil { 153 err := r.r.Close() 154 if err != nil { 155 return fmt.Errorf("rtree: could not reset internal reader: %w", err) 156 } 157 } 158 159 err := r.setup(r.tree, opts) 160 if err != nil { 161 return fmt.Errorf("rtree: could not reset reader options: %w", err) 162 } 163 164 r.r = newReader(r.tree, r.rvars, r.nrab, r.beg, r.end) 165 r.rvars = r.r.rvars() 166 167 return nil 168 } 169 170 // FormulaFunc creates a new formula based on the provided function and 171 // the list of branches as inputs. 172 func (r *Reader) FormulaFunc(branches []string, fct any) (rfunc.Formula, error) { 173 f, err := rfunc.NewGenericFormula(branches, fct) 174 if err != nil { 175 return nil, fmt.Errorf("rtree: could not create formula: %w", err) 176 } 177 return r.Formula(f) 178 } 179 180 // Formula creates a new formula based on the provided user provided formula. 181 // Formula binds the provided function with the requested list of leaves. 182 func (r *Reader) Formula(f rfunc.Formula) (rfunc.Formula, error) { 183 n := len(r.rvars) 184 f, err := newFormula(r, f) 185 if err != nil { 186 return nil, fmt.Errorf("rtree: could not create formula: %w", err) 187 } 188 189 r.evals = append(r.evals, f) 190 191 if n != len(r.rvars) { 192 // formula needed to auto-load new branches. 193 // mark reader as dirty to re-create its internal scanner 194 // before the event-loop. 195 r.dirty = true 196 } 197 return f, nil 198 } 199 200 func sanitizeRVars(t Tree, rvars []ReadVar) ([]ReadVar, error) { 201 rvs := make([]ReadVar, 0, len(rvars)) 202 for i := range rvars { 203 rvar := &rvars[i] 204 if rvar.Leaf == "" { 205 rvar.Leaf = rvar.Name 206 } 207 if rvar.count != "" { 208 rvs = append(rvs, *rvar) 209 continue 210 } 211 br := t.Branch(rvar.Name) 212 if br == nil { 213 return nil, fmt.Errorf("rtree: tree %q has no branch named %q", t.Name(), rvar.Name) 214 } 215 leaf := br.Leaf(rvar.Leaf) 216 switch { 217 case leaf == nil: 218 kind := reflect.ValueOf(rvar.Value).Elem().Kind() 219 switch { 220 case kind == reflect.Struct && strings.Contains(br.Title(), ":"): 221 // maybe a branch with a contiguous buffer of bytes. 222 subs := ReadVarsFromStruct(rvar.Value) 223 for i := range subs { 224 sub := &subs[i] 225 sub.Name = rvar.Name 226 sub.leaf = br.Leaf(sub.Leaf) 227 } 228 rvs = append(rvs, subs...) 229 default: 230 rvs = append(rvs, *rvar) 231 } 232 default: 233 lfc := leaf.LeafCount() 234 if lfc != nil { 235 rvar.count = lfc.Name() 236 } 237 rvs = append(rvs, *rvar) 238 } 239 } 240 return rvs, nil 241 } 242 243 type reader interface { 244 Close() error 245 rvars() []ReadVar 246 247 run(off, beg, end int64, f func(RCtx) error) error 248 start() error 249 stop() 250 reset() 251 } 252 253 // rtree reads a tree. 254 type rtree struct { 255 tree *ttree 256 rvs []ReadVar 257 brs []rbranch 258 lvs []rleaf 259 } 260 261 var ( 262 _ reader = (*rtree)(nil) 263 ) 264 265 func (r *rtree) rvars() []ReadVar { return r.rvs } 266 267 func newReader(t Tree, rvars []ReadVar, n int, beg, end int64) reader { 268 rvars, err := sanitizeRVars(t, rvars) 269 if err != nil { 270 panic(err) 271 } 272 273 switch t := t.(type) { 274 case *ttree: 275 return newRTree(t, rvars, n, beg, end) 276 case *tntuple: 277 return newRTree(&t.ttree, rvars, n, beg, end) 278 case *tntupleD: 279 return newRTree(&t.ttree, rvars, n, beg, end) 280 case *chain: 281 return newRChain(t, rvars, n, beg, end) 282 case *join: 283 return newRJoin(t, rvars, n, beg, end) 284 default: 285 panic(fmt.Errorf("rtree: unknown Tree implementation %T", t)) 286 } 287 } 288 289 func newRTree(t *ttree, rvars []ReadVar, n int, beg, end int64) *rtree { 290 r := &rtree{ 291 tree: t, 292 rvs: rvars, 293 } 294 usr := make(map[string]struct{}, len(rvars)) 295 for _, rvar := range rvars { 296 usr[rvar.Name+"."+rvar.Leaf] = struct{}{} 297 } 298 299 var rcounts []ReadVar 300 for _, rvar := range rvars { 301 if rvar.count == "" { 302 continue 303 } 304 leaf := t.Branch(rvar.Name).Leaf(rvar.Leaf).LeafCount() 305 name := leaf.Branch().Name() + "." + leaf.Name() 306 if _, ok := usr[name]; !ok { 307 var ptr any 308 switch leaf := leaf.(type) { 309 case *LeafB: 310 ptr = new(int8) 311 case *LeafS: 312 ptr = new(int16) 313 case *LeafI: 314 ptr = new(int32) 315 case *LeafL: 316 ptr = new(int64) 317 default: 318 panic(fmt.Errorf("unknown Leaf count type %T", leaf)) 319 } 320 rcounts = append(rcounts, ReadVar{ 321 Name: leaf.Branch().Name(), 322 Leaf: leaf.Name(), 323 Value: ptr, 324 leaf: leaf, 325 }) 326 } 327 } 328 r.rvs = append(rcounts, r.rvs...) 329 r.rvs = bindRVarsTo(t, r.rvs) 330 331 r.lvs = make([]rleaf, 0, len(r.rvs)) 332 for i := range r.rvs { 333 rv := r.rvs[i] 334 r.lvs = append(r.lvs, rleafFrom(rv.leaf, rv, r)) 335 } 336 337 // regroup leaves by holding branch 338 set := make(map[string]int) 339 brs := make([][]rleaf, 0, len(r.lvs)) 340 for _, leaf := range r.lvs { 341 br := leaf.Leaf().Branch().Name() 342 if _, ok := set[br]; !ok { 343 set[br] = len(brs) 344 brs = append(brs, []rleaf{}) 345 } 346 id := set[br] 347 brs[id] = append(brs[id], leaf) 348 } 349 350 r.brs = make([]rbranch, len(brs)) 351 for i, leaves := range brs { 352 branch := leaves[0].Leaf().Branch() 353 r.brs[i] = newRBranch(branch, n, beg, end, leaves, r) 354 } 355 356 return r 357 } 358 func (r *rtree) Close() error { 359 for i := range r.brs { 360 rb := &r.brs[i] 361 rb.rb.close() 362 } 363 return nil 364 } 365 366 func (r *rtree) start() error { 367 for i := range r.brs { 368 rb := &r.brs[i] 369 err := rb.start() 370 if err != nil { 371 if err == io.EOF { 372 // empty range. 373 return nil 374 } 375 return err 376 } 377 } 378 return nil 379 } 380 381 func (r *rtree) stop() { 382 for i := range r.brs { 383 rb := &r.brs[i] 384 _ = rb.stop() 385 } 386 } 387 388 func (r *rtree) reset() { 389 for i := range r.brs { 390 rb := &r.brs[i] 391 rb.reset() 392 } 393 } 394 395 func (r *rtree) rcountFunc(name string) func() int { 396 for _, leaf := range r.lvs { 397 n := leaf.Leaf().Name() 398 if n != name { 399 continue 400 } 401 switch leaf := leaf.(type) { 402 case *rleafValI8: 403 return leaf.ivalue 404 case *rleafValI16: 405 return leaf.ivalue 406 case *rleafValI32: 407 return leaf.ivalue 408 case *rleafValI64: 409 return leaf.ivalue 410 case *rleafValU8: 411 return leaf.ivalue 412 case *rleafValU16: 413 return leaf.ivalue 414 case *rleafValU32: 415 return leaf.ivalue 416 case *rleafValU64: 417 return leaf.ivalue 418 case *rleafElem: 419 leaf.bindCount() 420 return leaf.ivalue 421 422 default: 423 panic(fmt.Errorf("rleaf %T not implemented", leaf)) 424 } 425 } 426 panic(fmt.Errorf("impossible: no leaf for %s", name)) 427 } 428 429 func (r *rtree) rcountLeaf(name string) leafCount { 430 for _, leaf := range r.lvs { 431 n := leaf.Leaf().Name() 432 if n != name { 433 continue 434 } 435 return &rleafCount{ 436 Leaf: leaf.Leaf(), 437 n: r.rcountFunc(name), 438 leaf: leaf, 439 } 440 } 441 panic(fmt.Errorf("impossible: no leaf for %s", name)) 442 } 443 444 func (r *rtree) run(off, beg, end int64, f func(RCtx) error) error { 445 var ( 446 err error 447 rctx RCtx 448 ) 449 450 defer r.Close() 451 452 err = r.start() 453 if err != nil { 454 return err 455 } 456 defer r.stop() 457 458 for i := beg; i < end; i++ { 459 err = r.read(i) 460 if err != nil { 461 return fmt.Errorf("rtree: could not read entry %d: %w", i, err) 462 } 463 rctx.Entry = i + off 464 err = f(rctx) 465 if err != nil { 466 return fmt.Errorf("rtree: could not process entry %d: %w", i, err) 467 } 468 } 469 470 return err 471 } 472 473 func (r *rtree) read(ievt int64) error { 474 for i := range r.brs { 475 rb := &r.brs[i] 476 err := rb.read(ievt) 477 if err != nil { 478 return err 479 } 480 } 481 return nil 482 } 483 484 var ( 485 _ rleafCtx = (*rtree)(nil) 486 )