github.com/apache/arrow/go/v14@v14.0.1/parquet/pqarrow/path_builder.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pqarrow

import (
	"fmt"
	"sync/atomic"
	"unsafe"

	"github.com/apache/arrow/go/v14/arrow"
	"github.com/apache/arrow/go/v14/arrow/array"
	"github.com/apache/arrow/go/v14/arrow/memory"
	"github.com/apache/arrow/go/v14/internal/bitutils"
	"github.com/apache/arrow/go/v14/parquet/internal/encoding"
	"golang.org/x/xerrors"
)

type iterResult int8

const (
	iterDone iterResult = -1
	iterNext iterResult = 1
)

type elemRange struct {
	start int64
	end   int64
}

func (e elemRange) empty() bool { return e.start == e.end }
func (e elemRange) size() int64 { return e.end - e.start }

type rangeSelector interface {
	GetRange(idx int64) elemRange
}

type varRangeSelector struct {
	offsets []int32
}

func (v varRangeSelector) GetRange(idx int64) elemRange {
	return elemRange{int64(v.offsets[idx]), int64(v.offsets[idx+1])}
}

type fixedSizeRangeSelector struct {
	listSize int32
}

func (f fixedSizeRangeSelector) GetRange(idx int64) elemRange {
	start := idx * int64(f.listSize)
	return elemRange{start, start + int64(f.listSize)}
}
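// For example, given list offsets [0, 2, 2, 5], a varRangeSelector yields
// GetRange(0) = {0, 2}, GetRange(1) = {2, 2} (an empty list), and
// GetRange(2) = {2, 5}, while fixedSizeRangeSelector{listSize: 3} always
// yields width-3 ranges, e.g. GetRange(1) = {3, 6}.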
type pathNode interface {
	clone() pathNode
}

type allPresentTerminalNode struct {
	defLevel int16
}

func (n *allPresentTerminalNode) clone() pathNode {
	ret := *n
	return &ret
}

func (n *allPresentTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
	return ctx.AppendDefLevels(int(rng.size()), n.defLevel)
}

type allNullsTerminalNode struct {
	defLevel int16
	repLevel int16
}

func (n *allNullsTerminalNode) clone() pathNode {
	ret := *n
	return &ret
}

func (n *allNullsTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
	fillRepLevels(int(rng.size()), n.repLevel, ctx)
	return ctx.AppendDefLevels(int(rng.size()), n.defLevel)
}

type nullableTerminalNode struct {
	bitmap            []byte
	elemOffset        int64
	defLevelIfPresent int16
	defLevelIfNull    int16
}

func (n *nullableTerminalNode) clone() pathNode {
	ret := *n
	return &ret
}

func (n *nullableTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
	elems := rng.size()
	ctx.ReserveDefLevels(int(elems))

	var (
		present = (*(*[2]byte)(unsafe.Pointer(&n.defLevelIfPresent)))[:]
		null    = (*(*[2]byte)(unsafe.Pointer(&n.defLevelIfNull)))[:]
	)
	rdr := bitutils.NewBitRunReader(n.bitmap, n.elemOffset+rng.start, elems)
	for {
		run := rdr.NextRun()
		if run.Len == 0 {
			break
		}
		if run.Set {
			ctx.defLevels.UnsafeWriteCopy(int(run.Len), present)
		} else {
			ctx.defLevels.UnsafeWriteCopy(int(run.Len), null)
		}
	}
	return iterDone
}
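// note on the unsafe casts above: (*[2]byte)(unsafe.Pointer(&lvl)) exposes an
// int16 def level as its native-endian byte representation so that runs of an
// identical level can be bulk-copied into the byte buffer via UnsafeWriteCopy;
// int16BufferBuilder.Values later reinterprets those bytes back as []int16.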
type listNode struct {
	selector        rangeSelector
	prevRepLevel    int16
	repLevel        int16
	defLevelIfEmpty int16
	isLast          bool
}

func (n *listNode) clone() pathNode {
	ret := *n
	return &ret
}

func (n *listNode) run(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	if rng.empty() {
		return iterDone
	}

	// find the first non-empty list (skipping a run of empties)
	start := rng.start
	for {
		// retrieve the range of elements that this list contains
		*childRng = n.selector.GetRange(rng.start)
		if !childRng.empty() {
			break
		}
		rng.start++
		if rng.empty() {
			break
		}
	}

	// loop's post-condition:
	// * rng is either empty (we're done processing this node)
	//   or start corresponds to a non-empty list
	// * if rng is non-empty, childRng contains the bounds of the non-empty list

	// handle any skipped-over empty lists
	emptyElems := rng.start - start
	if emptyElems > 0 {
		fillRepLevels(int(emptyElems), n.prevRepLevel, ctx)
		ctx.AppendDefLevels(int(emptyElems), n.defLevelIfEmpty)
	}

	// start of a new list. note that for nested lists adding the element
	// here effectively suppresses this code until we either encounter null
	// elements or empty lists between here and the innermost list (since we
	// make the repetition and definition levels unequal). similarly, when we
	// are backtracking up the stack the repetition and definition levels are
	// again equal, so if we encounter an intermediate list with more
	// elements, this will detect it as a new list
	if ctx.equalRepDeflevlsLen() && !rng.empty() {
		ctx.AppendRepLevel(n.prevRepLevel)
	}

	if rng.empty() {
		return iterDone
	}

	rng.start++
	if n.isLast {
		// if this is the last repeated node, we can try
		// to extend the child range as wide as possible
		// before continuing to the next node
		return n.fillForLast(rng, childRng, ctx)
	}

	return iterNext
}

func (n *listNode) fillForLast(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	fillRepLevels(int(childRng.size()), n.repLevel, ctx)
	// once we've reached this point the following preconditions should hold:
	// 1. there are no more repeated path nodes to deal with
	// 2. all elements in |rng| represent contiguous elements in the child
	//    array (null values would have shortened the range to ensure all
	//    remaining list elements are present, though they may be empty)
	// 3. no element of |rng| spans a parent list (intermediate list nodes
	//    only handle one list entry at a time)
	//
	// given these preconditions, it should be safe to fill runs on non-empty
	// lists here and expand the range in the child node accordingly
	for !rng.empty() {
		sizeCheck := n.selector.GetRange(rng.start)
		if sizeCheck.empty() {
			// the empty range will need to be handled after we pass down the
			// accumulated range because it affects def level placement and we
			// need to get the children's def levels entered first
			break
		}

		// this is the start of a new list. we can be sure that it only applies
		// to the previous list (and doesn't jump to the start of any list
		// further up in nesting due to the constraints mentioned earlier)
		ctx.AppendRepLevel(n.prevRepLevel)
		ctx.AppendRepLevels(int(sizeCheck.size())-1, n.repLevel)
		childRng.end = sizeCheck.end
		rng.start++
	}

	// do book-keeping to track the elements of the arrays that are actually
	// visited beyond this point. this is necessary to identify "gaps" in
	// values that should not be processed (written out to parquet)
	ctx.recordPostListVisit(*childRng)
	return iterNext
}
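// nullableNode handles a nullable parent (e.g. a nullable list or struct):
// each call to run consumes at most one run of null bits (emitting the
// appropriate def/rep levels) followed by one run of valid bits, which is
// handed down to the child node as childRng. newRange indicates that the bit
// reader must be recreated because a new parent range is being processed.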
type nullableNode struct {
	bitmap         []byte
	entryOffset    int64
	repLevelIfNull int16
	defLevelIfNull int16

	validBitsReader bitutils.BitRunReader
	newRange        bool
}

func (n *nullableNode) clone() pathNode {
	var ret nullableNode = *n
	return &ret
}

func (n *nullableNode) run(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	if n.newRange {
		n.validBitsReader = bitutils.NewBitRunReader(n.bitmap, n.entryOffset+rng.start, rng.size())
	}
	childRng.start = rng.start
	run := n.validBitsReader.NextRun()
	if !run.Set {
		rng.start += run.Len
		fillRepLevels(int(run.Len), n.repLevelIfNull, ctx)
		ctx.AppendDefLevels(int(run.Len), n.defLevelIfNull)
		run = n.validBitsReader.NextRun()
	}

	if rng.empty() {
		n.newRange = true
		return iterDone
	}
	childRng.start = rng.start
	childRng.end = childRng.start
	childRng.end += run.Len
	rng.start += childRng.size()
	n.newRange = false
	return iterNext
}

type pathInfo struct {
	path           []pathNode
	primitiveArr   arrow.Array
	maxDefLevel    int16
	maxRepLevel    int16
	leafIsNullable bool
}

func (p pathInfo) clone() pathInfo {
	ret := p
	ret.path = make([]pathNode, len(p.path))
	for idx, n := range p.path {
		ret.path[idx] = n.clone()
	}
	return ret
}

type pathBuilder struct {
	info             pathInfo
	paths            []pathInfo
	nullableInParent bool

	refCount int64
}

func (p *pathBuilder) Retain() {
	atomic.AddInt64(&p.refCount, 1)
}

func (p *pathBuilder) Release() {
	if atomic.AddInt64(&p.refCount, -1) == 0 {
		for idx := range p.paths {
			p.paths[idx].primitiveArr.Release()
			p.paths[idx].primitiveArr = nil
		}
	}
}

// calling NullN on the arr directly will compute the nulls;
// if we have "UnknownNullCount", calling NullN on the data
// object directly will just return the value the data has.
// thus we might get array.UnknownNullCount as the result here.
func lazyNullCount(arr arrow.Array) int64 {
	return int64(arr.Data().NullN())
}

func lazyNoNulls(arr arrow.Array) bool {
	nulls := lazyNullCount(arr)
	return nulls == 0 || (nulls == array.UnknownNullCount && arr.NullBitmapBytes() == nil)
}

type fixupVisitor struct {
	maxRepLevel    int
	repLevelIfNull int16
}

func (f *fixupVisitor) visit(n pathNode) {
	switch n := n.(type) {
	case *listNode:
		if n.repLevel == int16(f.maxRepLevel) {
			n.isLast = true
			f.repLevelIfNull = -1
		} else {
			f.repLevelIfNull = n.repLevel
		}
	case *nullableTerminalNode:
	case *allPresentTerminalNode:
	case *allNullsTerminalNode:
		if f.repLevelIfNull != -1 {
			n.repLevel = f.repLevelIfNull
		}
	case *nullableNode:
		if f.repLevelIfNull != -1 {
			n.repLevelIfNull = f.repLevelIfNull
		}
	}
}

func fixup(info pathInfo) pathInfo {
	// we only need to fix up the path if there were repeated elems
	if info.maxRepLevel == 0 {
		return info
	}

	visitor := fixupVisitor{maxRepLevel: int(info.maxRepLevel)}
	if visitor.maxRepLevel > 0 {
		visitor.repLevelIfNull = 0
	} else {
		visitor.repLevelIfNull = -1
	}

	for _, p := range info.path {
		visitor.visit(p)
	}
	return info
}
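// As a rough worked example of the level accounting below: for a nullable
// list<int32> column holding [[1, 2], [], null], Visit adds +1 to the max
// def level for the list's own nullability and +1 for empty lists, and the
// leaf (treated as nullable in its parent) adds one more, giving a max def
// level of 3 and a max rep level of 1. Assuming the int32 values themselves
// contain no nulls, the written levels would be def = [3 3 1 0] and
// rep = [0 1 0 0].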
func (p *pathBuilder) Visit(arr arrow.Array) error {
	switch arr.DataType().ID() {
	case arrow.LIST, arrow.MAP:
		p.maybeAddNullable(arr)
		// increment necessary due to empty lists
		p.info.maxDefLevel++
		p.info.maxRepLevel++
		larr, ok := arr.(*array.List)
		if !ok {
			larr = arr.(*array.Map).List
		}

		p.info.path = append(p.info.path, &listNode{
			selector:        varRangeSelector{larr.Offsets()[larr.Data().Offset():]},
			prevRepLevel:    p.info.maxRepLevel - 1,
			repLevel:        p.info.maxRepLevel,
			defLevelIfEmpty: p.info.maxDefLevel - 1,
		})
		p.nullableInParent = ok
		return p.Visit(larr.ListValues())
	case arrow.FIXED_SIZE_LIST:
		p.maybeAddNullable(arr)
		larr := arr.(*array.FixedSizeList)
		listSize := larr.DataType().(*arrow.FixedSizeListType).Len()
		// technically we could encode fixed-size lists with a two-level
		// encoding, but we always use the three-level encoding, so we
		// increment the def levels as well
		p.info.maxDefLevel++
		p.info.maxRepLevel++
		p.info.path = append(p.info.path, &listNode{
			selector:        fixedSizeRangeSelector{listSize},
			prevRepLevel:    p.info.maxRepLevel - 1,
			repLevel:        p.info.maxRepLevel,
			defLevelIfEmpty: p.info.maxDefLevel,
		})
		// if arr.data.offset > 0, slice?
		return p.Visit(larr.ListValues())
	case arrow.DICTIONARY:
		// we currently only handle DictionaryArray where the dictionary
		// is a primitive type
		dictArr := arr.(*array.Dictionary)
		valType := dictArr.DataType().(*arrow.DictionaryType).ValueType
		if _, ok := valType.(arrow.NestedType); ok {
			return fmt.Errorf("%w: writing DictionaryArray with nested dictionary type not yet supported",
				arrow.ErrNotImplemented)
		}
		if dictArr.Dictionary().NullN() > 0 {
			return fmt.Errorf("%w: writing DictionaryArray with null encoded in dictionary not yet supported",
				arrow.ErrNotImplemented)
		}
		p.addTerminalInfo(arr)
		return nil
	case arrow.STRUCT:
		p.maybeAddNullable(arr)
		infoBackup := p.info
		dt := arr.DataType().(*arrow.StructType)
		for idx, f := range dt.Fields() {
			p.nullableInParent = f.Nullable
			if err := p.Visit(arr.(*array.Struct).Field(idx)); err != nil {
				return err
			}
			p.info = infoBackup
		}
		return nil
	case arrow.EXTENSION:
		return p.Visit(arr.(array.ExtensionArray).Storage())
	case arrow.SPARSE_UNION, arrow.DENSE_UNION:
		return xerrors.New("union types aren't supported in parquet")
	default:
		p.addTerminalInfo(arr)
		return nil
	}
}

func (p *pathBuilder) addTerminalInfo(arr arrow.Array) {
	p.info.leafIsNullable = p.nullableInParent
	if p.nullableInParent {
		p.info.maxDefLevel++
	}

	// we don't use null_count because if the null_count isn't known
	// and the array does in fact contain nulls, we would end up traversing
	// the null bitmap twice.
	if lazyNoNulls(arr) {
		p.info.path = append(p.info.path, &allPresentTerminalNode{p.info.maxDefLevel})
		p.info.leafIsNullable = false
	} else if lazyNullCount(arr) == int64(arr.Len()) {
		p.info.path = append(p.info.path, &allNullsTerminalNode{p.info.maxDefLevel - 1, -1})
	} else {
		p.info.path = append(p.info.path, &nullableTerminalNode{
			bitmap:            arr.NullBitmapBytes(),
			elemOffset:        int64(arr.Data().Offset()),
			defLevelIfPresent: p.info.maxDefLevel,
			defLevelIfNull:    p.info.maxDefLevel - 1,
		})
	}
	arr.Retain()
	p.info.primitiveArr = arr
	p.paths = append(p.paths, fixup(p.info.clone()))
}

func (p *pathBuilder) maybeAddNullable(arr arrow.Array) {
	if !p.nullableInParent {
		return
	}

	p.info.maxDefLevel++
	if lazyNoNulls(arr) {
		return
	}

	if lazyNullCount(arr) == int64(arr.Len()) {
		p.info.path = append(p.info.path, &allNullsTerminalNode{p.info.maxDefLevel - 1, -1})
		return
	}

	p.info.path = append(p.info.path, &nullableNode{
		bitmap: arr.NullBitmapBytes(), entryOffset: int64(arr.Data().Offset()),
		defLevelIfNull: p.info.maxDefLevel - 1, repLevelIfNull: -1,
		newRange: true,
	})
}
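// multipathLevelBuilder ties everything together for one top-level array:
// Visit records one pathInfo per primitive leaf, and write/writeAll replay
// each of those paths against the root element range to produce the def/rep
// levels for the corresponding parquet leaf column.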
type multipathLevelBuilder struct {
	rootRange elemRange
	data      arrow.ArrayData
	builder   pathBuilder

	refCount int64
}

func (m *multipathLevelBuilder) Retain() {
	atomic.AddInt64(&m.refCount, 1)
}

func (m *multipathLevelBuilder) Release() {
	if atomic.AddInt64(&m.refCount, -1) == 0 {
		m.data.Release()
		m.data = nil
		m.builder.Release()
		m.builder = pathBuilder{}
	}
}

func newMultipathLevelBuilder(arr arrow.Array, fieldNullable bool) (*multipathLevelBuilder, error) {
	ret := &multipathLevelBuilder{
		refCount:  1,
		rootRange: elemRange{int64(0), int64(arr.Data().Len())},
		data:      arr.Data(),
		builder:   pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: 1},
	}
	if err := ret.builder.Visit(arr); err != nil {
		return nil, err
	}
	arr.Data().Retain()
	return ret, nil
}

func (m *multipathLevelBuilder) leafCount() int {
	return len(m.builder.paths)
}

func (m *multipathLevelBuilder) write(leafIdx int, ctx *arrowWriteContext) (multipathLevelResult, error) {
	return writePath(m.rootRange, &m.builder.paths[leafIdx], ctx)
}

func (m *multipathLevelBuilder) writeAll(ctx *arrowWriteContext) (res []multipathLevelResult, err error) {
	res = make([]multipathLevelResult, m.leafCount())
	for idx := range res {
		res[idx], err = m.write(idx, ctx)
		if err != nil {
			break
		}
	}
	return
}

type multipathLevelResult struct {
	leafArr         arrow.Array
	defLevels       []int16
	defLevelsBuffer encoding.Buffer
	repLevels       []int16
	repLevelsBuffer encoding.Buffer
	// contains the element ranges of the required visiting on the descendants
	// of the final list ancestor for any leaf node.
	//
	// the algorithm will attempt to consolidate the visited ranges into the
	// smallest number of ranges possible.
	//
	// this data is necessary to pass along because, after producing the
	// def-rep levels for each leaf array, it is impossible to determine which
	// values have to be sent to parquet when a null list value in a nullable
	// listarray is non-empty
	//
	// this allows the parquet writing to determine which values ultimately
	// need to be written
	postListVisitedElems []elemRange

	leafIsNullable bool
}

func (m *multipathLevelResult) Release() {
	m.defLevels = nil
	if m.defLevelsBuffer != nil {
		m.defLevelsBuffer.Release()
	}
	if m.repLevels != nil {
		m.repLevels = nil
		m.repLevelsBuffer.Release()
	}
}

type pathWriteCtx struct {
	mem          memory.Allocator
	defLevels    *int16BufferBuilder
	repLevels    *int16BufferBuilder
	visitedElems []elemRange
}

func (p *pathWriteCtx) ReserveDefLevels(elems int) iterResult {
	p.defLevels.Reserve(elems)
	return iterDone
}

func (p *pathWriteCtx) AppendDefLevel(lvl int16) iterResult {
	p.defLevels.Append(lvl)
	return iterDone
}

func (p *pathWriteCtx) AppendDefLevels(count int, defLevel int16) iterResult {
	p.defLevels.AppendCopies(count, defLevel)
	return iterDone
}

func (p *pathWriteCtx) UnsafeAppendDefLevel(v int16) iterResult {
	p.defLevels.UnsafeAppend(v)
	return iterDone
}

func (p *pathWriteCtx) AppendRepLevel(lvl int16) iterResult {
	p.repLevels.Append(lvl)
	return iterDone
}

func (p *pathWriteCtx) AppendRepLevels(count int, lvl int16) iterResult {
	p.repLevels.AppendCopies(count, lvl)
	return iterDone
}

func (p *pathWriteCtx) equalRepDeflevlsLen() bool { return p.defLevels.Len() == p.repLevels.Len() }

func (p *pathWriteCtx) recordPostListVisit(rng elemRange) {
	if len(p.visitedElems) > 0 && rng.start == p.visitedElems[len(p.visitedElems)-1].end {
		p.visitedElems[len(p.visitedElems)-1].end = rng.end
		return
	}
	p.visitedElems = append(p.visitedElems, rng)
}
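// int16BufferBuilder stores def/rep levels as raw bytes in a pooled buffer.
// Values reinterprets those bytes as []int16 without copying, which relies on
// the levels being written in native byte order (the unsafe casts in Append
// and AppendCopies below write exactly that representation).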
type int16BufferBuilder struct {
	*encoding.PooledBufferWriter
}

func (b *int16BufferBuilder) Values() []int16 {
	return arrow.Int16Traits.CastFromBytes(b.PooledBufferWriter.Bytes())
}

func (b *int16BufferBuilder) Value(i int) int16 {
	return b.Values()[i]
}

func (b *int16BufferBuilder) Reserve(n int) {
	b.PooledBufferWriter.Reserve(n * arrow.Int16SizeBytes)
}

func (b *int16BufferBuilder) Len() int { return b.PooledBufferWriter.Len() / arrow.Int16SizeBytes }

func (b *int16BufferBuilder) AppendCopies(count int, val int16) {
	b.Reserve(count)
	b.UnsafeWriteCopy(count, (*(*[2]byte)(unsafe.Pointer(&val)))[:])
}

func (b *int16BufferBuilder) UnsafeAppend(v int16) {
	b.PooledBufferWriter.UnsafeWrite((*(*[2]byte)(unsafe.Pointer(&v)))[:])
}

func (b *int16BufferBuilder) Append(v int16) {
	b.PooledBufferWriter.Reserve(arrow.Int16SizeBytes)
	b.PooledBufferWriter.Write((*(*[2]byte)(unsafe.Pointer(&v)))[:])
}

func fillRepLevels(count int, repLvl int16, ctx *pathWriteCtx) {
	if repLvl == -1 {
		return
	}

	fillCount := count
	// this condition (rep and def level lengths being equal) occurs in one of
	// a few cases:
	// 1. before any list is encountered
	// 2. after rep levels have been filled in due to null/empty values above
	// 3. after finishing a list
	if !ctx.equalRepDeflevlsLen() {
		fillCount--
	}
	ctx.AppendRepLevels(fillCount, repLvl)
}
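// writePath evaluates one leaf's node chain as a small stack machine:
// stack[i] holds the element range remaining for info.path[i]. each node's
// run either returns iterNext (+1) to descend into its child using the child
// range it just computed, or iterDone (-1) to pop back to its parent, so the
// walk finishes once the root node itself reports done and stackPos drops
// below stackBase.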
func writePath(rootRange elemRange, info *pathInfo, arrCtx *arrowWriteContext) (multipathLevelResult, error) {
	stack := make([]elemRange, len(info.path))
	buildResult := multipathLevelResult{
		leafArr:        info.primitiveArr,
		leafIsNullable: info.leafIsNullable,
	}

	if info.maxDefLevel == 0 {
		// this case only occurs when there are no nullable or repeated
		// columns in the path from the root to the leaf
		leafLen := buildResult.leafArr.Len()
		buildResult.postListVisitedElems = []elemRange{{0, int64(leafLen)}}
		return buildResult, nil
	}

	stack[0] = rootRange
	if arrCtx.defLevelsBuffer != nil {
		arrCtx.defLevelsBuffer.Release()
		arrCtx.defLevelsBuffer = nil
	}
	if arrCtx.repLevelsBuffer != nil {
		arrCtx.repLevelsBuffer.Release()
		arrCtx.repLevelsBuffer = nil
	}

	ctx := pathWriteCtx{arrCtx.props.mem,
		&int16BufferBuilder{encoding.NewPooledBufferWriter(0)},
		&int16BufferBuilder{encoding.NewPooledBufferWriter(0)},
		make([]elemRange, 0)}

	ctx.defLevels.Reserve(int(rootRange.size()))
	if info.maxRepLevel > 0 {
		ctx.repLevels.Reserve(int(rootRange.size()))
	}

	stackBase := 0
	stackPos := stackBase
	for stackPos >= stackBase {
		var res iterResult
		switch n := info.path[stackPos].(type) {
		case *nullableNode:
			res = n.run(&stack[stackPos], &stack[stackPos+1], &ctx)
		case *listNode:
			res = n.run(&stack[stackPos], &stack[stackPos+1], &ctx)
		case *nullableTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		case *allPresentTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		case *allNullsTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		}
		stackPos += int(res)
	}

	if ctx.repLevels.Len() > 0 {
		// this case only occurs when there was a repeated element somewhere
		buildResult.repLevels = ctx.repLevels.Values()
		buildResult.repLevelsBuffer = ctx.repLevels.Finish()

		buildResult.postListVisitedElems, ctx.visitedElems = ctx.visitedElems, buildResult.postListVisitedElems
		// it is possible when processing lists that all lists were empty. in
		// this case, no elements would have been added to postListVisitedElems.
		// by adding an empty element, we avoid special casing later
		if len(buildResult.postListVisitedElems) == 0 {
			buildResult.postListVisitedElems = append(buildResult.postListVisitedElems, elemRange{0, 0})
		}
	} else {
		buildResult.postListVisitedElems = append(buildResult.postListVisitedElems, elemRange{0, int64(buildResult.leafArr.Len())})
		buildResult.repLevels = nil
	}

	buildResult.defLevels = ctx.defLevels.Values()
	buildResult.defLevelsBuffer = ctx.defLevels.Finish()
	return buildResult, nil
}
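// A rough sketch of how the write path is expected to use this machinery
// (the surrounding arrowWriteContext is constructed by the arrow file
// writer; error handling elided):
//
//	bldr, err := newMultipathLevelBuilder(arr, field.Nullable)
//	if err != nil {
//		return err
//	}
//	defer bldr.Release()
//	results, err := bldr.writeAll(ctx) // one multipathLevelResult per leaf
//	for _, res := range results {
//		// res.defLevels/res.repLevels accompany res.leafArr to the column
//		// writer; res.postListVisitedElems identifies which leaf value
//		// ranges actually need to be written.
//	}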