github.com/apache/arrow/go/v7@v7.0.1/parquet/pqarrow/path_builder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package pqarrow
    18  
    19  import (
    20  	"sync/atomic"
    21  	"unsafe"
    22  
    23  	"github.com/apache/arrow/go/v7/arrow"
    24  	"github.com/apache/arrow/go/v7/arrow/array"
    25  	"github.com/apache/arrow/go/v7/arrow/memory"
    26  	"github.com/apache/arrow/go/v7/parquet/internal/encoding"
    27  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    28  	"golang.org/x/xerrors"
    29  )
    30  
    31  type iterResult int8
    32  
    33  const (
    34  	iterDone iterResult = -1
    35  	iterNext iterResult = 1
    36  )
    37  
    38  type elemRange struct {
    39  	start int64
    40  	end   int64
    41  }
    42  
    43  func (e elemRange) empty() bool { return e.start == e.end }
    44  func (e elemRange) size() int64 { return e.end - e.start }
    45  
// rangeSelector maps a list entry at index idx to the range of indices
// its child elements occupy in the child array.
type rangeSelector interface {
	GetRange(idx int64) elemRange
}
    49  
    50  type varRangeSelector struct {
    51  	offsets []int32
    52  }
    53  
    54  func (v varRangeSelector) GetRange(idx int64) elemRange {
    55  	return elemRange{int64(v.offsets[idx]), int64(v.offsets[idx+1])}
    56  }
    57  
    58  type fixedSizeRangeSelector struct {
    59  	listSize int32
    60  }
    61  
    62  func (f fixedSizeRangeSelector) GetRange(idx int64) elemRange {
    63  	start := idx * int64(f.listSize)
    64  	return elemRange{start, start + int64(f.listSize)}
    65  }
    66  
// pathNode is one level in the path from the root of an arrow array down
// to a leaf; clone produces a copy whose state can be mutated (e.g. by
// fixup) without affecting the original.
type pathNode interface {
	clone() pathNode
}
    70  
    71  type allPresentTerminalNode struct {
    72  	defLevel int16
    73  }
    74  
    75  func (n *allPresentTerminalNode) clone() pathNode {
    76  	ret := *n
    77  	return &ret
    78  }
    79  
    80  func (n *allPresentTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
    81  	return ctx.AppendDefLevels(int(rng.size()), n.defLevel)
    82  }
    83  
    84  type allNullsTerminalNode struct {
    85  	defLevel int16
    86  	repLevel int16
    87  }
    88  
    89  func (n *allNullsTerminalNode) clone() pathNode {
    90  	ret := *n
    91  	return &ret
    92  }
    93  
    94  func (n *allNullsTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
    95  	fillRepLevels(int(rng.size()), n.repLevel, ctx)
    96  	return ctx.AppendDefLevels(int(rng.size()), n.defLevel)
    97  }
    98  
    99  type nullableTerminalNode struct {
   100  	bitmap            []byte
   101  	elemOffset        int64
   102  	defLevelIfPresent int16
   103  	defLevelIfNull    int16
   104  }
   105  
   106  func (n *nullableTerminalNode) clone() pathNode {
   107  	ret := *n
   108  	return &ret
   109  }
   110  
   111  func (n *nullableTerminalNode) run(rng elemRange, ctx *pathWriteCtx) iterResult {
   112  	elems := rng.size()
   113  	ctx.ReserveDefLevels(int(elems))
   114  
   115  	var (
   116  		present = (*(*[2]byte)(unsafe.Pointer(&n.defLevelIfPresent)))[:]
   117  		null    = (*(*[2]byte)(unsafe.Pointer(&n.defLevelIfNull)))[:]
   118  	)
   119  	rdr := utils.NewBitRunReader(n.bitmap, n.elemOffset+rng.start, elems)
   120  	for {
   121  		run := rdr.NextRun()
   122  		if run.Len == 0 {
   123  			break
   124  		}
   125  		if run.Set {
   126  			ctx.defLevels.UnsafeWriteCopy(int(run.Len), present)
   127  		} else {
   128  			ctx.defLevels.UnsafeWriteCopy(int(run.Len), null)
   129  		}
   130  	}
   131  	return iterDone
   132  }
   133  
   134  type listNode struct {
   135  	selector        rangeSelector
   136  	prevRepLevel    int16
   137  	repLevel        int16
   138  	defLevelIfEmpty int16
   139  	isLast          bool
   140  }
   141  
   142  func (n *listNode) clone() pathNode {
   143  	ret := *n
   144  	return &ret
   145  }
   146  
// run processes list entries from rng one at a time: it accounts for any
// leading run of empty lists, emits the repetition level marking the start
// of a new list, and narrows childRng to the children of the first
// non-empty list. Returns iterDone when rng is exhausted, otherwise
// iterNext to descend into the child node.
func (n *listNode) run(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	if rng.empty() {
		return iterDone
	}

	// find the first non-empty list (skipping a run of empties)
	start := rng.start
	for {
		// retrieve the range of elements that this list contains
		*childRng = n.selector.GetRange(rng.start)
		if !childRng.empty() {
			break
		}
		rng.start++
		if rng.empty() {
			break
		}
	}

	// loops post-condition:
	// * rng is either empty (we're done processing this node)
	//     or start corresponds to a non-empty list
	// * if rng is non-empty, childRng contains the bounds of the non-empty list

	// handle any skipped over empty lists: each gets a rep level at the
	// parent's level and the "empty" def level
	emptyElems := rng.start - start
	if emptyElems > 0 {
		fillRepLevels(int(emptyElems), n.prevRepLevel, ctx)
		ctx.AppendDefLevels(int(emptyElems), n.defLevelIfEmpty)
	}

	// start of a new list, note that for nested lists adding the element
	// here effectively suppresses this code until we either encounter null
	// elements or empty lists between here and the innermost list (since we
	// make the rep levels repetition and definition levels unequal).
	// similarly when we are backtracking up the stack, the repetition
	// and definition levels are again equal so if we encounter an intermediate
	// list, with more elements, this will detect it as a new list
	if ctx.equalRepDeflevlsLen() && !rng.empty() {
		ctx.AppendRepLevel(n.prevRepLevel)
	}

	if rng.empty() {
		return iterDone
	}

	rng.start++
	if n.isLast {
		// if this is the last repeated node, we can try
		// to extend the child range as wide as possible,
		// before continuing to the next node
		return n.fillForLast(rng, childRng, ctx)
	}

	return iterNext
}
   203  
// fillForLast runs only on the innermost (deepest) list node: it greedily
// extends childRng across consecutive non-empty sibling lists, emitting
// their rep levels in bulk, so the terminal node below can be run once
// over the widened range.
func (n *listNode) fillForLast(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	fillRepLevels(int(childRng.size()), n.repLevel, ctx)
	// once we've reached this point the following preconditions should hold:
	// 1. there are no more repeated path nodes to deal with
	// 2. all elements in |range| represent contiguous elements in the child
	//    array (null values would have shortened the range to ensure all
	//    remaining list elements are present, though they may be empty)
	// 3. no element of range spans a parent list (intermediate list nodes
	//    only handle one list entry at a time)
	//
	// given these preconditions, it should be safe to fill runs on non-empty lists
	// here and expand the range in the child node accordingly
	for !rng.empty() {
		sizeCheck := n.selector.GetRange(rng.start)
		if sizeCheck.empty() {
			// the empty range will need to be handled after we pass down the accumulated
			// range because it affects def level placement and we need to get the children
			// def levels entered first
			break
		}

		// this is the start of a new list. we can be sure that it only applies to the
		// previous list (and doesn't jump to the start of any list further up in nesting
		// due to the constraints mentioned earlier)
		ctx.AppendRepLevel(n.prevRepLevel)
		ctx.AppendRepLevels(int(sizeCheck.size())-1, n.repLevel)
		childRng.end = sizeCheck.end
		rng.start++
	}

	// do book-keeping to track the elements of the arrays that are actually visited
	// beyond this point. this is necessary to identify "gaps" in values that should
	// not be processed (written out to parquet)
	ctx.recordPostListVisit(*childRng)
	return iterNext
}
   240  
   241  type nullableNode struct {
   242  	bitmap         []byte
   243  	entryOffset    int64
   244  	repLevelIfNull int16
   245  	defLevelIfNull int16
   246  
   247  	validBitsReader utils.BitRunReader
   248  	newRange        bool
   249  }
   250  
   251  func (n *nullableNode) clone() pathNode {
   252  	var ret nullableNode = *n
   253  	return &ret
   254  }
   255  
// run consumes at most one leading run of null entries from rng (emitting
// their rep/def levels directly), then narrows childRng to the following
// run of valid entries for the child node to process. The bit-run reader
// is created lazily when a new range begins and persists across calls, so
// successive invocations continue where the previous one stopped.
func (n *nullableNode) run(rng, childRng *elemRange, ctx *pathWriteCtx) iterResult {
	if n.newRange {
		n.validBitsReader = utils.NewBitRunReader(n.bitmap, n.entryOffset+rng.start, rng.size())
	}
	childRng.start = rng.start
	run := n.validBitsReader.NextRun()
	if !run.Set {
		// a run of nulls: these entries end here instead of descending
		// into the child node
		rng.start += run.Len
		fillRepLevels(int(run.Len), n.repLevelIfNull, ctx)
		ctx.AppendDefLevels(int(run.Len), n.defLevelIfNull)
		run = n.validBitsReader.NextRun()
	}

	if rng.empty() {
		// nothing valid left; reset so the next visit starts a fresh reader
		n.newRange = true
		return iterDone
	}
	// the current run is all-valid: hand exactly that many entries down
	childRng.start = rng.start
	childRng.end = childRng.start
	childRng.end += run.Len
	rng.start += childRng.size()
	n.newRange = false
	return iterNext
}
   280  
   281  type pathInfo struct {
   282  	path           []pathNode
   283  	primitiveArr   arrow.Array
   284  	maxDefLevel    int16
   285  	maxRepLevel    int16
   286  	leafIsNullable bool
   287  }
   288  
   289  func (p pathInfo) clone() pathInfo {
   290  	ret := p
   291  	ret.path = make([]pathNode, len(p.path))
   292  	for idx, n := range p.path {
   293  		ret.path[idx] = n.clone()
   294  	}
   295  	return ret
   296  }
   297  
   298  type pathBuilder struct {
   299  	info             pathInfo
   300  	paths            []pathInfo
   301  	nullableInParent bool
   302  
   303  	refCount int64
   304  }
   305  
   306  func (p *pathBuilder) Retain() {
   307  	atomic.AddInt64(&p.refCount, 1)
   308  }
   309  
   310  func (p *pathBuilder) Release() {
   311  	if atomic.AddInt64(&p.refCount, -1) == 0 {
   312  		for idx := range p.paths {
   313  			p.paths[idx].primitiveArr.Release()
   314  			p.paths[idx].primitiveArr = nil
   315  		}
   316  	}
   317  }
   318  
// lazyNullCount returns the null count currently cached on the array's
// data without forcing a bitmap scan: calling NullN on the arr directly
// would compute the nulls, whereas calling NullN on the data object just
// returns whatever value it already holds. thus we might get
// array.UnknownNullCount as the result here.
func lazyNullCount(arr arrow.Array) int64 {
	return int64(arr.Data().NullN())
}
   326  
   327  func lazyNoNulls(arr arrow.Array) bool {
   328  	nulls := lazyNullCount(arr)
   329  	return nulls == 0 || (nulls == array.UnknownNullCount && arr.NullBitmapBytes() == nil)
   330  }
   331  
// fixupVisitor rewrites the rep-level-if-null bookkeeping on a freshly
// built path (see fixup): it marks the innermost list node and threads
// the repetition level that null entries below a list should emit.
type fixupVisitor struct {
	maxRepLevel    int
	repLevelIfNull int16
}

// visit adjusts a single node. A repLevelIfNull of -1 means "no repeated
// ancestor applies" and leaves nodes untouched.
func (f *fixupVisitor) visit(n pathNode) {
	switch n := n.(type) {
	case *listNode:
		if n.repLevel == int16(f.maxRepLevel) {
			// deepest list on the path: mark it so run() can batch children,
			// and stop propagating a rep level below it
			n.isLast = true
			f.repLevelIfNull = -1
		} else {
			f.repLevelIfNull = n.repLevel
		}
	case *nullableTerminalNode:
	case *allPresentTerminalNode:
		// deliberate no-ops: these terminals carry no rep level to fix up
		// (Go switch cases do not fall through)
	case *allNullsTerminalNode:
		if f.repLevelIfNull != -1 {
			n.repLevel = f.repLevelIfNull
		}
	case *nullableNode:
		if f.repLevelIfNull != -1 {
			n.repLevelIfNull = f.repLevelIfNull
		}
	}
}
   358  
   359  func fixup(info pathInfo) pathInfo {
   360  	// we only need to fixup the path if there were repeated elems
   361  	if info.maxRepLevel == 0 {
   362  		return info
   363  	}
   364  
   365  	visitor := fixupVisitor{maxRepLevel: int(info.maxRepLevel)}
   366  	if visitor.maxRepLevel > 0 {
   367  		visitor.repLevelIfNull = 0
   368  	} else {
   369  		visitor.repLevelIfNull = -1
   370  	}
   371  
   372  	for _, p := range info.path {
   373  		visitor.visit(p)
   374  	}
   375  	return info
   376  }
   377  
// Visit recursively walks arr, appending a path node for each nesting
// level (nullability, lists, structs) and terminating at each primitive
// leaf via addTerminalInfo, which records one pathInfo per leaf in
// p.paths. Dictionary, extension, and union types are unsupported.
func (p *pathBuilder) Visit(arr arrow.Array) error {
	switch arr.DataType().ID() {
	case arrow.LIST, arrow.MAP:
		p.maybeAddNullable(arr)
		// increment necessary due to empty lists
		p.info.maxDefLevel++
		p.info.maxRepLevel++
		larr, ok := arr.(*array.List)
		if !ok {
			larr = arr.(*array.Map).List
		}

		p.info.path = append(p.info.path, &listNode{
			selector:        varRangeSelector{larr.Offsets()[larr.Data().Offset():]},
			prevRepLevel:    p.info.maxRepLevel - 1,
			repLevel:        p.info.maxRepLevel,
			defLevelIfEmpty: p.info.maxDefLevel - 1,
		})
		// ok is true only for a plain list; a map's immediate child is
		// descended into with nullableInParent=false
		p.nullableInParent = ok
		return p.Visit(larr.ListValues())
	case arrow.FIXED_SIZE_LIST:
		p.maybeAddNullable(arr)
		larr := arr.(*array.FixedSizeList)
		listSize := larr.DataType().(*arrow.FixedSizeListType).Len()
		// technically we could encode fixed size lists with two level encodings
		// but we always use 3 level encoding, so we increment def levels as well
		p.info.maxDefLevel++
		p.info.maxRepLevel++
		p.info.path = append(p.info.path, &listNode{
			selector:        fixedSizeRangeSelector{listSize},
			prevRepLevel:    p.info.maxRepLevel - 1,
			repLevel:        p.info.maxRepLevel,
			defLevelIfEmpty: p.info.maxDefLevel,
		})
		// NOTE(review): if arr.data.offset > 0, slice? — sliced fixed-size
		// lists may need offset handling here; confirm against callers
		return p.Visit(larr.ListValues())
	case arrow.DICTIONARY:
		return xerrors.New("dictionary types not implemented yet")
	case arrow.STRUCT:
		p.maybeAddNullable(arr)
		// each struct field extends the path independently from the same
		// prefix, so the saved info is restored after visiting each field
		infoBackup := p.info
		dt := arr.DataType().(*arrow.StructType)
		for idx, f := range dt.Fields() {
			p.nullableInParent = f.Nullable
			if err := p.Visit(arr.(*array.Struct).Field(idx)); err != nil {
				return err
			}
			p.info = infoBackup
		}
		return nil
	case arrow.EXTENSION:
		return xerrors.New("extension types not implemented yet")
	case arrow.SPARSE_UNION, arrow.DENSE_UNION:
		return xerrors.New("union types aren't supported in parquet")
	default:
		p.addTerminalInfo(arr)
		return nil
	}
}
   437  
// addTerminalInfo finishes the current path at a primitive leaf array,
// appending the appropriate terminal node and recording a fixed-up clone
// of the accumulated pathInfo. The leaf array is retained here; the
// builder's Release drops that reference.
func (p *pathBuilder) addTerminalInfo(arr arrow.Array) {
	p.info.leafIsNullable = p.nullableInParent
	if p.nullableInParent {
		p.info.maxDefLevel++
	}

	// we don't use null_count because if the null_count isn't known
	// and the array does in fact contain nulls, we will end up traversing
	// the null bitmap twice.
	if lazyNoNulls(arr) {
		// provably no nulls: every element gets the max def level
		p.info.path = append(p.info.path, &allPresentTerminalNode{p.info.maxDefLevel})
		p.info.leafIsNullable = false
	} else if lazyNullCount(arr) == int64(arr.Len()) {
		// every element is null
		p.info.path = append(p.info.path, &allNullsTerminalNode{p.info.maxDefLevel - 1, -1})
	} else {
		// mixed validity: the terminal node consults the bitmap per element
		p.info.path = append(p.info.path, &nullableTerminalNode{bitmap: arr.NullBitmapBytes(), elemOffset: int64(arr.Data().Offset()), defLevelIfPresent: p.info.maxDefLevel, defLevelIfNull: p.info.maxDefLevel - 1})
	}
	arr.Retain()
	p.info.primitiveArr = arr
	p.paths = append(p.paths, fixup(p.info.clone()))
}
   459  
// maybeAddNullable accounts for a nullable parent level: it bumps the max
// def level and, unless the array provably has no nulls, appends either an
// all-nulls terminal node (everything null) or a nullableNode that splits
// null and valid runs at write time.
func (p *pathBuilder) maybeAddNullable(arr arrow.Array) {
	if !p.nullableInParent {
		return
	}

	p.info.maxDefLevel++
	// no node is needed if we can prove up front there are no nulls;
	// the def level bump above is sufficient
	if lazyNoNulls(arr) {
		return
	}

	if lazyNullCount(arr) == int64(arr.Len()) {
		// everything is null: the path terminates right here
		p.info.path = append(p.info.path, &allNullsTerminalNode{p.info.maxDefLevel - 1, -1})
		return
	}

	// repLevelIfNull starts at -1 ("no repeated ancestor"); fixup rewrites
	// it if a list appears above this node on the path
	p.info.path = append(p.info.path, &nullableNode{
		bitmap: arr.NullBitmapBytes(), entryOffset: int64(arr.Data().Offset()),
		defLevelIfNull: p.info.maxDefLevel - 1, repLevelIfNull: -1,
		newRange: true,
	})
}
   481  
   482  type multipathLevelBuilder struct {
   483  	rootRange elemRange
   484  	data      arrow.ArrayData
   485  	builder   pathBuilder
   486  
   487  	refCount int64
   488  }
   489  
   490  func (m *multipathLevelBuilder) Retain() {
   491  	atomic.AddInt64(&m.refCount, 1)
   492  }
   493  
   494  func (m *multipathLevelBuilder) Release() {
   495  	if atomic.AddInt64(&m.refCount, -1) == 0 {
   496  		m.data.Release()
   497  		m.data = nil
   498  		m.builder.Release()
   499  		m.builder = pathBuilder{}
   500  	}
   501  }
   502  
   503  func newMultipathLevelBuilder(arr arrow.Array, fieldNullable bool) (*multipathLevelBuilder, error) {
   504  	ret := &multipathLevelBuilder{
   505  		refCount:  1,
   506  		rootRange: elemRange{int64(0), int64(arr.Data().Len())},
   507  		data:      arr.Data(),
   508  		builder:   pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: 1},
   509  	}
   510  	if err := ret.builder.Visit(arr); err != nil {
   511  		return nil, err
   512  	}
   513  	arr.Data().Retain()
   514  	return ret, nil
   515  }
   516  
   517  func (m *multipathLevelBuilder) leafCount() int {
   518  	return len(m.builder.paths)
   519  }
   520  
   521  func (m *multipathLevelBuilder) write(leafIdx int, ctx *arrowWriteContext) (multipathLevelResult, error) {
   522  	return writePath(m.rootRange, &m.builder.paths[leafIdx], ctx)
   523  }
   524  
   525  func (m *multipathLevelBuilder) writeAll(ctx *arrowWriteContext) (res []multipathLevelResult, err error) {
   526  	res = make([]multipathLevelResult, m.leafCount())
   527  	for idx := range res {
   528  		res[idx], err = m.write(idx, ctx)
   529  		if err != nil {
   530  			break
   531  		}
   532  	}
   533  	return
   534  }
   535  
// multipathLevelResult is the output of writePath for one leaf column:
// the leaf array plus its computed definition and repetition levels and
// the buffers backing them.
type multipathLevelResult struct {
	leafArr         arrow.Array
	defLevels       []int16
	defLevelsBuffer encoding.Buffer
	repLevels       []int16
	repLevelsBuffer encoding.Buffer
	// contains the element ranges of the required visiting on the descendants of the
	// final list ancestor for any leaf node.
	//
	// the algorithm will attempt to consolidate the visited ranges into the smallest number
	//
	// this data is necessary to pass along because after producing the def-rep levels for each
	// leaf array, it is impossible to determine which values have to be sent to parquet when a
	// null list value in a nullable listarray is non-empty
	//
	// this allows for the parquet writing to determine which values ultimately need to be written
	postListVisitedElems []elemRange

	// whether the leaf column itself is nullable
	leafIsNullable bool
}
   556  
   557  func (m *multipathLevelResult) Release() {
   558  	m.leafArr.Release()
   559  	m.defLevels = nil
   560  	if m.defLevelsBuffer != nil {
   561  		m.defLevelsBuffer.Release()
   562  	}
   563  	if m.repLevels != nil {
   564  		m.repLevels = nil
   565  		m.repLevelsBuffer.Release()
   566  	}
   567  }
   568  
// pathWriteCtx accumulates the def/rep level output while a path is
// walked, plus the consolidated ranges of child elements actually visited.
type pathWriteCtx struct {
	mem          memory.Allocator
	defLevels    *int16BufferBuilder
	repLevels    *int16BufferBuilder
	visitedElems []elemRange
}

// ReserveDefLevels pre-allocates room for elems def levels; it returns
// iterDone so terminal nodes can use it directly as their result.
func (p *pathWriteCtx) ReserveDefLevels(elems int) iterResult {
	p.defLevels.Reserve(elems)
	return iterDone
}

// AppendDefLevel appends a single definition level.
func (p *pathWriteCtx) AppendDefLevel(lvl int16) iterResult {
	p.defLevels.Append(lvl)
	return iterDone
}

// AppendDefLevels appends count copies of defLevel.
func (p *pathWriteCtx) AppendDefLevels(count int, defLevel int16) iterResult {
	p.defLevels.AppendCopies(count, defLevel)
	return iterDone
}

// UnsafeAppendDefLevel appends a def level without a capacity check;
// callers must have reserved space first.
func (p *pathWriteCtx) UnsafeAppendDefLevel(v int16) iterResult {
	p.defLevels.UnsafeAppend(v)
	return iterDone
}

// AppendRepLevel appends a single repetition level.
func (p *pathWriteCtx) AppendRepLevel(lvl int16) iterResult {
	p.repLevels.Append(lvl)
	return iterDone
}

// AppendRepLevels appends count copies of lvl.
func (p *pathWriteCtx) AppendRepLevels(count int, lvl int16) iterResult {
	p.repLevels.AppendCopies(count, lvl)
	return iterDone
}

// equalRepDeflevlsLen reports whether equally many rep and def levels
// have been written so far (see fillRepLevels for why this matters).
func (p *pathWriteCtx) equalRepDeflevlsLen() bool { return p.defLevels.Len() == p.repLevels.Len() }
   607  
   608  func (p *pathWriteCtx) recordPostListVisit(rng elemRange) {
   609  	if len(p.visitedElems) > 0 && rng.start == p.visitedElems[len(p.visitedElems)-1].end {
   610  		p.visitedElems[len(p.visitedElems)-1].end = rng.end
   611  		return
   612  	}
   613  	p.visitedElems = append(p.visitedElems, rng)
   614  }
   615  
   616  type int16BufferBuilder struct {
   617  	*encoding.PooledBufferWriter
   618  }
   619  
   620  func (b *int16BufferBuilder) Values() []int16 {
   621  	return arrow.Int16Traits.CastFromBytes(b.PooledBufferWriter.Bytes())
   622  }
   623  
   624  func (b *int16BufferBuilder) Value(i int) int16 {
   625  	return b.Values()[i]
   626  }
   627  
   628  func (b *int16BufferBuilder) Reserve(n int) {
   629  	b.PooledBufferWriter.Reserve(n * arrow.Int16SizeBytes)
   630  }
   631  
   632  func (b *int16BufferBuilder) Len() int { return b.PooledBufferWriter.Len() / arrow.Int16SizeBytes }
   633  
   634  func (b *int16BufferBuilder) AppendCopies(count int, val int16) {
   635  	b.Reserve(count)
   636  	b.UnsafeWriteCopy(count, (*(*[2]byte)(unsafe.Pointer(&val)))[:])
   637  }
   638  
   639  func (b *int16BufferBuilder) UnsafeAppend(v int16) {
   640  	b.PooledBufferWriter.UnsafeWrite((*(*[2]byte)(unsafe.Pointer(&v)))[:])
   641  }
   642  
   643  func (b *int16BufferBuilder) Append(v int16) {
   644  	b.PooledBufferWriter.Reserve(arrow.Int16SizeBytes)
   645  	b.PooledBufferWriter.Write((*(*[2]byte)(unsafe.Pointer(&v)))[:])
   646  }
   647  
   648  func fillRepLevels(count int, repLvl int16, ctx *pathWriteCtx) {
   649  	if repLvl == -1 {
   650  		return
   651  	}
   652  
   653  	fillCount := count
   654  	// this condition occurs (rep and def levels equals), in one of a few cases:
   655  	// 1. before any list is encounted
   656  	// 2. after rep-level has been filled in due to null/empty values above
   657  	// 3. after finishing a list
   658  	if !ctx.equalRepDeflevlsLen() {
   659  		fillCount--
   660  	}
   661  	ctx.AppendRepLevels(fillCount, repLvl)
   662  }
   663  
// writePath drives the node chain in info over rootRange, producing the
// definition and repetition levels for a single leaf column. The nodes
// are run with an explicit stack of element ranges: each iterResult value
// (-1 or +1) moves the stack position down or up one level.
func writePath(rootRange elemRange, info *pathInfo, arrCtx *arrowWriteContext) (multipathLevelResult, error) {
	stack := make([]elemRange, len(info.path))
	buildResult := multipathLevelResult{
		leafArr:        info.primitiveArr,
		leafIsNullable: info.leafIsNullable,
	}

	if info.maxDefLevel == 0 {
		// this case only occurs when there are no nullable or repeated columns in the path from the root to the leaf
		leafLen := buildResult.leafArr.Len()
		buildResult.postListVisitedElems = []elemRange{{0, int64(leafLen)}}
		return buildResult, nil
	}

	stack[0] = rootRange
	// drop any buffers left over from a previous leaf before reusing arrCtx
	if arrCtx.defLevelsBuffer != nil {
		arrCtx.defLevelsBuffer.Release()
		arrCtx.defLevelsBuffer = nil
	}
	if arrCtx.repLevelsBuffer != nil {
		arrCtx.repLevelsBuffer.Release()
		arrCtx.repLevelsBuffer = nil
	}

	ctx := pathWriteCtx{arrCtx.props.mem,
		&int16BufferBuilder{encoding.NewPooledBufferWriter(0)},
		&int16BufferBuilder{encoding.NewPooledBufferWriter(0)},
		make([]elemRange, 0)}

	ctx.defLevels.Reserve(int(rootRange.size()))
	if info.maxRepLevel > 0 {
		ctx.repLevels.Reserve(int(rootRange.size()))
	}

	// iterative depth-first walk: intermediate nodes fill stack[pos+1] with
	// their child range and return iterNext (+1); terminal nodes return
	// iterDone (-1) to backtrack. the walk ends when the root reports done.
	stackBase := 0
	stackPos := stackBase
	for stackPos >= stackBase {
		var res iterResult
		switch n := info.path[stackPos].(type) {
		case *nullableNode:
			res = n.run(&stack[stackPos], &stack[stackPos+1], &ctx)
		case *listNode:
			res = n.run(&stack[stackPos], &stack[stackPos+1], &ctx)
		case *nullableTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		case *allPresentTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		case *allNullsTerminalNode:
			res = n.run(stack[stackPos], &ctx)
		}
		stackPos += int(res)
	}

	if ctx.repLevels.Len() > 0 {
		// this case only occurs when there was a repeated element somewhere
		buildResult.repLevels = ctx.repLevels.Values()
		buildResult.repLevelsBuffer = ctx.repLevels.Finish()

		// move the visited-range bookkeeping into the result via swap
		buildResult.postListVisitedElems, ctx.visitedElems = ctx.visitedElems, buildResult.postListVisitedElems
		// it is possible when processing lists that all lists were empty. in this
		// case, no elements would have been added to the postListVisitedElements. by
		// adding an empty element, we avoid special casing later
		if len(buildResult.postListVisitedElems) == 0 {
			buildResult.postListVisitedElems = append(buildResult.postListVisitedElems, elemRange{0, 0})
		}
	} else {
		// no repeated elements: every leaf value is visited
		buildResult.postListVisitedElems = append(buildResult.postListVisitedElems, elemRange{0, int64(buildResult.leafArr.Len())})
		buildResult.repLevels = nil
	}

	buildResult.defLevels = ctx.defLevels.Values()
	buildResult.defLevelsBuffer = ctx.defLevels.Finish()
	return buildResult, nil
}