github.com/apache/arrow/go/v16@v16.1.0/arrow/array/bufferbuilder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package array
    18  
    19  import (
    20  	"sync/atomic"
    21  	"unsafe"
    22  
    23  	"github.com/apache/arrow/go/v16/arrow"
    24  	"github.com/apache/arrow/go/v16/arrow/bitutil"
    25  	"github.com/apache/arrow/go/v16/arrow/internal/debug"
    26  	"github.com/apache/arrow/go/v16/arrow/memory"
    27  )
    28  
    29  type bufBuilder interface {
    30  	Retain()
    31  	Release()
    32  	Len() int
    33  	Cap() int
    34  	Bytes() []byte
    35  	resize(int)
    36  	Advance(int)
    37  	SetLength(int)
    38  	Append([]byte)
    39  	Reset()
    40  	Finish() *memory.Buffer
    41  }
    42  
    43  // A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values.
    44  // Specialized implementations provide type-safe APIs for appending and accessing the memory.
    45  type bufferBuilder struct {
    46  	refCount int64
    47  	mem      memory.Allocator
    48  	buffer   *memory.Buffer
    49  	length   int
    50  	capacity int
    51  
    52  	bytes []byte
    53  }
    54  
    55  // Retain increases the reference count by 1.
    56  // Retain may be called simultaneously from multiple goroutines.
    57  func (b *bufferBuilder) Retain() {
    58  	atomic.AddInt64(&b.refCount, 1)
    59  }
    60  
    61  // Release decreases the reference count by 1.
    62  // When the reference count goes to zero, the memory is freed.
    63  // Release may be called simultaneously from multiple goroutines.
    64  func (b *bufferBuilder) Release() {
    65  	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
    66  
    67  	if atomic.AddInt64(&b.refCount, -1) == 0 {
    68  		if b.buffer != nil {
    69  			b.buffer.Release()
    70  			b.buffer, b.bytes = nil, nil
    71  		}
    72  	}
    73  }
    74  
    75  // Len returns the length of the memory buffer in bytes.
    76  func (b *bufferBuilder) Len() int { return b.length }
    77  
    78  // Cap returns the total number of bytes that can be stored without allocating additional memory.
    79  func (b *bufferBuilder) Cap() int { return b.capacity }
    80  
    81  // Bytes returns a slice of length b.Len().
    82  // The slice is only valid for use until the next buffer modification. That is, until the next call
    83  // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next
    84  // buffer modification.
    85  func (b *bufferBuilder) Bytes() []byte { return b.bytes[:b.length] }
    86  
    87  func (b *bufferBuilder) resize(elements int) {
    88  	if b.buffer == nil {
    89  		b.buffer = memory.NewResizableBuffer(b.mem)
    90  	}
    91  
    92  	b.buffer.ResizeNoShrink(elements)
    93  	oldCapacity := b.capacity
    94  	b.capacity = b.buffer.Cap()
    95  	b.bytes = b.buffer.Buf()
    96  
    97  	if b.capacity > oldCapacity {
    98  		memory.Set(b.bytes[oldCapacity:], 0)
    99  	}
   100  }
   101  
   102  func (b *bufferBuilder) SetLength(length int) {
   103  	if length > b.length {
   104  		b.Advance(length)
   105  		return
   106  	}
   107  
   108  	b.length = length
   109  }
   110  
   111  // Advance increases the buffer by length and initializes the skipped bytes to zero.
   112  func (b *bufferBuilder) Advance(length int) {
   113  	if b.capacity < b.length+length {
   114  		newCapacity := bitutil.NextPowerOf2(b.length + length)
   115  		b.resize(newCapacity)
   116  	}
   117  	b.length += length
   118  }
   119  
   120  // Append appends the contents of v to the buffer, resizing it if necessary.
   121  func (b *bufferBuilder) Append(v []byte) {
   122  	if b.capacity < b.length+len(v) {
   123  		newCapacity := bitutil.NextPowerOf2(b.length + len(v))
   124  		b.resize(newCapacity)
   125  	}
   126  	b.unsafeAppend(v)
   127  }
   128  
   129  // Reset returns the buffer to an empty state. Reset releases the memory and sets the length and capacity to zero.
   130  func (b *bufferBuilder) Reset() {
   131  	if b.buffer != nil {
   132  		b.buffer.Release()
   133  	}
   134  	b.buffer, b.bytes = nil, nil
   135  	b.capacity, b.length = 0, 0
   136  }
   137  
   138  // Finish TODO(sgc)
   139  func (b *bufferBuilder) Finish() (buffer *memory.Buffer) {
   140  	if b.length > 0 {
   141  		b.buffer.ResizeNoShrink(b.length)
   142  	}
   143  	buffer = b.buffer
   144  	b.buffer = nil
   145  	b.Reset()
   146  	if buffer == nil {
   147  		buffer = memory.NewBufferBytes(nil)
   148  	}
   149  	return
   150  }
   151  
   152  func (b *bufferBuilder) unsafeAppend(data []byte) {
   153  	copy(b.bytes[b.length:], data)
   154  	b.length += len(data)
   155  }
   156  
   157  type multiBufferBuilder struct {
   158  	refCount  int64
   159  	blockSize int
   160  
   161  	mem              memory.Allocator
   162  	blocks           []*memory.Buffer
   163  	currentOutBuffer int
   164  }
   165  
   166  // Retain increases the reference count by 1.
   167  // Retain may be called simultaneously from multiple goroutines.
   168  func (b *multiBufferBuilder) Retain() {
   169  	atomic.AddInt64(&b.refCount, 1)
   170  }
   171  
   172  // Release decreases the reference count by 1.
   173  // When the reference count goes to zero, the memory is freed.
   174  // Release may be called simultaneously from multiple goroutines.
   175  func (b *multiBufferBuilder) Release() {
   176  	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
   177  
   178  	if atomic.AddInt64(&b.refCount, -1) == 0 {
   179  		b.Reset()
   180  	}
   181  }
   182  
   183  func (b *multiBufferBuilder) Reserve(nbytes int) {
   184  	if len(b.blocks) == 0 {
   185  		out := memory.NewResizableBuffer(b.mem)
   186  		if nbytes < b.blockSize {
   187  			nbytes = b.blockSize
   188  		}
   189  		out.Reserve(nbytes)
   190  		b.currentOutBuffer = 0
   191  		b.blocks = []*memory.Buffer{out}
   192  		return
   193  	}
   194  
   195  	curBuf := b.blocks[b.currentOutBuffer]
   196  	remain := curBuf.Cap() - curBuf.Len()
   197  	if nbytes <= remain {
   198  		return
   199  	}
   200  
   201  	// search for underfull block that has enough bytes
   202  	for i, block := range b.blocks {
   203  		remaining := block.Cap() - block.Len()
   204  		if nbytes <= remaining {
   205  			b.currentOutBuffer = i
   206  			return
   207  		}
   208  	}
   209  
   210  	// current buffer doesn't have enough space, no underfull buffers
   211  	// make new buffer and set that as our current.
   212  	newBuf := memory.NewResizableBuffer(b.mem)
   213  	if nbytes < b.blockSize {
   214  		nbytes = b.blockSize
   215  	}
   216  
   217  	newBuf.Reserve(nbytes)
   218  	b.currentOutBuffer = len(b.blocks)
   219  	b.blocks = append(b.blocks, newBuf)
   220  }
   221  
   222  func (b *multiBufferBuilder) RemainingBytes() int {
   223  	if len(b.blocks) == 0 {
   224  		return 0
   225  	}
   226  
   227  	buf := b.blocks[b.currentOutBuffer]
   228  	return buf.Cap() - buf.Len()
   229  }
   230  
   231  func (b *multiBufferBuilder) Reset() {
   232  	b.currentOutBuffer = 0
   233  	for _, block := range b.Finish() {
   234  		block.Release()
   235  	}
   236  }
   237  
   238  func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) {
   239  	buf := b.blocks[b.currentOutBuffer]
   240  	idx, offset := b.currentOutBuffer, buf.Len()
   241  	hdr.SetIndexOffset(int32(idx), int32(offset))
   242  
   243  	n := copy(buf.Buf()[offset:], val)
   244  	buf.ResizeNoShrink(offset + n)
   245  }
   246  
   247  func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) {
   248  	// create a byte slice with zero-copies
   249  	// in go1.20 this would be equivalent to unsafe.StringData
   250  	v := *(*[]byte)(unsafe.Pointer(&struct {
   251  		string
   252  		int
   253  	}{val, len(val)}))
   254  	b.UnsafeAppend(hdr, v)
   255  }
   256  
   257  func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) {
   258  	b.currentOutBuffer = 0
   259  	out, b.blocks = b.blocks, nil
   260  	return
   261  }