// Source: github.com/apache/arrow/go/v16@v16.1.0/arrow/array/bufferbuilder.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package array

import (
	"sync/atomic"
	"unsafe"

	"github.com/apache/arrow/go/v16/arrow"
	"github.com/apache/arrow/go/v16/arrow/bitutil"
	"github.com/apache/arrow/go/v16/arrow/internal/debug"
	"github.com/apache/arrow/go/v16/arrow/memory"
)

// bufBuilder is the set of operations shared by byte-buffer builders:
// reference counting (Retain/Release), size queries (Len/Cap), access to the
// bytes written so far (Bytes), growth and length control (resize, Advance,
// SetLength), appending (Append), and teardown (Reset/Finish).
// *bufferBuilder in this file provides every method listed here.
type bufBuilder interface {
	Retain()
	Release()
	Len() int
	Cap() int
	Bytes() []byte
	resize(int)
	Advance(int)
	SetLength(int)
	Append([]byte)
	Reset()
	Finish() *memory.Buffer
}

// A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values.
// Specialized implementations provide type-safe APIs for appending and accessing the memory.
//
// Fields:
//   - refCount: reference count, updated atomically by Retain and Release.
//     NOTE(review): presumably kept as the first field so the int64 stays
//     64-bit aligned for sync/atomic on 32-bit platforms; do not reorder
//     without confirming.
//   - mem: allocator handed to memory.NewResizableBuffer when the backing
//     buffer is first created in resize.
//   - buffer: backing buffer; nil until the first resize and again after
//     Reset or Finish.
//   - length: number of bytes currently in use (reported by Len).
//   - capacity: cached copy of buffer.Cap(), refreshed by resize (reported by Cap).
//   - bytes: cached copy of buffer.Buf(), refreshed by resize.
type bufferBuilder struct {
	refCount int64
	mem      memory.Allocator
	buffer   *memory.Buffer
	length   int
	capacity int

	bytes []byte
}

// Retain increases the reference count by 1.
// Retain may be called simultaneously from multiple goroutines.
57 func (b *bufferBuilder) Retain() { 58 atomic.AddInt64(&b.refCount, 1) 59 } 60 61 // Release decreases the reference count by 1. 62 // When the reference count goes to zero, the memory is freed. 63 // Release may be called simultaneously from multiple goroutines. 64 func (b *bufferBuilder) Release() { 65 debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") 66 67 if atomic.AddInt64(&b.refCount, -1) == 0 { 68 if b.buffer != nil { 69 b.buffer.Release() 70 b.buffer, b.bytes = nil, nil 71 } 72 } 73 } 74 75 // Len returns the length of the memory buffer in bytes. 76 func (b *bufferBuilder) Len() int { return b.length } 77 78 // Cap returns the total number of bytes that can be stored without allocating additional memory. 79 func (b *bufferBuilder) Cap() int { return b.capacity } 80 81 // Bytes returns a slice of length b.Len(). 82 // The slice is only valid for use until the next buffer modification. That is, until the next call 83 // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next 84 // buffer modification. 85 func (b *bufferBuilder) Bytes() []byte { return b.bytes[:b.length] } 86 87 func (b *bufferBuilder) resize(elements int) { 88 if b.buffer == nil { 89 b.buffer = memory.NewResizableBuffer(b.mem) 90 } 91 92 b.buffer.ResizeNoShrink(elements) 93 oldCapacity := b.capacity 94 b.capacity = b.buffer.Cap() 95 b.bytes = b.buffer.Buf() 96 97 if b.capacity > oldCapacity { 98 memory.Set(b.bytes[oldCapacity:], 0) 99 } 100 } 101 102 func (b *bufferBuilder) SetLength(length int) { 103 if length > b.length { 104 b.Advance(length) 105 return 106 } 107 108 b.length = length 109 } 110 111 // Advance increases the buffer by length and initializes the skipped bytes to zero. 
112 func (b *bufferBuilder) Advance(length int) { 113 if b.capacity < b.length+length { 114 newCapacity := bitutil.NextPowerOf2(b.length + length) 115 b.resize(newCapacity) 116 } 117 b.length += length 118 } 119 120 // Append appends the contents of v to the buffer, resizing it if necessary. 121 func (b *bufferBuilder) Append(v []byte) { 122 if b.capacity < b.length+len(v) { 123 newCapacity := bitutil.NextPowerOf2(b.length + len(v)) 124 b.resize(newCapacity) 125 } 126 b.unsafeAppend(v) 127 } 128 129 // Reset returns the buffer to an empty state. Reset releases the memory and sets the length and capacity to zero. 130 func (b *bufferBuilder) Reset() { 131 if b.buffer != nil { 132 b.buffer.Release() 133 } 134 b.buffer, b.bytes = nil, nil 135 b.capacity, b.length = 0, 0 136 } 137 138 // Finish TODO(sgc) 139 func (b *bufferBuilder) Finish() (buffer *memory.Buffer) { 140 if b.length > 0 { 141 b.buffer.ResizeNoShrink(b.length) 142 } 143 buffer = b.buffer 144 b.buffer = nil 145 b.Reset() 146 if buffer == nil { 147 buffer = memory.NewBufferBytes(nil) 148 } 149 return 150 } 151 152 func (b *bufferBuilder) unsafeAppend(data []byte) { 153 copy(b.bytes[b.length:], data) 154 b.length += len(data) 155 } 156 157 type multiBufferBuilder struct { 158 refCount int64 159 blockSize int 160 161 mem memory.Allocator 162 blocks []*memory.Buffer 163 currentOutBuffer int 164 } 165 166 // Retain increases the reference count by 1. 167 // Retain may be called simultaneously from multiple goroutines. 168 func (b *multiBufferBuilder) Retain() { 169 atomic.AddInt64(&b.refCount, 1) 170 } 171 172 // Release decreases the reference count by 1. 173 // When the reference count goes to zero, the memory is freed. 174 // Release may be called simultaneously from multiple goroutines. 
175 func (b *multiBufferBuilder) Release() { 176 debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") 177 178 if atomic.AddInt64(&b.refCount, -1) == 0 { 179 b.Reset() 180 } 181 } 182 183 func (b *multiBufferBuilder) Reserve(nbytes int) { 184 if len(b.blocks) == 0 { 185 out := memory.NewResizableBuffer(b.mem) 186 if nbytes < b.blockSize { 187 nbytes = b.blockSize 188 } 189 out.Reserve(nbytes) 190 b.currentOutBuffer = 0 191 b.blocks = []*memory.Buffer{out} 192 return 193 } 194 195 curBuf := b.blocks[b.currentOutBuffer] 196 remain := curBuf.Cap() - curBuf.Len() 197 if nbytes <= remain { 198 return 199 } 200 201 // search for underfull block that has enough bytes 202 for i, block := range b.blocks { 203 remaining := block.Cap() - block.Len() 204 if nbytes <= remaining { 205 b.currentOutBuffer = i 206 return 207 } 208 } 209 210 // current buffer doesn't have enough space, no underfull buffers 211 // make new buffer and set that as our current. 212 newBuf := memory.NewResizableBuffer(b.mem) 213 if nbytes < b.blockSize { 214 nbytes = b.blockSize 215 } 216 217 newBuf.Reserve(nbytes) 218 b.currentOutBuffer = len(b.blocks) 219 b.blocks = append(b.blocks, newBuf) 220 } 221 222 func (b *multiBufferBuilder) RemainingBytes() int { 223 if len(b.blocks) == 0 { 224 return 0 225 } 226 227 buf := b.blocks[b.currentOutBuffer] 228 return buf.Cap() - buf.Len() 229 } 230 231 func (b *multiBufferBuilder) Reset() { 232 b.currentOutBuffer = 0 233 for _, block := range b.Finish() { 234 block.Release() 235 } 236 } 237 238 func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) { 239 buf := b.blocks[b.currentOutBuffer] 240 idx, offset := b.currentOutBuffer, buf.Len() 241 hdr.SetIndexOffset(int32(idx), int32(offset)) 242 243 n := copy(buf.Buf()[offset:], val) 244 buf.ResizeNoShrink(offset + n) 245 } 246 247 func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) { 248 // create a byte slice with zero-copies 249 // in go1.20 this 
would be equivalent to unsafe.StringData 250 v := *(*[]byte)(unsafe.Pointer(&struct { 251 string 252 int 253 }{val, len(val)})) 254 b.UnsafeAppend(hdr, v) 255 } 256 257 func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) { 258 b.currentOutBuffer = 0 259 out, b.blocks = b.blocks, nil 260 return 261 }