github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/encoding/fixed_len_byte_array_encoder.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package encoding 18 19 import ( 20 "github.com/apache/arrow/go/v7/parquet" 21 "github.com/apache/arrow/go/v7/parquet/internal/utils" 22 ) 23 24 // PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array 25 // always writing typeLength bytes for each value. 26 type PlainFixedLenByteArrayEncoder struct { 27 encoder 28 29 bitSetReader utils.SetBitRunReader 30 } 31 32 // Put writes the provided values to the encoder 33 func (enc *PlainFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) { 34 typeLen := enc.descr.TypeLength() 35 if typeLen == 0 { 36 return 37 } 38 39 bytesNeeded := len(in) * typeLen 40 enc.sink.Reserve(bytesNeeded) 41 for _, val := range in { 42 if val == nil { 43 panic("value cannot be nil") 44 } 45 enc.sink.UnsafeWrite(val[:typeLen]) 46 } 47 } 48 49 // PutSpaced is like Put but works with data that is spaced out according to the passed in bitmap 50 func (enc *PlainFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) { 51 if validBits != nil { 52 if enc.bitSetReader == nil { 53 enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in))) 54 } else { 55 enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in))) 56 } 57 58 for { 59 run := enc.bitSetReader.NextRun() 60 if run.Length == 0 { 61 break 62 } 63 enc.Put(in[int(run.Pos):int(run.Pos+run.Length)]) 64 } 65 } else { 66 enc.Put(in) 67 } 68 } 69 70 // Type returns the underlying physical type this encoder works with, Fixed Length byte arrays. 71 func (PlainFixedLenByteArrayEncoder) Type() parquet.Type { 72 return parquet.Types.FixedLenByteArray 73 } 74 75 // WriteDict overrides the embedded WriteDict function to call a specialized function 76 // for copying out the Fixed length values from the dictionary more efficiently. 77 func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) { 78 enc.memo.(BinaryMemoTable).CopyFixedWidthValues(0, enc.typeLen, out) 79 } 80 81 // Put writes fixed length values to a dictionary encoded column 82 func (enc *DictFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) { 83 for _, v := range in { 84 if v == nil { 85 v = empty[:] 86 } 87 memoIdx, found, err := enc.memo.GetOrInsert(v) 88 if err != nil { 89 panic(err) 90 } 91 if !found { 92 enc.dictEncodedSize += enc.typeLen 93 } 94 enc.addIndex(memoIdx) 95 } 96 } 97 98 // PutSpaced is like Put but leaves space for nulls 99 func (enc *DictFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) { 100 utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { 101 enc.Put(in[pos : pos+length]) 102 return nil 103 }) 104 }