github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/encoding/fixed_len_byte_array_encoder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v7/parquet"
    21  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    22  )
    23  
// PlainFixedLenByteArrayEncoder writes the raw bytes of each fixed length
// byte array value, always emitting exactly the column's type length in
// bytes for every value.
type PlainFixedLenByteArrayEncoder struct {
	// encoder is embedded; Put reads the column descriptor (descr) and the
	// output buffer (sink) through it.
	encoder

	// bitSetReader is created by the first PutSpaced call that receives a
	// non-nil validity bitmap and is Reset, rather than recreated, on
	// subsequent calls.
	bitSetReader utils.SetBitRunReader
}
    31  
    32  // Put writes the provided values to the encoder
    33  func (enc *PlainFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) {
    34  	typeLen := enc.descr.TypeLength()
    35  	if typeLen == 0 {
    36  		return
    37  	}
    38  
    39  	bytesNeeded := len(in) * typeLen
    40  	enc.sink.Reserve(bytesNeeded)
    41  	for _, val := range in {
    42  		if val == nil {
    43  			panic("value cannot be nil")
    44  		}
    45  		enc.sink.UnsafeWrite(val[:typeLen])
    46  	}
    47  }
    48  
    49  // PutSpaced is like Put but works with data that is spaced out according to the passed in bitmap
    50  func (enc *PlainFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) {
    51  	if validBits != nil {
    52  		if enc.bitSetReader == nil {
    53  			enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
    54  		} else {
    55  			enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
    56  		}
    57  
    58  		for {
    59  			run := enc.bitSetReader.NextRun()
    60  			if run.Length == 0 {
    61  				break
    62  			}
    63  			enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
    64  		}
    65  	} else {
    66  		enc.Put(in)
    67  	}
    68  }
    69  
// Type returns the physical type this encoder works with, which is always
// parquet.Types.FixedLenByteArray. It does not read any encoder state.
func (PlainFixedLenByteArrayEncoder) Type() parquet.Type {
	return parquet.Types.FixedLenByteArray
}
    74  
    75  // WriteDict overrides the embedded WriteDict function to call a specialized function
    76  // for copying out the Fixed length values from the dictionary more efficiently.
    77  func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) {
    78  	enc.memo.(BinaryMemoTable).CopyFixedWidthValues(0, enc.typeLen, out)
    79  }
    80  
    81  // Put writes fixed length values to a dictionary encoded column
    82  func (enc *DictFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) {
    83  	for _, v := range in {
    84  		if v == nil {
    85  			v = empty[:]
    86  		}
    87  		memoIdx, found, err := enc.memo.GetOrInsert(v)
    88  		if err != nil {
    89  			panic(err)
    90  		}
    91  		if !found {
    92  			enc.dictEncodedSize += enc.typeLen
    93  		}
    94  		enc.addIndex(memoIdx)
    95  	}
    96  }
    97  
    98  // PutSpaced is like Put but leaves space for nulls
    99  func (enc *DictFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) {
   100  	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
   101  		enc.Put(in[pos : pos+length])
   102  		return nil
   103  	})
   104  }