github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/utils/bitmap_writer.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils
    18  
    19  import (
    20  	"encoding/binary"
    21  	"math/bits"
    22  
    23  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    24  )
    25  
// BitmapWriter is the common interface for the bitmap writers in this package,
// so that callers can use multiple implementations or swap between them.
//
// Not every implementation in this file supports every method: the writer
// returned by NewBitmapWriter panics on AppendWord, and the writer returned by
// NewFirstTimeBitmapWriter panics on AppendBools.
type BitmapWriter interface {
	// Set sets the current bit that will be written.
	Set()
	// Clear clears the current bit that will be written.
	Clear()
	// Next advances the writer to the next bit.
	Next()
	// Finish flushes the current (possibly partial) byte out to the bitmap slice.
	Finish()
	// AppendWord takes the low nbits bits from word, which should be an
	// LSB-ordered bitmap, and appends them to the bitmap.
	AppendWord(word uint64, nbits int64)
	// AppendBools appends the bit representation of the bools slice, returning the
	// number of bools that were able to fit in the remaining length of the writer.
	AppendBools(in []bool) int
	// Pos is the current position that will be written next.
	Pos() int
	// Reset allows reusing the writer by resetting Pos so that writing begins at
	// bit start, with length as the number of bits that the writer can write.
	Reset(start, length int)
}
    48  
// bitmapWriter wraps *bitutil.BitmapWriter by embedding it so that it can be
// returned as a BitmapWriter. Its methods are promoted from the embedded
// writer, except for AppendWord, which is declared on this type.
type bitmapWriter struct {
	*bitutil.BitmapWriter
}
    52  
    53  func NewBitmapWriter(bitmap []byte, start, length int) BitmapWriter {
    54  	return &bitmapWriter{bitutil.NewBitmapWriter(bitmap, start, length)}
    55  }
    56  
// AppendWord is declared on bitmapWriter only so that the type has the
// AppendWord method required by BitmapWriter; it is not implemented and
// always panics.
func (b *bitmapWriter) AppendWord(uint64, int64) {
	panic("unimplemented")
}
    60  
// firstTimeBitmapWriter assembles bits in a pending byte (curByte) and stores
// that byte into buf whenever a byte boundary is crossed or Finish is called.
type firstTimeBitmapWriter struct {
	// buf is the destination bitmap.
	buf []byte
	// pos counts the bits written since construction or the last Reset.
	pos int64
	// length is the number of bits the writer was told it may write; it is
	// only consulted by Finish.
	length int64

	// curByte is the byte currently being assembled; it has not necessarily
	// been stored into buf yet.
	curByte uint8
	// bitMask has a single bit set, marking the current bit within curByte.
	bitMask uint8
	// byteOffset is the index in buf at which curByte will be stored.
	byteOffset int64
	// endianBuffer is scratch space used by AppendWord to lay out a word in
	// little-endian byte order before copying it into buf.
	endianBuffer [8]byte
}
    71  
    72  // NewFirstTimeBitmapWriter creates a bitmap writer that might clobber any bit values
    73  // following the bits written to the bitmap, as such it is faster than the bitmapwriter
    74  // that is created with NewBitmapWriter
    75  func NewFirstTimeBitmapWriter(buf []byte, start, length int64) BitmapWriter {
    76  	ret := &firstTimeBitmapWriter{
    77  		buf:        buf,
    78  		byteOffset: start / 8,
    79  		bitMask:    bitutil.BitMask[start%8],
    80  		length:     length,
    81  	}
    82  	if length > 0 {
    83  		ret.curByte = ret.buf[int(ret.byteOffset)] & bitutil.PrecedingBitmask[start%8]
    84  	}
    85  	return ret
    86  }
    87  
    88  func (bw *firstTimeBitmapWriter) Reset(start, length int) {
    89  	bw.pos = 0
    90  	bw.byteOffset = int64(start / 8)
    91  	bw.bitMask = bitutil.BitMask[start%8]
    92  	bw.length = int64(length)
    93  	if length > 0 {
    94  		bw.curByte = bw.buf[int(bw.byteOffset)] & bitutil.PrecedingBitmask[start%8]
    95  	}
    96  }
    97  
// Pos returns the number of bits written since construction or the last Reset,
// converted to int.
func (bw *firstTimeBitmapWriter) Pos() int { return int(bw.pos) }
    99  func (bw *firstTimeBitmapWriter) AppendWord(word uint64, nbits int64) {
   100  	if nbits == 0 {
   101  		return
   102  	}
   103  
   104  	// location that the first byte needs to be written to for appending
   105  	appslice := bw.buf[int(bw.byteOffset):]
   106  
   107  	// update everything but curByte
   108  	bw.pos += nbits
   109  	bitOffset := bits.TrailingZeros32(uint32(bw.bitMask))
   110  	bw.bitMask = bitutil.BitMask[(int64(bitOffset)+nbits)%8]
   111  	bw.byteOffset += (int64(bitOffset) + nbits) / 8
   112  
   113  	if bitOffset != 0 {
   114  		// we're in the middle of the byte. Update the byte and shift bits appropriately
   115  		// so we can just copy the bytes.
   116  		carry := 8 - bitOffset
   117  		// Carry over bits from word to curByte. We assume any extra bits in word are unset
   118  		// so no additional accounting is needed for when nbits < carry
   119  		bw.curByte |= uint8((word & uint64(bitutil.PrecedingBitmask[carry])) << bitOffset)
   120  		// check everything was transferred to curByte
   121  		if nbits < int64(carry) {
   122  			return
   123  		}
   124  		appslice[0] = bw.curByte
   125  		appslice = appslice[1:]
   126  		// move the carry bits off of word
   127  		word = word >> carry
   128  		nbits -= int64(carry)
   129  	}
   130  	bytesForWord := bitutil.BytesForBits(nbits)
   131  	binary.LittleEndian.PutUint64(bw.endianBuffer[:], word)
   132  	copy(appslice, bw.endianBuffer[:bytesForWord])
   133  
   134  	// at this point, the previous curByte has been written, the new curByte
   135  	// is either the last relevant byte in word or cleared if the new position
   136  	// is byte aligned (ie. a fresh byte)
   137  	if bw.bitMask == 0x1 {
   138  		bw.curByte = 0
   139  	} else {
   140  		bw.curByte = appslice[bytesForWord-1]
   141  	}
   142  }
   143  
   144  func (bw *firstTimeBitmapWriter) Set() {
   145  	bw.curByte |= bw.bitMask
   146  }
   147  
// Clear is a no-op for this writer: it does not modify the pending byte, so it
// will not undo a Set already applied to the current bit.
// NOTE(review): presumably this relies on bits at and after the current
// position starting out unset in the pending byte (it is zeroed whenever Next
// rolls over to a new byte) — confirm against callers.
func (bw *firstTimeBitmapWriter) Clear() {}
   149  
   150  func (bw *firstTimeBitmapWriter) Next() {
   151  	bw.bitMask = uint8(bw.bitMask << 1)
   152  	bw.pos++
   153  	if bw.bitMask == 0 {
   154  		// byte finished, advance to the next one
   155  		bw.bitMask = 0x1
   156  		bw.buf[int(bw.byteOffset)] = bw.curByte
   157  		bw.byteOffset++
   158  		bw.curByte = 0
   159  	}
   160  }
   161  
   162  func (b *firstTimeBitmapWriter) AppendBools(in []bool) int {
   163  	panic("Append Bools not yet implemented for firstTimeBitmapWriter")
   164  }
   165  
   166  func (bw *firstTimeBitmapWriter) Finish() {
   167  	// store curByte into the bitmap
   168  	if bw.length > 0 && bw.bitMask != 0x01 || bw.pos < bw.length {
   169  		bw.buf[int(bw.byteOffset)] = bw.curByte
   170  	}
   171  }
   172  
// Position returns the number of bits written since construction or the last
// Reset as an int64, i.e. the same counter as Pos without the conversion to
// int. It is not part of the BitmapWriter interface.
func (bw *firstTimeBitmapWriter) Position() int64 { return bw.pos }