github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/utils/bit_writer.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils
    18  
    19  import (
    20  	"encoding/binary"
    21  	"io"
    22  	"log"
    23  
    24  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    25  )
    26  
    27  // WriterAtBuffer is a convenience struct for providing a WriteAt function
    28  // to a byte slice for use with things that want an io.WriterAt
    29  type WriterAtBuffer struct {
    30  	buf []byte
    31  }
    32  
    33  // NewWriterAtBuffer returns an object which fulfills the io.WriterAt interface
    34  // by taking ownership of the passed in slice.
    35  func NewWriterAtBuffer(buf []byte) WriterAtWithLen {
    36  	return &WriterAtBuffer{buf}
    37  }
    38  
    39  // Len returns the length of the underlying byte slice.
    40  func (w *WriterAtBuffer) Len() int {
    41  	return len(w.buf)
    42  }
    43  
    44  // WriteAt fulfills the io.WriterAt interface to write len(p) bytes from p
    45  // to the underlying byte slice starting at offset off. It returns the number
    46  // of bytes written from p (0 <= n <= len(p)) and any error encountered.
    47  func (w *WriterAtBuffer) WriteAt(p []byte, off int64) (n int, err error) {
    48  	if off > int64(len(w.buf)) {
    49  		return 0, io.ErrUnexpectedEOF
    50  	}
    51  
    52  	n = copy(w.buf[off:], p)
    53  	if n < len(p) {
    54  		err = io.ErrUnexpectedEOF
    55  	}
    56  	return
    57  }
    58  
    59  func (w *WriterAtBuffer) Reserve(nbytes int) {
    60  	// no-op. We should not expand or otherwise modify the underlying buffer
    61  }
    62  
    63  // WriterAtWithLen is an interface for an io.WriterAt with a Len function
    64  type WriterAtWithLen interface {
    65  	io.WriterAt
    66  	Len() int
    67  	Reserve(int)
    68  }
    69  
    70  // BitWriter is a utility for writing values of specific bit widths to a stream
    71  // using a uint64 as a buffer to build up between flushing for efficiency.
    72  type BitWriter struct {
    73  	wr         WriterAtWithLen
    74  	buffer     uint64
    75  	byteoffset int
    76  	bitoffset  uint
    77  	raw        [8]byte
    78  }
    79  
    80  // NewBitWriter initializes a new bit writer to write to the passed in interface
    81  // using WriteAt to write the appropriate offsets and values.
    82  func NewBitWriter(w WriterAtWithLen) *BitWriter {
    83  	return &BitWriter{wr: w}
    84  }
    85  
    86  // SkipBytes reserves the next aligned nbytes, skipping them and returning
    87  // the offset to use with WriteAt to write to those reserved bytes. Used for
    88  // RLE encoding to fill in the indicators after encoding.
    89  func (b *BitWriter) SkipBytes(nbytes int) (int, error) {
    90  	b.Flush(true)
    91  	ret := b.byteoffset
    92  	b.byteoffset += nbytes
    93  	b.wr.Reserve(b.byteoffset)
    94  	return ret, nil
    95  }
    96  
    97  // WriteAt fulfills the io.WriterAt interface to write len(p) bytes from p
    98  // to the underlying byte slice starting at offset off. It returns the number
    99  // of bytes written from p (0 <= n <= len(p)) and any error encountered.
   100  // This allows writing full bytes directly to the underlying writer.
   101  func (b *BitWriter) WriteAt(val []byte, off int64) (int, error) {
   102  	return b.wr.WriteAt(val, off)
   103  }
   104  
   105  // Written returns the number of bytes that have been written to the BitWriter,
   106  // not how many bytes have been flushed. Use Flush to ensure that all data is flushed
   107  // to the underlying writer.
   108  func (b *BitWriter) Written() int {
   109  	return b.byteoffset + int(bitutil.BytesForBits(int64(b.bitoffset)))
   110  }
   111  
   112  // WriteValue writes the value v using nbits to pack it, returning false if it fails
   113  // for some reason.
   114  func (b *BitWriter) WriteValue(v uint64, nbits uint) error {
   115  	b.buffer |= v << b.bitoffset
   116  	b.bitoffset += nbits
   117  
   118  	if b.bitoffset >= 64 {
   119  		binary.LittleEndian.PutUint64(b.raw[:], b.buffer)
   120  		if _, err := b.wr.WriteAt(b.raw[:], int64(b.byteoffset)); err != nil {
   121  			return err
   122  		}
   123  		b.buffer = 0
   124  		b.byteoffset += 8
   125  		b.bitoffset -= 64
   126  		b.buffer = v >> (nbits - b.bitoffset)
   127  	}
   128  	return nil
   129  }
   130  
   131  // Flush will flush any buffered data to the underlying writer, pass true if
   132  // the next write should be byte-aligned after this flush.
   133  func (b *BitWriter) Flush(align bool) {
   134  	var nbytes int64
   135  	if b.bitoffset > 0 {
   136  		nbytes = bitutil.BytesForBits(int64(b.bitoffset))
   137  		binary.LittleEndian.PutUint64(b.raw[:], b.buffer)
   138  		b.wr.WriteAt(b.raw[:nbytes], int64(b.byteoffset))
   139  	}
   140  
   141  	if align {
   142  		b.buffer = 0
   143  		b.byteoffset += int(nbytes)
   144  		b.bitoffset = 0
   145  	}
   146  }
   147  
   148  // WriteAligned writes the value val as a little endian value in exactly nbytes
   149  // byte-aligned to the underlying writer, flushing via Flush(true) before writing nbytes
   150  // without buffering.
   151  func (b *BitWriter) WriteAligned(val uint64, nbytes int) bool {
   152  	b.Flush(true)
   153  	binary.LittleEndian.PutUint64(b.raw[:], val)
   154  	if _, err := b.wr.WriteAt(b.raw[:nbytes], int64(b.byteoffset)); err != nil {
   155  		log.Println(err)
   156  		return false
   157  	}
   158  	b.byteoffset += nbytes
   159  	return true
   160  }
   161  
   162  // WriteVlqInt writes v as a vlq encoded integer byte-aligned to the underlying writer
   163  // without buffering.
   164  func (b *BitWriter) WriteVlqInt(v uint64) bool {
   165  	b.Flush(true)
   166  	var buf [binary.MaxVarintLen64]byte
   167  	nbytes := binary.PutUvarint(buf[:], v)
   168  	if _, err := b.wr.WriteAt(buf[:nbytes], int64(b.byteoffset)); err != nil {
   169  		log.Println(err)
   170  		return false
   171  	}
   172  	b.byteoffset += nbytes
   173  	return true
   174  }
   175  
   176  // WriteZigZagVlqInt writes a zigzag encoded integer byte-aligned to the underlying writer
   177  // without buffering.
   178  func (b *BitWriter) WriteZigZagVlqInt(v int64) bool {
   179  	return b.WriteVlqInt(uint64((v << 1) ^ (v >> 63)))
   180  }
   181  
   182  // Clear resets the writer so that subsequent writes will start from offset 0,
   183  // allowing reuse of the underlying buffer and writer.
   184  func (b *BitWriter) Clear() {
   185  	b.byteoffset = 0
   186  	b.bitoffset = 0
   187  	b.buffer = 0
   188  }