github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/utils/bit_writer.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package utils 18 19 import ( 20 "encoding/binary" 21 "io" 22 "log" 23 24 "github.com/apache/arrow/go/v14/arrow/bitutil" 25 ) 26 27 // WriterAtBuffer is a convenience struct for providing a WriteAt function 28 // to a byte slice for use with things that want an io.WriterAt 29 type WriterAtBuffer struct { 30 buf []byte 31 } 32 33 // NewWriterAtBuffer returns an object which fulfills the io.WriterAt interface 34 // by taking ownership of the passed in slice. 35 func NewWriterAtBuffer(buf []byte) WriterAtWithLen { 36 return &WriterAtBuffer{buf} 37 } 38 39 // Len returns the length of the underlying byte slice. 40 func (w *WriterAtBuffer) Len() int { 41 return len(w.buf) 42 } 43 44 // WriteAt fulfills the io.WriterAt interface to write len(p) bytes from p 45 // to the underlying byte slice starting at offset off. It returns the number 46 // of bytes written from p (0 <= n <= len(p)) and any error encountered. 47 func (w *WriterAtBuffer) WriteAt(p []byte, off int64) (n int, err error) { 48 if off > int64(len(w.buf)) { 49 return 0, io.ErrUnexpectedEOF 50 } 51 52 n = copy(w.buf[off:], p) 53 if n < len(p) { 54 err = io.ErrUnexpectedEOF 55 } 56 return 57 } 58 59 func (w *WriterAtBuffer) Reserve(nbytes int) { 60 // no-op. We should not expand or otherwise modify the underlying buffer 61 } 62 63 // WriterAtWithLen is an interface for an io.WriterAt with a Len function 64 type WriterAtWithLen interface { 65 io.WriterAt 66 Len() int 67 Reserve(int) 68 } 69 70 // BitWriter is a utility for writing values of specific bit widths to a stream 71 // using a uint64 as a buffer to build up between flushing for efficiency. 72 type BitWriter struct { 73 wr WriterAtWithLen 74 buffer uint64 75 byteoffset int 76 bitoffset uint 77 raw [8]byte 78 } 79 80 // NewBitWriter initializes a new bit writer to write to the passed in interface 81 // using WriteAt to write the appropriate offsets and values. 82 func NewBitWriter(w WriterAtWithLen) *BitWriter { 83 return &BitWriter{wr: w} 84 } 85 86 // SkipBytes reserves the next aligned nbytes, skipping them and returning 87 // the offset to use with WriteAt to write to those reserved bytes. Used for 88 // RLE encoding to fill in the indicators after encoding. 89 func (b *BitWriter) SkipBytes(nbytes int) (int, error) { 90 b.Flush(true) 91 ret := b.byteoffset 92 b.byteoffset += nbytes 93 b.wr.Reserve(b.byteoffset) 94 return ret, nil 95 } 96 97 // WriteAt fulfills the io.WriterAt interface to write len(p) bytes from p 98 // to the underlying byte slice starting at offset off. It returns the number 99 // of bytes written from p (0 <= n <= len(p)) and any error encountered. 100 // This allows writing full bytes directly to the underlying writer. 101 func (b *BitWriter) WriteAt(val []byte, off int64) (int, error) { 102 return b.wr.WriteAt(val, off) 103 } 104 105 // Written returns the number of bytes that have been written to the BitWriter, 106 // not how many bytes have been flushed. Use Flush to ensure that all data is flushed 107 // to the underlying writer. 108 func (b *BitWriter) Written() int { 109 return b.byteoffset + int(bitutil.BytesForBits(int64(b.bitoffset))) 110 } 111 112 // WriteValue writes the value v using nbits to pack it, returning false if it fails 113 // for some reason. 114 func (b *BitWriter) WriteValue(v uint64, nbits uint) error { 115 b.buffer |= v << b.bitoffset 116 b.bitoffset += nbits 117 118 if b.bitoffset >= 64 { 119 binary.LittleEndian.PutUint64(b.raw[:], b.buffer) 120 if _, err := b.wr.WriteAt(b.raw[:], int64(b.byteoffset)); err != nil { 121 return err 122 } 123 b.buffer = 0 124 b.byteoffset += 8 125 b.bitoffset -= 64 126 b.buffer = v >> (nbits - b.bitoffset) 127 } 128 return nil 129 } 130 131 // Flush will flush any buffered data to the underlying writer, pass true if 132 // the next write should be byte-aligned after this flush. 133 func (b *BitWriter) Flush(align bool) { 134 var nbytes int64 135 if b.bitoffset > 0 { 136 nbytes = bitutil.BytesForBits(int64(b.bitoffset)) 137 binary.LittleEndian.PutUint64(b.raw[:], b.buffer) 138 b.wr.WriteAt(b.raw[:nbytes], int64(b.byteoffset)) 139 } 140 141 if align { 142 b.buffer = 0 143 b.byteoffset += int(nbytes) 144 b.bitoffset = 0 145 } 146 } 147 148 // WriteAligned writes the value val as a little endian value in exactly nbytes 149 // byte-aligned to the underlying writer, flushing via Flush(true) before writing nbytes 150 // without buffering. 151 func (b *BitWriter) WriteAligned(val uint64, nbytes int) bool { 152 b.Flush(true) 153 binary.LittleEndian.PutUint64(b.raw[:], val) 154 if _, err := b.wr.WriteAt(b.raw[:nbytes], int64(b.byteoffset)); err != nil { 155 log.Println(err) 156 return false 157 } 158 b.byteoffset += nbytes 159 return true 160 } 161 162 // WriteVlqInt writes v as a vlq encoded integer byte-aligned to the underlying writer 163 // without buffering. 164 func (b *BitWriter) WriteVlqInt(v uint64) bool { 165 b.Flush(true) 166 var buf [binary.MaxVarintLen64]byte 167 nbytes := binary.PutUvarint(buf[:], v) 168 if _, err := b.wr.WriteAt(buf[:nbytes], int64(b.byteoffset)); err != nil { 169 log.Println(err) 170 return false 171 } 172 b.byteoffset += nbytes 173 return true 174 } 175 176 // WriteZigZagVlqInt writes a zigzag encoded integer byte-aligned to the underlying writer 177 // without buffering. 178 func (b *BitWriter) WriteZigZagVlqInt(v int64) bool { 179 return b.WriteVlqInt(uint64((v << 1) ^ (v >> 63))) 180 } 181 182 // Clear resets the writer so that subsequent writes will start from offset 0, 183 // allowing reuse of the underlying buffer and writer. 184 func (b *BitWriter) Clear() { 185 b.byteoffset = 0 186 b.bitoffset = 0 187 b.buffer = 0 188 }