github.com/fraugster/parquet-go@v0.12.0/packed_array.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  )
     7  
     8  // packedArray is a bitmap encoded array mainly for repetition and definition
     9  // levels, which normally have low values (~<10), a []uint16 array is not the
    10  // most memory efficient structure due to the large number of values. Memory
    11  // storage requirements for the packed array are ~1/8th compared to
    12  // []uint16 array.
    13  type packedArray struct {
    14  	count int
    15  	bw    int
    16  	data  []byte
    17  
    18  	buf    [8]int32
    19  	bufPos int
    20  
    21  	writer pack8int32Func
    22  	reader unpack8int32Func
    23  }
    24  
    25  // This function is only for testing, as it flushes first, so be careful!
    26  func (pa *packedArray) toArray() []int32 {
    27  	ret := make([]int32, pa.count)
    28  	for i := range ret {
    29  		ret[i], _ = pa.at(i)
    30  	}
    31  	return ret
    32  }
    33  
    34  func (pa *packedArray) reset(bw int) {
    35  	if bw < 0 || bw > 32 {
    36  		panic("invalid bit width")
    37  	}
    38  	pa.bw = bw
    39  	pa.count = 0
    40  	pa.bufPos = 0
    41  	pa.data = pa.data[:0]
    42  	pa.writer = pack8Int32FuncByWidth[bw]
    43  	pa.reader = unpack8Int32FuncByWidth[bw]
    44  }
    45  
    46  func (pa *packedArray) flush() {
    47  	for i := pa.bufPos; i < 8; i++ {
    48  		pa.buf[i] = 0
    49  	}
    50  	pa.data = append(pa.data, pa.writer(pa.buf)...)
    51  	pa.bufPos = 0
    52  }
    53  
    54  func (pa *packedArray) appendSingle(v int32) {
    55  	if pa.bufPos == 8 {
    56  		pa.flush()
    57  	}
    58  	pa.buf[pa.bufPos] = v
    59  	pa.bufPos++
    60  	pa.count++
    61  }
    62  
    63  func (pa *packedArray) at(pos int) (int32, error) {
    64  	if pos < 0 || pos >= pa.count {
    65  		return 0, errors.New("out of range")
    66  	}
    67  	if pa.bw == 0 {
    68  		return 0, nil
    69  	}
    70  
    71  	block := (pos / 8) * pa.bw
    72  	idx := pos % 8
    73  
    74  	if block >= len(pa.data) {
    75  		return pa.buf[idx], nil
    76  	}
    77  
    78  	buf := pa.reader(pa.data[block : block+pa.bw])
    79  	return buf[idx], nil
    80  }
    81  
    82  func (pa *packedArray) appendArray(other *packedArray) {
    83  	if other == nil {
    84  		return
    85  	}
    86  
    87  	if pa.bw != other.bw {
    88  		panic(fmt.Sprintf("Can not append array with different bit width : %d and %d", pa.bw, other.bw))
    89  	}
    90  
    91  	if cap(pa.data) < len(pa.data)+len(other.data)+1 {
    92  		data := make([]byte, len(pa.data), len(pa.data)+len(other.data)+1)
    93  		copy(data, pa.data)
    94  		pa.data = data
    95  	}
    96  
    97  	for i := 0; i < other.count; i++ {
    98  		v, _ := other.at(i)
    99  		pa.appendSingle(v)
   100  	}
   101  }