github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/bit_run_reader_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils_test
    18  
    19  import (
    20  	"math/bits"
    21  	"testing"
    22  	"unsafe"
    23  
    24  	"github.com/apache/arrow/go/v7/arrow/bitutil"
    25  	"github.com/apache/arrow/go/v7/arrow/endian"
    26  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    27  	"github.com/stretchr/testify/assert"
    28  )
    29  
    30  var toLittleEndian func(uint64) uint64
    31  
    32  func init() {
    33  	if endian.IsBigEndian {
    34  		toLittleEndian = bits.ReverseBytes64
    35  	} else {
    36  		toLittleEndian = func(in uint64) uint64 { return in }
    37  	}
    38  }
    39  
    40  func TestBitRunReaderZeroLength(t *testing.T) {
    41  	reader := utils.NewBitRunReader(nil, 0, 0)
    42  	assert.Zero(t, reader.NextRun().Len)
    43  }
    44  
    45  func bitmapFromSlice(vals []int, bitOffset int64) []byte {
    46  	out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))+bitOffset)))
    47  	writer := bitutil.NewBitmapWriter(out, int(bitOffset), len(vals))
    48  	for _, val := range vals {
    49  		if val == 1 {
    50  			writer.Set()
    51  		} else {
    52  			writer.Clear()
    53  		}
    54  		writer.Next()
    55  	}
    56  	writer.Finish()
    57  
    58  	return out
    59  }
    60  
    61  func TestBitRunReader(t *testing.T) {
    62  	tests := []struct {
    63  		name     string
    64  		val      []int
    65  		bmvec    []int
    66  		offset   int64
    67  		len      int64
    68  		expected []utils.BitRun
    69  	}{
    70  		{"normal operation",
    71  			[]int{5, 0, 7, 1, 3, 0, 25, 1, 21, 0, 26, 1, 130, 0, 65, 1},
    72  			[]int{1, 0, 1},
    73  			0, -1,
    74  			[]utils.BitRun{
    75  				{1, true},
    76  				{1, false},
    77  				{1, true},
    78  				{5, false},
    79  				{7, true},
    80  				{3, false},
    81  				{25, true},
    82  				{21, false},
    83  				{26, true},
    84  				{130, false},
    85  				{65, true},
    86  			},
    87  		},
    88  		{"truncated at word", []int{7, 1, 58, 0}, []int{}, 1, 63,
    89  			[]utils.BitRun{{6, true}, {57, false}},
    90  		},
    91  		{"truncated within word multiple of 8 bits",
    92  			[]int{7, 1, 5, 0}, []int{}, 1, 7,
    93  			[]utils.BitRun{{6, true}, {1, false}},
    94  		},
    95  		{"truncated within word", []int{37 + 40, 0, 23, 1}, []int{}, 37, 53,
    96  			[]utils.BitRun{{40, false}, {13, true}},
    97  		},
    98  		{"truncated multiple words", []int{5, 0, 30, 1, 95, 0}, []int{1, 0, 1},
    99  			5, (3 + 5 + 30 + 95) - (5 + 3), []utils.BitRun{{3, false}, {30, true}, {92, false}},
   100  		},
   101  	}
   102  
   103  	for _, tt := range tests {
   104  		t.Run(tt.name, func(t *testing.T) {
   105  			bmvec := tt.bmvec
   106  
   107  			for i := 0; i < len(tt.val); i += 2 {
   108  				for j := 0; j < tt.val[i]; j++ {
   109  					bmvec = append(bmvec, tt.val[i+1])
   110  				}
   111  			}
   112  
   113  			bitmap := bitmapFromSlice(bmvec, 0)
   114  			length := int64(len(bmvec)) - tt.offset
   115  			if tt.len != -1 {
   116  				length = tt.len
   117  			}
   118  			reader := utils.NewBitRunReader(bitmap, tt.offset, length)
   119  
   120  			results := make([]utils.BitRun, 0)
   121  			for {
   122  				results = append(results, reader.NextRun())
   123  				if results[len(results)-1].Len == 0 {
   124  					break
   125  				}
   126  			}
   127  			assert.Zero(t, results[len(results)-1].Len)
   128  			results = results[:len(results)-1]
   129  
   130  			assert.Equal(t, tt.expected, results)
   131  		})
   132  	}
   133  }
   134  
   135  func TestBitRunReaderAllFirstByteCombos(t *testing.T) {
   136  	for offset := int64(0); offset < 8; offset++ {
   137  		for x := int64(0); x < (1<<8)-1; x++ {
   138  			bits := int64(toLittleEndian(uint64(x)))
   139  			reader := utils.NewBitRunReader((*(*[8]byte)(unsafe.Pointer(&bits)))[:], offset, 8-offset)
   140  
   141  			results := make([]utils.BitRun, 0)
   142  			for {
   143  				results = append(results, reader.NextRun())
   144  				if results[len(results)-1].Len == 0 {
   145  					break
   146  				}
   147  			}
   148  			assert.Zero(t, results[len(results)-1].Len)
   149  			results = results[:len(results)-1]
   150  
   151  			var sum int64
   152  			for _, r := range results {
   153  				sum += r.Len
   154  			}
   155  			assert.EqualValues(t, sum, 8-offset)
   156  		}
   157  	}
   158  }