github.com/apache/arrow/go/v14@v14.0.2/parquet/file/level_conversion_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package file
    18  
    19  import (
    20  	"strings"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    24  	"github.com/apache/arrow/go/v14/parquet/internal/bmi"
    25  	"github.com/apache/arrow/go/v14/parquet/internal/utils"
    26  	"github.com/stretchr/testify/assert"
    27  )
    28  
    29  func bitmapToString(bitmap []byte, bitCount int64) string {
    30  	var bld strings.Builder
    31  	bld.Grow(int(bitCount))
    32  	for i := 0; i < int(bitCount); i++ {
    33  		if bitutil.BitIsSet(bitmap, i) {
    34  			bld.WriteByte('1')
    35  		} else {
    36  			bld.WriteByte('0')
    37  		}
    38  	}
    39  	return bld.String()
    40  }
    41  
    42  func TestDefLevelsToBitmap(t *testing.T) {
    43  	defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3, 3}
    44  	validBits := []byte{2, 0}
    45  
    46  	var info LevelInfo
    47  	info.DefLevel = 3
    48  	info.RepLevel = 1
    49  
    50  	var io ValidityBitmapInputOutput
    51  	io.ReadUpperBound = int64(len(defLevels))
    52  	io.Read = -1
    53  	io.ValidBits = validBits
    54  
    55  	DefLevelsToBitmap(defLevels, info, &io)
    56  	assert.Equal(t, int64(9), io.Read)
    57  	assert.Equal(t, int64(1), io.NullCount)
    58  
    59  	// call again with 0 definition levels make sure that valid bits is unmodified
    60  	curByte := validBits[1]
    61  	io.NullCount = 0
    62  	DefLevelsToBitmap(defLevels[:0], info, &io)
    63  
    64  	assert.Zero(t, io.Read)
    65  	assert.Zero(t, io.NullCount)
    66  	assert.Equal(t, curByte, validBits[1])
    67  }
    68  
    69  func TestDefLevelstToBitmapPowerOf2(t *testing.T) {
    70  	defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3}
    71  	validBits := []byte{1, 0}
    72  
    73  	var (
    74  		info LevelInfo
    75  		io   ValidityBitmapInputOutput
    76  	)
    77  
    78  	info.RepLevel = 1
    79  	info.DefLevel = 3
    80  	io.Read = -1
    81  	io.ReadUpperBound = int64(len(defLevels))
    82  	io.ValidBits = validBits
    83  
    84  	DefLevelsToBitmap(defLevels[4:8], info, &io)
    85  	assert.Equal(t, int64(4), io.Read)
    86  	assert.Zero(t, io.NullCount)
    87  }
    88  
    89  func TestGreaterThanBitmapGeneratesExpectedBitmasks(t *testing.T) {
    90  	defLevels := []int16{
    91  		0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
    92  		0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
    93  		0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
    94  		0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7}
    95  
    96  	tests := []struct {
    97  		name     string
    98  		num      int
    99  		rhs      int16
   100  		expected uint64
   101  	}{
   102  		{"no levels", 0, 0, 0},
   103  		{"64 and 8", 64, 8, 0},
   104  		{"64 and -1", 64, -1, 0xFFFFFFFFFFFFFFFF},
   105  		// should be zero padded
   106  		{"zero pad 47, -1", 47, -1, 0x7FFFFFFFFFFF},
   107  		{"zero pad 64 and 6", 64, 6, 0x8080808080808080},
   108  	}
   109  
   110  	for _, tt := range tests {
   111  		t.Run(tt.name, func(t *testing.T) {
   112  			assert.Equal(t, tt.expected, bmi.GreaterThanBitmap(defLevels[:tt.num], tt.rhs))
   113  		})
   114  	}
   115  }
   116  
   117  func TestWithRepetitionlevelFiltersOutEmptyListValues(t *testing.T) {
   118  	validityBitmap := make([]byte, 8)
   119  	io := ValidityBitmapInputOutput{
   120  		ReadUpperBound:  64,
   121  		Read:            1,
   122  		NullCount:       5,
   123  		ValidBits:       validityBitmap,
   124  		ValidBitsOffset: 1,
   125  	}
   126  
   127  	info := LevelInfo{
   128  		RepeatedAncestorDefLevel: 1,
   129  		DefLevel:                 2,
   130  		RepLevel:                 1,
   131  	}
   132  
   133  	defLevels := []int16{0, 0, 0, 2, 2, 1, 0, 2}
   134  	DefLevelsToBitmap(defLevels, info, &io)
   135  
   136  	assert.Equal(t, bitmapToString(validityBitmap, 8), "01101000")
   137  	for _, x := range validityBitmap[1:] {
   138  		assert.Zero(t, x)
   139  	}
   140  	assert.EqualValues(t, 6, io.NullCount)
   141  	assert.EqualValues(t, 4, io.Read)
   142  }
   143  
   144  type MultiLevelTestData struct {
   145  	defLevels []int16
   146  	repLevels []int16
   147  }
   148  
   149  func TriplNestedList() MultiLevelTestData {
   150  	// Triply nested list values borrow from write_path
   151  	// [null, [[1, null, 3], []], []],
   152  	// [[[]], [[], [1, 2]], null, [[3]]],
   153  	// null,
   154  	// []
   155  	return MultiLevelTestData{
   156  		defLevels: []int16{2, 7, 6, 7, 5, 3, // first row
   157  			5, 5, 7, 7, 2, 7, // second row
   158  			0, // third row
   159  			1},
   160  		repLevels: []int16{0, 1, 3, 3, 2, 1, // first row
   161  			0, 1, 2, 3, 1, 1, // second row
   162  			0, 0},
   163  	}
   164  }
   165  
   166  func TestActualCase(t *testing.T) {
   167  	out := make([]byte, 512)
   168  	defs := make([]int16, 64)
   169  	for i := range defs {
   170  		defs[i] = 3
   171  	}
   172  
   173  	defs[0] = 0
   174  	defs[25] = 0
   175  	defs[33] = 0
   176  	defs[49] = 0
   177  	defs[58] = 0
   178  	defs[59] = 0
   179  	defs[60] = 0
   180  	defs[61] = 0
   181  
   182  	remaining := int64(4096)
   183  	info := LevelInfo{
   184  		NullSlotUsage:            0,
   185  		DefLevel:                 3,
   186  		RepLevel:                 1,
   187  		RepeatedAncestorDefLevel: 2,
   188  	}
   189  
   190  	wr := utils.NewFirstTimeBitmapWriter(out, 0, 4096)
   191  	v := defLevelsBatchToBitmap(defs, remaining, info, wr, true)
   192  	assert.EqualValues(t, 56, v)
   193  	assert.Equal(t, []byte{255, 255, 255, 255}, out[:4])
   194  }