github.com/apache/arrow/go/v14@v14.0.1/parquet/file/level_conversion_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package file 18 19 import ( 20 "strings" 21 "testing" 22 23 "github.com/apache/arrow/go/v14/arrow/bitutil" 24 "github.com/apache/arrow/go/v14/parquet/internal/bmi" 25 "github.com/apache/arrow/go/v14/parquet/internal/utils" 26 "github.com/stretchr/testify/assert" 27 ) 28 29 func bitmapToString(bitmap []byte, bitCount int64) string { 30 var bld strings.Builder 31 bld.Grow(int(bitCount)) 32 for i := 0; i < int(bitCount); i++ { 33 if bitutil.BitIsSet(bitmap, i) { 34 bld.WriteByte('1') 35 } else { 36 bld.WriteByte('0') 37 } 38 } 39 return bld.String() 40 } 41 42 func TestDefLevelsToBitmap(t *testing.T) { 43 defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3, 3} 44 validBits := []byte{2, 0} 45 46 var info LevelInfo 47 info.DefLevel = 3 48 info.RepLevel = 1 49 50 var io ValidityBitmapInputOutput 51 io.ReadUpperBound = int64(len(defLevels)) 52 io.Read = -1 53 io.ValidBits = validBits 54 55 DefLevelsToBitmap(defLevels, info, &io) 56 assert.Equal(t, int64(9), io.Read) 57 assert.Equal(t, int64(1), io.NullCount) 58 59 // call again with 0 definition levels make sure that valid bits is unmodified 60 curByte := validBits[1] 61 io.NullCount = 0 62 DefLevelsToBitmap(defLevels[:0], info, &io) 63 64 assert.Zero(t, io.Read) 65 assert.Zero(t, io.NullCount) 66 assert.Equal(t, curByte, validBits[1]) 67 } 68 69 func TestDefLevelstToBitmapPowerOf2(t *testing.T) { 70 defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3} 71 validBits := []byte{1, 0} 72 73 var ( 74 info LevelInfo 75 io ValidityBitmapInputOutput 76 ) 77 78 info.RepLevel = 1 79 info.DefLevel = 3 80 io.Read = -1 81 io.ReadUpperBound = int64(len(defLevels)) 82 io.ValidBits = validBits 83 84 DefLevelsToBitmap(defLevels[4:8], info, &io) 85 assert.Equal(t, int64(4), io.Read) 86 assert.Zero(t, io.NullCount) 87 } 88 89 func TestGreaterThanBitmapGeneratesExpectedBitmasks(t *testing.T) { 90 defLevels := []int16{ 91 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 92 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 93 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 94 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7} 95 96 tests := []struct { 97 name string 98 num int 99 rhs int16 100 expected uint64 101 }{ 102 {"no levels", 0, 0, 0}, 103 {"64 and 8", 64, 8, 0}, 104 {"64 and -1", 64, -1, 0xFFFFFFFFFFFFFFFF}, 105 // should be zero padded 106 {"zero pad 47, -1", 47, -1, 0x7FFFFFFFFFFF}, 107 {"zero pad 64 and 6", 64, 6, 0x8080808080808080}, 108 } 109 110 for _, tt := range tests { 111 t.Run(tt.name, func(t *testing.T) { 112 assert.Equal(t, tt.expected, bmi.GreaterThanBitmap(defLevels[:tt.num], tt.rhs)) 113 }) 114 } 115 } 116 117 func TestWithRepetitionlevelFiltersOutEmptyListValues(t *testing.T) { 118 validityBitmap := make([]byte, 8) 119 io := ValidityBitmapInputOutput{ 120 ReadUpperBound: 64, 121 Read: 1, 122 NullCount: 5, 123 ValidBits: validityBitmap, 124 ValidBitsOffset: 1, 125 } 126 127 info := LevelInfo{ 128 RepeatedAncestorDefLevel: 1, 129 DefLevel: 2, 130 RepLevel: 1, 131 } 132 133 defLevels := []int16{0, 0, 0, 2, 2, 1, 0, 2} 134 DefLevelsToBitmap(defLevels, info, &io) 135 136 assert.Equal(t, bitmapToString(validityBitmap, 8), "01101000") 137 for _, x := range validityBitmap[1:] { 138 assert.Zero(t, x) 139 } 140 assert.EqualValues(t, 6, io.NullCount) 141 assert.EqualValues(t, 4, io.Read) 142 } 143 144 type MultiLevelTestData struct { 145 defLevels []int16 146 repLevels []int16 147 } 148 149 func TriplNestedList() MultiLevelTestData { 150 // Triply nested list values borrow from write_path 151 // [null, [[1, null, 3], []], []], 152 // [[[]], [[], [1, 2]], null, [[3]]], 153 // null, 154 // [] 155 return MultiLevelTestData{ 156 defLevels: []int16{2, 7, 6, 7, 5, 3, // first row 157 5, 5, 7, 7, 2, 7, // second row 158 0, // third row 159 1}, 160 repLevels: []int16{0, 1, 3, 3, 2, 1, // first row 161 0, 1, 2, 3, 1, 1, // second row 162 0, 0}, 163 } 164 } 165 166 func TestActualCase(t *testing.T) { 167 out := make([]byte, 512) 168 defs := make([]int16, 64) 169 for i := range defs { 170 defs[i] = 3 171 } 172 173 defs[0] = 0 174 defs[25] = 0 175 defs[33] = 0 176 defs[49] = 0 177 defs[58] = 0 178 defs[59] = 0 179 defs[60] = 0 180 defs[61] = 0 181 182 remaining := int64(4096) 183 info := LevelInfo{ 184 NullSlotUsage: 0, 185 DefLevel: 3, 186 RepLevel: 1, 187 RepeatedAncestorDefLevel: 2, 188 } 189 190 wr := utils.NewFirstTimeBitmapWriter(out, 0, 4096) 191 v := defLevelsBatchToBitmap(defs, remaining, info, wr, true) 192 assert.EqualValues(t, 56, v) 193 assert.Equal(t, []byte{255, 255, 255, 255}, out[:4]) 194 }