github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/utils/bitmap_writer_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package utils_test 18 19 import ( 20 "fmt" 21 "reflect" 22 "strings" 23 "testing" 24 25 "github.com/apache/arrow/go/v14/arrow/bitutil" 26 "github.com/apache/arrow/go/v14/parquet/internal/utils" 27 "github.com/stretchr/testify/suite" 28 ) 29 30 func writeSliceToWriter(wr utils.BitmapWriter, values []int) { 31 for _, v := range values { 32 if v != 0 { 33 wr.Set() 34 } else { 35 wr.Clear() 36 } 37 wr.Next() 38 } 39 wr.Finish() 40 } 41 42 type FirstTimeBitmapWriterSuite struct { 43 suite.Suite 44 } 45 46 func (f *FirstTimeBitmapWriterSuite) TestNormalOperation() { 47 for _, fb := range []byte{0x00, 0xFF} { 48 { 49 bitmap := []byte{fb, fb, fb, fb} 50 wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 12) 51 writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}) 52 // {0b00110110, 0b1010, 0, 0} 53 f.Equal([]byte{0x36, 0x0a}, bitmap[:2]) 54 } 55 { 56 bitmap := []byte{fb, fb, fb, fb} 57 wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 12) 58 writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}) 59 // {0b00110110, 0b1010, 0, 0} 60 f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2]) 61 } 62 // Consecutive write chunks 63 { 64 bitmap := []byte{fb, fb, fb, fb} 65 { 66 wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 6) 67 writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1}) 68 } 69 { 70 wr := utils.NewFirstTimeBitmapWriter(bitmap, 6, 3) 71 writeSliceToWriter(wr, []int{0, 0, 0}) 72 } 73 { 74 wr := utils.NewFirstTimeBitmapWriter(bitmap, 9, 3) 75 writeSliceToWriter(wr, []int{1, 0, 1}) 76 } 77 f.Equal([]byte{0x36, 0x0a}, bitmap[:2]) 78 } 79 { 80 bitmap := []byte{fb, fb, fb, fb} 81 { 82 wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 0) 83 writeSliceToWriter(wr, []int{}) 84 } 85 { 86 wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 6) 87 writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1}) 88 } 89 { 90 wr := utils.NewFirstTimeBitmapWriter(bitmap, 10, 3) 91 writeSliceToWriter(wr, []int{0, 0, 0}) 92 } 93 { 94 wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 0) 95 writeSliceToWriter(wr, []int{}) 96 } 97 { 98 wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 3) 99 writeSliceToWriter(wr, []int{1, 0, 1}) 100 } 101 f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2]) 102 } 103 } 104 } 105 106 func bitmapToString(bitmap []byte, bitCount int64) string { 107 var bld strings.Builder 108 bld.Grow(int(bitCount)) 109 for i := 0; i < int(bitCount); i++ { 110 if bitutil.BitIsSet(bitmap, i) { 111 bld.WriteByte('1') 112 } else { 113 bld.WriteByte('0') 114 } 115 } 116 return bld.String() 117 } 118 119 func (f *FirstTimeBitmapWriterSuite) TestAppendWordOffsetOverwritesCorrectBits() { 120 check := func(start byte, expectedBits string, offset int64) { 121 validBits := []byte{start} 122 const bitsAfterAppend = 8 123 wr := utils.NewFirstTimeBitmapWriter(validBits, offset, int64(8*len(validBits))-offset) 124 wr.AppendWord(0xFF, bitsAfterAppend-offset) 125 wr.Finish() 126 f.Equal(expectedBits, bitmapToString(validBits, bitsAfterAppend)) 127 } 128 129 f.Run("CheckAppend", func() { 130 tests := []struct { 131 expectedBits string 132 offset int64 133 }{ 134 {"11111111", 0}, 135 {"01111111", 1}, 136 {"00111111", 2}, 137 {"00011111", 3}, 138 {"00001111", 4}, 139 {"00000111", 5}, 140 {"00000011", 6}, 141 {"00000001", 7}, 142 } 143 for _, tt := range tests { 144 f.Run(tt.expectedBits, func() { check(0x00, tt.expectedBits, tt.offset) }) 145 } 146 }) 147 148 f.Run("CheckWithSet", func() { 149 tests := []struct { 150 expectedBits string 151 offset int64 152 }{ 153 {"11111111", 1}, 154 {"10111111", 2}, 155 {"10011111", 3}, 156 {"10001111", 4}, 157 {"10000111", 5}, 158 {"10000011", 6}, 159 {"10000001", 7}, 160 } 161 for _, tt := range tests { 162 f.Run(tt.expectedBits, func() { check(0x1, tt.expectedBits, tt.offset) }) 163 } 164 }) 165 166 f.Run("CheckWithPreceding", func() { 167 tests := []struct { 168 expectedBits string 169 offset int64 170 }{ 171 {"11111111", 0}, 172 {"11111111", 1}, 173 {"11111111", 2}, 174 {"11111111", 3}, 175 {"11111111", 4}, 176 {"11111111", 5}, 177 {"11111111", 6}, 178 {"11111111", 7}, 179 } 180 for _, tt := range tests { 181 f.Run(fmt.Sprintf("%d", tt.offset), func() { check(0xFF, tt.expectedBits, tt.offset) }) 182 } 183 }) 184 } 185 186 func (f *FirstTimeBitmapWriterSuite) TestAppendZeroBitsNoImpact() { 187 validBits := []byte{0x00} 188 wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(len(validBits)*8)) 189 wr.AppendWord(0xFF, 0) 190 wr.AppendWord(0xFF, 0) 191 wr.AppendWord(0x01, 1) 192 wr.Finish() 193 f.Equal(uint8(0x2), validBits[0]) 194 } 195 196 func (f *FirstTimeBitmapWriterSuite) TestAppendLessThanByte() { 197 { 198 validBits := make([]byte, 8) 199 wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8) 200 wr.AppendWord(0xB, 4) 201 wr.Finish() 202 f.Equal("01101000", bitmapToString(validBits, 8)) 203 } 204 { 205 // test with all bits initially set 206 validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} 207 wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8) 208 wr.AppendWord(0xB, 4) 209 wr.Finish() 210 f.Equal("11101000", bitmapToString(validBits, 8)) 211 } 212 } 213 214 func (f *FirstTimeBitmapWriterSuite) TestAppendByteThenMore() { 215 { 216 validBits := make([]byte, 8) 217 wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9) 218 wr.AppendWord(0xC3, 8) 219 wr.AppendWord(0x01, 1) 220 wr.Finish() 221 f.Equal("110000111", bitmapToString(validBits, 9)) 222 } 223 { 224 // test with all bits initially set 225 validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} 226 wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9) 227 wr.AppendWord(0xC3, 8) 228 wr.AppendWord(0x01, 1) 229 wr.Finish() 230 f.Equal("110000111", bitmapToString(validBits, 9)) 231 } 232 } 233 234 func (f *FirstTimeBitmapWriterSuite) TestAppendWordShiftBitsCorrectly() { 235 const pattern = 0x9A9A9A9A9A9A9A9A 236 237 tests := []struct { 238 leadingBits string 239 middleBits string 240 trailingBits string 241 offset int64 242 presetBufferBits bool 243 }{ 244 {"01011001", "01011001", "00000000", 8, false}, 245 {"00101100", "10101100", "10000000", 9, false}, 246 {"00010110", "01010110", "01000000", 10, false}, 247 {"00001011", "00101011", "00100000", 11, false}, 248 {"00000101", "10010101", "10010000", 12, false}, 249 {"00000010", "11001010", "11001000", 13, false}, 250 {"00000001", "01100101", "01100100", 14, false}, 251 {"00000000", "10110010", "10110010", 15, false}, 252 {"01011001", "01011001", "11111111", 8, true}, 253 {"10101100", "10101100", "10000000", 9, true}, 254 {"11010110", "01010110", "01000000", 10, true}, 255 {"11101011", "00101011", "00100000", 11, true}, 256 {"11110101", "10010101", "10010000", 12, true}, 257 {"11111010", "11001010", "11001000", 13, true}, 258 {"11111101", "01100101", "01100100", 14, true}, 259 {"11111110", "10110010", "10110010", 15, true}, 260 } 261 for _, tt := range tests { 262 f.Run(tt.leadingBits, func() { 263 f.Require().GreaterOrEqual(tt.offset, int64(8)) 264 validBits := make([]byte, 10) 265 if tt.presetBufferBits { 266 for idx := range validBits { 267 validBits[idx] = 0xFF 268 } 269 } 270 271 validBits[0] = 0x99 272 wr := utils.NewFirstTimeBitmapWriter(validBits, tt.offset, (9*int64(reflect.TypeOf(uint64(0)).Size()))-tt.offset) 273 wr.AppendWord(pattern, 64) 274 wr.Finish() 275 f.Equal(uint8(0x99), validBits[0]) 276 f.Equal(tt.leadingBits, bitmapToString(validBits[1:], 8)) 277 for x := 2; x < 9; x++ { 278 f.Equal(tt.middleBits, bitmapToString(validBits[x:], 8)) 279 } 280 f.Equal(tt.trailingBits, bitmapToString(validBits[9:], 8)) 281 }) 282 } 283 } 284 285 func (f *FirstTimeBitmapWriterSuite) TestAppendWordOnlyAppropriateBytesWritten() { 286 validBits := []byte{0x00, 0x00} 287 bitmap := uint64(0x1FF) 288 { 289 wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits))-1) 290 wr.AppendWord(bitmap, 7) 291 wr.Finish() 292 f.Equal([]byte{0xFE, 0x00}, validBits) 293 } 294 { 295 wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits)-1)) 296 wr.AppendWord(bitmap, 8) 297 wr.Finish() 298 f.Equal([]byte{0xFE, 0x03}, validBits) 299 } 300 } 301 302 func TestFirstTimeBitmapWriter(t *testing.T) { 303 suite.Run(t, new(FirstTimeBitmapWriterSuite)) 304 }