github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/bit_set_run_reader_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package utils_test 18 19 import ( 20 "reflect" 21 "testing" 22 23 "github.com/apache/arrow/go/v7/arrow/bitutil" 24 "github.com/apache/arrow/go/v7/parquet/internal/utils" 25 "github.com/stretchr/testify/suite" 26 ) 27 28 func reverseAny(s interface{}) { 29 n := reflect.ValueOf(s).Len() 30 swap := reflect.Swapper(s) 31 for i, j := 0, n-1; i < j; i, j = i+1, j-1 { 32 swap(i, j) 33 } 34 } 35 36 type linearBitRunReader struct { 37 reader *bitutil.BitmapReader 38 } 39 40 func (l linearBitRunReader) NextRun() utils.BitRun { 41 r := utils.BitRun{0, l.reader.Set()} 42 for l.reader.Pos() < l.reader.Len() && l.reader.Set() == r.Set { 43 r.Len++ 44 l.reader.Next() 45 } 46 return r 47 } 48 49 func bitmapFromString(s string) []byte { 50 maxLen := bitutil.BytesForBits(int64(len(s))) 51 ret := make([]byte, maxLen) 52 i := 0 53 for _, c := range s { 54 switch c { 55 case '0': 56 bitutil.ClearBit(ret, i) 57 i++ 58 case '1': 59 bitutil.SetBit(ret, i) 60 i++ 61 case ' ', '\t', '\r', '\n': 62 default: 63 panic("unexpected character for bitmap string") 64 } 65 } 66 67 actualLen := bitutil.BytesForBits(int64(i)) 68 return ret[:actualLen] 69 } 70 71 func referenceBitRuns(data []byte, offset, length int) (ret []utils.SetBitRun) { 72 ret = make([]utils.SetBitRun, 0) 73 reader := linearBitRunReader{bitutil.NewBitmapReader(data, offset, length)} 74 pos := 0 75 for pos < length { 76 br := reader.NextRun() 77 if br.Set { 78 ret = append(ret, utils.SetBitRun{int64(pos), br.Len}) 79 } 80 pos += int(br.Len) 81 } 82 return 83 } 84 85 type BitSetRunReaderSuite struct { 86 suite.Suite 87 88 testOffsets []int64 89 } 90 91 func TestBitSetRunReader(t *testing.T) { 92 suite.Run(t, new(BitSetRunReaderSuite)) 93 } 94 95 func (br *BitSetRunReaderSuite) SetupSuite() { 96 br.testOffsets = []int64{0, 1, 6, 7, 8, 33, 63, 64, 65, 71} 97 br.T().Parallel() 98 } 99 100 type Range struct { 101 Offset int64 102 Len int64 103 } 104 105 func (r Range) EndOffset() int64 { return r.Offset + r.Len } 106 107 func (br *BitSetRunReaderSuite) bufferTestRanges(buf []byte) []Range { 108 bufSize := int64(len(buf) * 8) // in bits 109 rg := make([]Range, 0) 110 for _, offset := range br.testOffsets { 111 for _, lenAdjust := range br.testOffsets { 112 length := utils.Min(bufSize-offset, lenAdjust) 113 br.GreaterOrEqual(length, int64(0)) 114 rg = append(rg, Range{offset, length}) 115 length = utils.Min(bufSize-offset, bufSize-lenAdjust) 116 br.GreaterOrEqual(length, int64(0)) 117 rg = append(rg, Range{offset, length}) 118 } 119 } 120 return rg 121 } 122 123 func (br *BitSetRunReaderSuite) assertBitRuns(buf []byte, start, length int64, expected []utils.SetBitRun) { 124 { 125 runs := make([]utils.SetBitRun, 0) 126 reader := utils.NewSetBitRunReader(buf, start, length) 127 for { 128 run := reader.NextRun() 129 if run.Length == 0 { 130 break 131 } 132 runs = append(runs, run) 133 } 134 br.Equal(expected, runs) 135 } 136 { 137 runs := make([]utils.SetBitRun, 0) 138 reader := utils.NewReverseSetBitRunReader(buf, start, length) 139 for { 140 run := reader.NextRun() 141 if run.Length == 0 { 142 break 143 } 144 runs = append(runs, run) 145 } 146 reverseAny(expected) 147 br.Equal(expected, runs) 148 } 149 } 150 151 func (br *BitSetRunReaderSuite) TestEmpty() { 152 for _, offset := range br.testOffsets { 153 br.assertBitRuns(nil, offset, 0, []utils.SetBitRun{}) 154 } 155 } 156 157 func (br *BitSetRunReaderSuite) TestOneByte() { 158 buffer := bitmapFromString("01101101") 159 br.assertBitRuns(buffer, 0, 8, []utils.SetBitRun{ 160 {1, 2}, {4, 2}, {7, 1}, 161 }) 162 163 for _, str := range []string{"01101101", "10110110", "00000000", "11111111"} { 164 buf := bitmapFromString(str) 165 for offset := 0; offset < 8; offset++ { 166 for length := 0; length <= 8-offset; length++ { 167 expected := referenceBitRuns(buf, offset, length) 168 br.assertBitRuns(buf, int64(offset), int64(length), expected) 169 } 170 } 171 } 172 } 173 174 func (br *BitSetRunReaderSuite) TestTiny() { 175 buf := bitmapFromString("11100011 10001110 00111000 11100011 10001110 00111000") 176 177 br.assertBitRuns(buf, 0, 48, []utils.SetBitRun{ 178 {0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}, 179 }) 180 br.assertBitRuns(buf, 0, 46, []utils.SetBitRun{ 181 {0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}, 182 }) 183 br.assertBitRuns(buf, 0, 45, []utils.SetBitRun{ 184 {0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3}, 185 }) 186 br.assertBitRuns(buf, 0, 42, []utils.SetBitRun{ 187 {0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, 188 }) 189 br.assertBitRuns(buf, 3, 45, []utils.SetBitRun{ 190 {3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}, 191 }) 192 br.assertBitRuns(buf, 3, 43, []utils.SetBitRun{ 193 {3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}, 194 }) 195 br.assertBitRuns(buf, 3, 42, []utils.SetBitRun{ 196 {3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3}, 197 }) 198 br.assertBitRuns(buf, 3, 39, []utils.SetBitRun{ 199 {3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, 200 }) 201 } 202 203 func (br *BitSetRunReaderSuite) TestAllZeros() { 204 const bufferSize = 256 205 buf := make([]byte, int(bitutil.BytesForBits(bufferSize))) 206 207 for _, rg := range br.bufferTestRanges(buf) { 208 br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{}) 209 } 210 } 211 212 func (br *BitSetRunReaderSuite) TestAllOnes() { 213 const bufferSize = 256 214 buf := make([]byte, int(bitutil.BytesForBits(bufferSize))) 215 bitutil.SetBitsTo(buf, 0, bufferSize, true) 216 217 for _, rg := range br.bufferTestRanges(buf) { 218 if rg.Len > 0 { 219 br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{{0, rg.Len}}) 220 } else { 221 br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{}) 222 } 223 } 224 } 225 226 func (br *BitSetRunReaderSuite) TestSmall() { 227 // ones then zeros then ones 228 const ( 229 bufferSize = 256 230 onesLen = 64 231 secondOnesStart = bufferSize - onesLen 232 ) 233 234 buf := make([]byte, int(bitutil.BytesForBits(bufferSize))) 235 bitutil.SetBitsTo(buf, 0, bufferSize, false) 236 bitutil.SetBitsTo(buf, 0, onesLen, true) 237 bitutil.SetBitsTo(buf, secondOnesStart, onesLen, true) 238 239 for _, rg := range br.bufferTestRanges(buf) { 240 expected := []utils.SetBitRun{} 241 if rg.Offset < onesLen && rg.Len > 0 { 242 expected = append(expected, utils.SetBitRun{0, utils.Min(onesLen-rg.Offset, rg.Len)}) 243 } 244 if rg.Offset+rg.Len > secondOnesStart { 245 expected = append(expected, utils.SetBitRun{secondOnesStart - rg.Offset, rg.Len + rg.Offset - secondOnesStart}) 246 } 247 br.assertBitRuns(buf, rg.Offset, rg.Len, expected) 248 } 249 } 250 251 func (br *BitSetRunReaderSuite) TestSingleRun() { 252 // one single run of ones, at varying places in the buffer 253 const bufferSize = 512 254 buf := make([]byte, int(bitutil.BytesForBits(bufferSize))) 255 256 for _, onesRg := range br.bufferTestRanges(buf) { 257 bitutil.SetBitsTo(buf, 0, bufferSize, false) 258 bitutil.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true) 259 260 for _, rg := range br.bufferTestRanges(buf) { 261 expect := []utils.SetBitRun{} 262 if rg.Len != 0 && onesRg.Len != 0 && rg.Offset < onesRg.EndOffset() && onesRg.Offset < rg.EndOffset() { 263 // the two ranges intersect 264 var ( 265 intersectStart = utils.Max(rg.Offset, onesRg.Offset) 266 intersectStop = utils.Min(rg.EndOffset(), onesRg.EndOffset()) 267 ) 268 expect = append(expect, utils.SetBitRun{intersectStart - rg.Offset, intersectStop - intersectStart}) 269 } 270 br.assertBitRuns(buf, rg.Offset, rg.Len, expect) 271 } 272 } 273 }