github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/bit_set_run_reader_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils_test
    18  
    19  import (
    20  	"reflect"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v7/arrow/bitutil"
    24  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    25  	"github.com/stretchr/testify/suite"
    26  )
    27  
    28  func reverseAny(s interface{}) {
    29  	n := reflect.ValueOf(s).Len()
    30  	swap := reflect.Swapper(s)
    31  	for i, j := 0, n-1; i < j; i, j = i+1, j-1 {
    32  		swap(i, j)
    33  	}
    34  }
    35  
    36  type linearBitRunReader struct {
    37  	reader *bitutil.BitmapReader
    38  }
    39  
    40  func (l linearBitRunReader) NextRun() utils.BitRun {
    41  	r := utils.BitRun{0, l.reader.Set()}
    42  	for l.reader.Pos() < l.reader.Len() && l.reader.Set() == r.Set {
    43  		r.Len++
    44  		l.reader.Next()
    45  	}
    46  	return r
    47  }
    48  
    49  func bitmapFromString(s string) []byte {
    50  	maxLen := bitutil.BytesForBits(int64(len(s)))
    51  	ret := make([]byte, maxLen)
    52  	i := 0
    53  	for _, c := range s {
    54  		switch c {
    55  		case '0':
    56  			bitutil.ClearBit(ret, i)
    57  			i++
    58  		case '1':
    59  			bitutil.SetBit(ret, i)
    60  			i++
    61  		case ' ', '\t', '\r', '\n':
    62  		default:
    63  			panic("unexpected character for bitmap string")
    64  		}
    65  	}
    66  
    67  	actualLen := bitutil.BytesForBits(int64(i))
    68  	return ret[:actualLen]
    69  }
    70  
    71  func referenceBitRuns(data []byte, offset, length int) (ret []utils.SetBitRun) {
    72  	ret = make([]utils.SetBitRun, 0)
    73  	reader := linearBitRunReader{bitutil.NewBitmapReader(data, offset, length)}
    74  	pos := 0
    75  	for pos < length {
    76  		br := reader.NextRun()
    77  		if br.Set {
    78  			ret = append(ret, utils.SetBitRun{int64(pos), br.Len})
    79  		}
    80  		pos += int(br.Len)
    81  	}
    82  	return
    83  }
    84  
    85  type BitSetRunReaderSuite struct {
    86  	suite.Suite
    87  
    88  	testOffsets []int64
    89  }
    90  
    91  func TestBitSetRunReader(t *testing.T) {
    92  	suite.Run(t, new(BitSetRunReaderSuite))
    93  }
    94  
    95  func (br *BitSetRunReaderSuite) SetupSuite() {
    96  	br.testOffsets = []int64{0, 1, 6, 7, 8, 33, 63, 64, 65, 71}
    97  	br.T().Parallel()
    98  }
    99  
   100  type Range struct {
   101  	Offset int64
   102  	Len    int64
   103  }
   104  
   105  func (r Range) EndOffset() int64 { return r.Offset + r.Len }
   106  
   107  func (br *BitSetRunReaderSuite) bufferTestRanges(buf []byte) []Range {
   108  	bufSize := int64(len(buf) * 8) // in bits
   109  	rg := make([]Range, 0)
   110  	for _, offset := range br.testOffsets {
   111  		for _, lenAdjust := range br.testOffsets {
   112  			length := utils.Min(bufSize-offset, lenAdjust)
   113  			br.GreaterOrEqual(length, int64(0))
   114  			rg = append(rg, Range{offset, length})
   115  			length = utils.Min(bufSize-offset, bufSize-lenAdjust)
   116  			br.GreaterOrEqual(length, int64(0))
   117  			rg = append(rg, Range{offset, length})
   118  		}
   119  	}
   120  	return rg
   121  }
   122  
   123  func (br *BitSetRunReaderSuite) assertBitRuns(buf []byte, start, length int64, expected []utils.SetBitRun) {
   124  	{
   125  		runs := make([]utils.SetBitRun, 0)
   126  		reader := utils.NewSetBitRunReader(buf, start, length)
   127  		for {
   128  			run := reader.NextRun()
   129  			if run.Length == 0 {
   130  				break
   131  			}
   132  			runs = append(runs, run)
   133  		}
   134  		br.Equal(expected, runs)
   135  	}
   136  	{
   137  		runs := make([]utils.SetBitRun, 0)
   138  		reader := utils.NewReverseSetBitRunReader(buf, start, length)
   139  		for {
   140  			run := reader.NextRun()
   141  			if run.Length == 0 {
   142  				break
   143  			}
   144  			runs = append(runs, run)
   145  		}
   146  		reverseAny(expected)
   147  		br.Equal(expected, runs)
   148  	}
   149  }
   150  
   151  func (br *BitSetRunReaderSuite) TestEmpty() {
   152  	for _, offset := range br.testOffsets {
   153  		br.assertBitRuns(nil, offset, 0, []utils.SetBitRun{})
   154  	}
   155  }
   156  
   157  func (br *BitSetRunReaderSuite) TestOneByte() {
   158  	buffer := bitmapFromString("01101101")
   159  	br.assertBitRuns(buffer, 0, 8, []utils.SetBitRun{
   160  		{1, 2}, {4, 2}, {7, 1},
   161  	})
   162  
   163  	for _, str := range []string{"01101101", "10110110", "00000000", "11111111"} {
   164  		buf := bitmapFromString(str)
   165  		for offset := 0; offset < 8; offset++ {
   166  			for length := 0; length <= 8-offset; length++ {
   167  				expected := referenceBitRuns(buf, offset, length)
   168  				br.assertBitRuns(buf, int64(offset), int64(length), expected)
   169  			}
   170  		}
   171  	}
   172  }
   173  
   174  func (br *BitSetRunReaderSuite) TestTiny() {
   175  	buf := bitmapFromString("11100011 10001110 00111000 11100011 10001110 00111000")
   176  
   177  	br.assertBitRuns(buf, 0, 48, []utils.SetBitRun{
   178  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   179  	})
   180  	br.assertBitRuns(buf, 0, 46, []utils.SetBitRun{
   181  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   182  	})
   183  	br.assertBitRuns(buf, 0, 45, []utils.SetBitRun{
   184  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   185  	})
   186  	br.assertBitRuns(buf, 0, 42, []utils.SetBitRun{
   187  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3},
   188  	})
   189  	br.assertBitRuns(buf, 3, 45, []utils.SetBitRun{
   190  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   191  	})
   192  	br.assertBitRuns(buf, 3, 43, []utils.SetBitRun{
   193  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   194  	})
   195  	br.assertBitRuns(buf, 3, 42, []utils.SetBitRun{
   196  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   197  	})
   198  	br.assertBitRuns(buf, 3, 39, []utils.SetBitRun{
   199  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3},
   200  	})
   201  }
   202  
   203  func (br *BitSetRunReaderSuite) TestAllZeros() {
   204  	const bufferSize = 256
   205  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   206  
   207  	for _, rg := range br.bufferTestRanges(buf) {
   208  		br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{})
   209  	}
   210  }
   211  
   212  func (br *BitSetRunReaderSuite) TestAllOnes() {
   213  	const bufferSize = 256
   214  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   215  	bitutil.SetBitsTo(buf, 0, bufferSize, true)
   216  
   217  	for _, rg := range br.bufferTestRanges(buf) {
   218  		if rg.Len > 0 {
   219  			br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{{0, rg.Len}})
   220  		} else {
   221  			br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{})
   222  		}
   223  	}
   224  }
   225  
   226  func (br *BitSetRunReaderSuite) TestSmall() {
   227  	// ones then zeros then ones
   228  	const (
   229  		bufferSize      = 256
   230  		onesLen         = 64
   231  		secondOnesStart = bufferSize - onesLen
   232  	)
   233  
   234  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   235  	bitutil.SetBitsTo(buf, 0, bufferSize, false)
   236  	bitutil.SetBitsTo(buf, 0, onesLen, true)
   237  	bitutil.SetBitsTo(buf, secondOnesStart, onesLen, true)
   238  
   239  	for _, rg := range br.bufferTestRanges(buf) {
   240  		expected := []utils.SetBitRun{}
   241  		if rg.Offset < onesLen && rg.Len > 0 {
   242  			expected = append(expected, utils.SetBitRun{0, utils.Min(onesLen-rg.Offset, rg.Len)})
   243  		}
   244  		if rg.Offset+rg.Len > secondOnesStart {
   245  			expected = append(expected, utils.SetBitRun{secondOnesStart - rg.Offset, rg.Len + rg.Offset - secondOnesStart})
   246  		}
   247  		br.assertBitRuns(buf, rg.Offset, rg.Len, expected)
   248  	}
   249  }
   250  
   251  func (br *BitSetRunReaderSuite) TestSingleRun() {
   252  	// one single run of ones, at varying places in the buffer
   253  	const bufferSize = 512
   254  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   255  
   256  	for _, onesRg := range br.bufferTestRanges(buf) {
   257  		bitutil.SetBitsTo(buf, 0, bufferSize, false)
   258  		bitutil.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true)
   259  
   260  		for _, rg := range br.bufferTestRanges(buf) {
   261  			expect := []utils.SetBitRun{}
   262  			if rg.Len != 0 && onesRg.Len != 0 && rg.Offset < onesRg.EndOffset() && onesRg.Offset < rg.EndOffset() {
   263  				// the two ranges intersect
   264  				var (
   265  					intersectStart = utils.Max(rg.Offset, onesRg.Offset)
   266  					intersectStop  = utils.Min(rg.EndOffset(), onesRg.EndOffset())
   267  				)
   268  				expect = append(expect, utils.SetBitRun{intersectStart - rg.Offset, intersectStop - intersectStart})
   269  			}
   270  			br.assertBitRuns(buf, rg.Offset, rg.Len, expect)
   271  		}
   272  	}
   273  }