github.com/apache/arrow/go/v14@v14.0.2/internal/bitutils/bit_set_run_reader_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package bitutils_test
    18  
    19  import (
    20  	"reflect"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    24  	"github.com/apache/arrow/go/v14/internal/bitutils"
    25  	"github.com/apache/arrow/go/v14/internal/utils"
    26  	"github.com/stretchr/testify/suite"
    27  )
    28  
    29  func reverseAny(s interface{}) {
    30  	n := reflect.ValueOf(s).Len()
    31  	swap := reflect.Swapper(s)
    32  	for i, j := 0, n-1; i < j; i, j = i+1, j-1 {
    33  		swap(i, j)
    34  	}
    35  }
    36  
    37  type linearBitRunReader struct {
    38  	reader *bitutil.BitmapReader
    39  }
    40  
    41  func (l linearBitRunReader) NextRun() bitutils.BitRun {
    42  	r := bitutils.BitRun{0, l.reader.Set()}
    43  	for l.reader.Pos() < l.reader.Len() && l.reader.Set() == r.Set {
    44  		r.Len++
    45  		l.reader.Next()
    46  	}
    47  	return r
    48  }
    49  
    50  func bitmapFromString(s string) []byte {
    51  	maxLen := bitutil.BytesForBits(int64(len(s)))
    52  	ret := make([]byte, maxLen)
    53  	i := 0
    54  	for _, c := range s {
    55  		switch c {
    56  		case '0':
    57  			bitutil.ClearBit(ret, i)
    58  			i++
    59  		case '1':
    60  			bitutil.SetBit(ret, i)
    61  			i++
    62  		case ' ', '\t', '\r', '\n':
    63  		default:
    64  			panic("unexpected character for bitmap string")
    65  		}
    66  	}
    67  
    68  	actualLen := bitutil.BytesForBits(int64(i))
    69  	return ret[:actualLen]
    70  }
    71  
    72  func referenceBitRuns(data []byte, offset, length int) (ret []bitutils.SetBitRun) {
    73  	ret = make([]bitutils.SetBitRun, 0)
    74  	reader := linearBitRunReader{bitutil.NewBitmapReader(data, offset, length)}
    75  	pos := 0
    76  	for pos < length {
    77  		br := reader.NextRun()
    78  		if br.Set {
    79  			ret = append(ret, bitutils.SetBitRun{int64(pos), br.Len})
    80  		}
    81  		pos += int(br.Len)
    82  	}
    83  	return
    84  }
    85  
    86  type BitSetRunReaderSuite struct {
    87  	suite.Suite
    88  
    89  	testOffsets []int64
    90  }
    91  
    92  func TestBitSetRunReader(t *testing.T) {
    93  	suite.Run(t, new(BitSetRunReaderSuite))
    94  }
    95  
    96  func (br *BitSetRunReaderSuite) SetupSuite() {
    97  	br.testOffsets = []int64{0, 1, 6, 7, 8, 33, 63, 64, 65, 71}
    98  	br.T().Parallel()
    99  }
   100  
   101  type Range struct {
   102  	Offset int64
   103  	Len    int64
   104  }
   105  
   106  func (r Range) EndOffset() int64 { return r.Offset + r.Len }
   107  
   108  func (br *BitSetRunReaderSuite) bufferTestRanges(buf []byte) []Range {
   109  	bufSize := int64(len(buf) * 8) // in bits
   110  	rg := make([]Range, 0)
   111  	for _, offset := range br.testOffsets {
   112  		for _, lenAdjust := range br.testOffsets {
   113  			length := utils.Min(bufSize-offset, lenAdjust)
   114  			br.GreaterOrEqual(length, int64(0))
   115  			rg = append(rg, Range{offset, length})
   116  			length = utils.Min(bufSize-offset, bufSize-lenAdjust)
   117  			br.GreaterOrEqual(length, int64(0))
   118  			rg = append(rg, Range{offset, length})
   119  		}
   120  	}
   121  	return rg
   122  }
   123  
   124  func (br *BitSetRunReaderSuite) assertBitRuns(buf []byte, start, length int64, expected []bitutils.SetBitRun) {
   125  	{
   126  		runs := make([]bitutils.SetBitRun, 0)
   127  		reader := bitutils.NewSetBitRunReader(buf, start, length)
   128  		for {
   129  			run := reader.NextRun()
   130  			if run.Length == 0 {
   131  				break
   132  			}
   133  			runs = append(runs, run)
   134  		}
   135  		br.Equal(expected, runs)
   136  	}
   137  	{
   138  		runs := make([]bitutils.SetBitRun, 0)
   139  		reader := bitutils.NewReverseSetBitRunReader(buf, start, length)
   140  		for {
   141  			run := reader.NextRun()
   142  			if run.Length == 0 {
   143  				break
   144  			}
   145  			runs = append(runs, run)
   146  		}
   147  		reverseAny(expected)
   148  		br.Equal(expected, runs)
   149  	}
   150  }
   151  
   152  func (br *BitSetRunReaderSuite) TestEmpty() {
   153  	for _, offset := range br.testOffsets {
   154  		br.assertBitRuns(nil, offset, 0, []bitutils.SetBitRun{})
   155  	}
   156  }
   157  
   158  func (br *BitSetRunReaderSuite) TestOneByte() {
   159  	buffer := bitmapFromString("01101101")
   160  	br.assertBitRuns(buffer, 0, 8, []bitutils.SetBitRun{
   161  		{1, 2}, {4, 2}, {7, 1},
   162  	})
   163  
   164  	for _, str := range []string{"01101101", "10110110", "00000000", "11111111"} {
   165  		buf := bitmapFromString(str)
   166  		for offset := 0; offset < 8; offset++ {
   167  			for length := 0; length <= 8-offset; length++ {
   168  				expected := referenceBitRuns(buf, offset, length)
   169  				br.assertBitRuns(buf, int64(offset), int64(length), expected)
   170  			}
   171  		}
   172  	}
   173  }
   174  
   175  func (br *BitSetRunReaderSuite) TestTiny() {
   176  	buf := bitmapFromString("11100011 10001110 00111000 11100011 10001110 00111000")
   177  
   178  	br.assertBitRuns(buf, 0, 48, []bitutils.SetBitRun{
   179  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   180  	})
   181  	br.assertBitRuns(buf, 0, 46, []bitutils.SetBitRun{
   182  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   183  	})
   184  	br.assertBitRuns(buf, 0, 45, []bitutils.SetBitRun{
   185  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
   186  	})
   187  	br.assertBitRuns(buf, 0, 42, []bitutils.SetBitRun{
   188  		{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3},
   189  	})
   190  	br.assertBitRuns(buf, 3, 45, []bitutils.SetBitRun{
   191  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   192  	})
   193  	br.assertBitRuns(buf, 3, 43, []bitutils.SetBitRun{
   194  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   195  	})
   196  	br.assertBitRuns(buf, 3, 42, []bitutils.SetBitRun{
   197  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
   198  	})
   199  	br.assertBitRuns(buf, 3, 39, []bitutils.SetBitRun{
   200  		{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3},
   201  	})
   202  }
   203  
   204  func (br *BitSetRunReaderSuite) TestAllZeros() {
   205  	const bufferSize = 256
   206  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   207  
   208  	for _, rg := range br.bufferTestRanges(buf) {
   209  		br.assertBitRuns(buf, rg.Offset, rg.Len, []bitutils.SetBitRun{})
   210  	}
   211  }
   212  
   213  func (br *BitSetRunReaderSuite) TestAllOnes() {
   214  	const bufferSize = 256
   215  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   216  	bitutil.SetBitsTo(buf, 0, bufferSize, true)
   217  
   218  	for _, rg := range br.bufferTestRanges(buf) {
   219  		if rg.Len > 0 {
   220  			br.assertBitRuns(buf, rg.Offset, rg.Len, []bitutils.SetBitRun{{0, rg.Len}})
   221  		} else {
   222  			br.assertBitRuns(buf, rg.Offset, rg.Len, []bitutils.SetBitRun{})
   223  		}
   224  	}
   225  }
   226  
   227  func (br *BitSetRunReaderSuite) TestSmall() {
   228  	// ones then zeros then ones
   229  	const (
   230  		bufferSize      = 256
   231  		onesLen         = 64
   232  		secondOnesStart = bufferSize - onesLen
   233  	)
   234  
   235  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   236  	bitutil.SetBitsTo(buf, 0, bufferSize, false)
   237  	bitutil.SetBitsTo(buf, 0, onesLen, true)
   238  	bitutil.SetBitsTo(buf, secondOnesStart, onesLen, true)
   239  
   240  	for _, rg := range br.bufferTestRanges(buf) {
   241  		expected := []bitutils.SetBitRun{}
   242  		if rg.Offset < onesLen && rg.Len > 0 {
   243  			expected = append(expected, bitutils.SetBitRun{0, utils.Min(onesLen-rg.Offset, rg.Len)})
   244  		}
   245  		if rg.Offset+rg.Len > secondOnesStart {
   246  			expected = append(expected, bitutils.SetBitRun{secondOnesStart - rg.Offset, rg.Len + rg.Offset - secondOnesStart})
   247  		}
   248  		br.assertBitRuns(buf, rg.Offset, rg.Len, expected)
   249  	}
   250  }
   251  
   252  func (br *BitSetRunReaderSuite) TestSingleRun() {
   253  	// one single run of ones, at varying places in the buffer
   254  	const bufferSize = 512
   255  	buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
   256  
   257  	for _, onesRg := range br.bufferTestRanges(buf) {
   258  		bitutil.SetBitsTo(buf, 0, bufferSize, false)
   259  		bitutil.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true)
   260  
   261  		for _, rg := range br.bufferTestRanges(buf) {
   262  			expect := []bitutils.SetBitRun{}
   263  			if rg.Len != 0 && onesRg.Len != 0 && rg.Offset < onesRg.EndOffset() && onesRg.Offset < rg.EndOffset() {
   264  				// the two ranges intersect
   265  				var (
   266  					intersectStart = utils.Max(rg.Offset, onesRg.Offset)
   267  					intersectStop  = utils.Min(rg.EndOffset(), onesRg.EndOffset())
   268  				)
   269  				expect = append(expect, bitutils.SetBitRun{intersectStart - rg.Offset, intersectStop - intersectStart})
   270  			}
   271  			br.assertBitRuns(buf, rg.Offset, rg.Len, expect)
   272  		}
   273  	}
   274  }