github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/bit_block_counter_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils_test
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/apache/arrow/go/v7/arrow/bitutil"
    23  	"github.com/apache/arrow/go/v7/arrow/memory"
    24  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    25  	"github.com/stretchr/testify/assert"
    26  	"golang.org/x/exp/rand"
    27  )
    28  
// kWordSize is the number of bits in one bitmap word. The tests in this file
// expect NextWord to report full blocks of this length and NextFourWords to
// report full blocks of four times this length.
const kWordSize = 64
    30  
    31  func create(nbytes, offset, length int64) (*memory.Buffer, *utils.BitBlockCounter) {
    32  	buf := memory.NewResizableBuffer(memory.DefaultAllocator)
    33  	buf.Resize(int(nbytes))
    34  	return buf, utils.NewBitBlockCounter(buf.Bytes(), offset, length)
    35  }
    36  
    37  func TestOneWordBasics(t *testing.T) {
    38  	const nbytes = 1024
    39  
    40  	buf, counter := create(nbytes, 0, nbytes*8)
    41  	defer buf.Release()
    42  
    43  	var bitsScanned int64
    44  	for i := 0; i < nbytes/8; i++ {
    45  		block := counter.NextWord()
    46  		assert.EqualValues(t, kWordSize, block.Len)
    47  		assert.EqualValues(t, 0, block.Popcnt)
    48  		bitsScanned += int64(block.Len)
    49  	}
    50  	assert.EqualValues(t, 1024*8, bitsScanned)
    51  
    52  	block := counter.NextWord()
    53  	assert.Zero(t, block.Len)
    54  	assert.Zero(t, block.Popcnt)
    55  	assert.True(t, block.NoneSet())
    56  }
    57  
    58  func TestFourWordsBasics(t *testing.T) {
    59  	const nbytes = 1024
    60  
    61  	buf, counter := create(nbytes, 0, nbytes*8)
    62  	defer buf.Release()
    63  
    64  	var bitsScanned int64
    65  	for i := 0; i < nbytes/32; i++ {
    66  		block := counter.NextFourWords()
    67  		assert.EqualValues(t, 4*kWordSize, block.Len)
    68  		assert.EqualValues(t, 0, block.Popcnt)
    69  		bitsScanned += int64(block.Len)
    70  	}
    71  	assert.EqualValues(t, 1024*8, bitsScanned)
    72  
    73  	block := counter.NextFourWords()
    74  	assert.Zero(t, block.Len)
    75  	assert.Zero(t, block.Popcnt)
    76  }
    77  
    78  func TestOneWordWithOffsets(t *testing.T) {
    79  	checkWithOffset := func(offset int64) {
    80  		const (
    81  			nwords     int64 = 4
    82  			totalBytes       = nwords*8 + 1
    83  		)
    84  
    85  		// Trim a bit from the end of the bitmap so we can check
    86  		// the remainder bits behavior
    87  		buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1)
    88  		defer buf.Release()
    89  
    90  		memory.Set(buf.Bytes(), byte(0xFF))
    91  
    92  		block := counter.NextWord()
    93  		assert.EqualValues(t, kWordSize, block.Len)
    94  		assert.EqualValues(t, 64, block.Popcnt)
    95  
    96  		// add a false value to the next word
    97  		bitutil.SetBitTo(buf.Bytes(), kWordSize+int(offset), false)
    98  		block = counter.NextWord()
    99  		assert.EqualValues(t, 64, block.Len)
   100  		assert.EqualValues(t, 63, block.Popcnt)
   101  
   102  		// Set the next word to all false
   103  		bitutil.SetBitsTo(buf.Bytes(), 2*kWordSize+offset, kWordSize, false)
   104  
   105  		block = counter.NextWord()
   106  		assert.EqualValues(t, 64, block.Len)
   107  		assert.Zero(t, block.Popcnt)
   108  
   109  		block = counter.NextWord()
   110  		assert.EqualValues(t, kWordSize-offset-1, block.Len)
   111  		assert.EqualValues(t, block.Len, block.Popcnt)
   112  		assert.True(t, block.AllSet())
   113  
   114  		// we can keep calling nextword safely
   115  		block = counter.NextWord()
   116  		assert.Zero(t, block.Len)
   117  		assert.Zero(t, block.Popcnt)
   118  	}
   119  
   120  	for offsetI := int64(0); offsetI < 8; offsetI++ {
   121  		checkWithOffset(offsetI)
   122  	}
   123  }
   124  
   125  func TestFourWordsWithOffsets(t *testing.T) {
   126  	checkWithOffset := func(offset int64) {
   127  		const (
   128  			nwords     = 17
   129  			totalBytes = nwords*8 + 1
   130  		)
   131  
   132  		// trim a bit from the end of the bitmap so we can check the remainder
   133  		// bits behavior
   134  		buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1)
   135  
   136  		// start with all set
   137  		memory.Set(buf.Bytes(), 0xFF)
   138  
   139  		block := counter.NextFourWords()
   140  		assert.EqualValues(t, 4*kWordSize, block.Len)
   141  		assert.EqualValues(t, block.Len, block.Popcnt)
   142  
   143  		// add some false values to the next 3 shifted words
   144  		bitutil.ClearBit(buf.Bytes(), int(4*kWordSize+offset))
   145  		bitutil.ClearBit(buf.Bytes(), int(5*kWordSize+offset))
   146  		bitutil.ClearBit(buf.Bytes(), int(6*kWordSize+offset))
   147  
   148  		block = counter.NextFourWords()
   149  		assert.EqualValues(t, 4*kWordSize, block.Len)
   150  		assert.EqualValues(t, 253, block.Popcnt)
   151  
   152  		// set the next two words to all false
   153  		bitutil.SetBitsTo(buf.Bytes(), 8*kWordSize+offset, 2*kWordSize, false)
   154  
   155  		// block is half set
   156  		block = counter.NextFourWords()
   157  		assert.EqualValues(t, 4*kWordSize, block.Len)
   158  		assert.EqualValues(t, 128, block.Popcnt)
   159  
   160  		// last full block whether offset or no
   161  		block = counter.NextFourWords()
   162  		assert.EqualValues(t, 4*kWordSize, block.Len)
   163  		assert.EqualValues(t, block.Len, block.Popcnt)
   164  
   165  		// partial block
   166  		block = counter.NextFourWords()
   167  		assert.EqualValues(t, kWordSize-offset-1, block.Len)
   168  		assert.EqualValues(t, block.Len, block.Popcnt)
   169  
   170  		// we can keep calling NextFourWords safely
   171  		block = counter.NextFourWords()
   172  		assert.Zero(t, block.Len)
   173  		assert.Zero(t, block.Popcnt)
   174  	}
   175  
   176  	for offsetI := int64(0); offsetI < 8; offsetI++ {
   177  		checkWithOffset(offsetI)
   178  	}
   179  }
   180  
   181  func TestFourWordsRandomData(t *testing.T) {
   182  	const (
   183  		nbytes = 1024
   184  	)
   185  
   186  	buf := make([]byte, nbytes)
   187  	r := rand.New(rand.NewSource(0))
   188  	r.Read(buf)
   189  
   190  	checkWithOffset := func(offset int64) {
   191  		counter := utils.NewBitBlockCounter(buf, offset, nbytes*8-offset)
   192  		for i := 0; i < nbytes/32; i++ {
   193  			block := counter.NextFourWords()
   194  			assert.EqualValues(t, bitutil.CountSetBits(buf, i*256+int(offset), int(block.Len)), block.Popcnt)
   195  		}
   196  	}
   197  
   198  	for offsetI := int64(0); offsetI < 8; offsetI++ {
   199  		checkWithOffset(offsetI)
   200  	}
   201  }