github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/utils/bitmap_writer_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils_test
    18  
    19  import (
    20  	"fmt"
    21  	"reflect"
    22  	"strings"
    23  	"testing"
    24  
    25  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    26  	"github.com/apache/arrow/go/v14/parquet/internal/utils"
    27  	"github.com/stretchr/testify/suite"
    28  )
    29  
    30  func writeSliceToWriter(wr utils.BitmapWriter, values []int) {
    31  	for _, v := range values {
    32  		if v != 0 {
    33  			wr.Set()
    34  		} else {
    35  			wr.Clear()
    36  		}
    37  		wr.Next()
    38  	}
    39  	wr.Finish()
    40  }
    41  
    42  type FirstTimeBitmapWriterSuite struct {
    43  	suite.Suite
    44  }
    45  
    46  func (f *FirstTimeBitmapWriterSuite) TestNormalOperation() {
    47  	for _, fb := range []byte{0x00, 0xFF} {
    48  		{
    49  			bitmap := []byte{fb, fb, fb, fb}
    50  			wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 12)
    51  			writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1})
    52  			// {0b00110110, 0b1010, 0, 0}
    53  			f.Equal([]byte{0x36, 0x0a}, bitmap[:2])
    54  		}
    55  		{
    56  			bitmap := []byte{fb, fb, fb, fb}
    57  			wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 12)
    58  			writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1})
    59  			// {0b00110110, 0b1010, 0, 0}
    60  			f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2])
    61  		}
    62  		// Consecutive write chunks
    63  		{
    64  			bitmap := []byte{fb, fb, fb, fb}
    65  			{
    66  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 6)
    67  				writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1})
    68  			}
    69  			{
    70  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 6, 3)
    71  				writeSliceToWriter(wr, []int{0, 0, 0})
    72  			}
    73  			{
    74  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 9, 3)
    75  				writeSliceToWriter(wr, []int{1, 0, 1})
    76  			}
    77  			f.Equal([]byte{0x36, 0x0a}, bitmap[:2])
    78  		}
    79  		{
    80  			bitmap := []byte{fb, fb, fb, fb}
    81  			{
    82  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 0)
    83  				writeSliceToWriter(wr, []int{})
    84  			}
    85  			{
    86  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 6)
    87  				writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1})
    88  			}
    89  			{
    90  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 10, 3)
    91  				writeSliceToWriter(wr, []int{0, 0, 0})
    92  			}
    93  			{
    94  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 0)
    95  				writeSliceToWriter(wr, []int{})
    96  			}
    97  			{
    98  				wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 3)
    99  				writeSliceToWriter(wr, []int{1, 0, 1})
   100  			}
   101  			f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2])
   102  		}
   103  	}
   104  }
   105  
   106  func bitmapToString(bitmap []byte, bitCount int64) string {
   107  	var bld strings.Builder
   108  	bld.Grow(int(bitCount))
   109  	for i := 0; i < int(bitCount); i++ {
   110  		if bitutil.BitIsSet(bitmap, i) {
   111  			bld.WriteByte('1')
   112  		} else {
   113  			bld.WriteByte('0')
   114  		}
   115  	}
   116  	return bld.String()
   117  }
   118  
   119  func (f *FirstTimeBitmapWriterSuite) TestAppendWordOffsetOverwritesCorrectBits() {
   120  	check := func(start byte, expectedBits string, offset int64) {
   121  		validBits := []byte{start}
   122  		const bitsAfterAppend = 8
   123  		wr := utils.NewFirstTimeBitmapWriter(validBits, offset, int64(8*len(validBits))-offset)
   124  		wr.AppendWord(0xFF, bitsAfterAppend-offset)
   125  		wr.Finish()
   126  		f.Equal(expectedBits, bitmapToString(validBits, bitsAfterAppend))
   127  	}
   128  
   129  	f.Run("CheckAppend", func() {
   130  		tests := []struct {
   131  			expectedBits string
   132  			offset       int64
   133  		}{
   134  			{"11111111", 0},
   135  			{"01111111", 1},
   136  			{"00111111", 2},
   137  			{"00011111", 3},
   138  			{"00001111", 4},
   139  			{"00000111", 5},
   140  			{"00000011", 6},
   141  			{"00000001", 7},
   142  		}
   143  		for _, tt := range tests {
   144  			f.Run(tt.expectedBits, func() { check(0x00, tt.expectedBits, tt.offset) })
   145  		}
   146  	})
   147  
   148  	f.Run("CheckWithSet", func() {
   149  		tests := []struct {
   150  			expectedBits string
   151  			offset       int64
   152  		}{
   153  			{"11111111", 1},
   154  			{"10111111", 2},
   155  			{"10011111", 3},
   156  			{"10001111", 4},
   157  			{"10000111", 5},
   158  			{"10000011", 6},
   159  			{"10000001", 7},
   160  		}
   161  		for _, tt := range tests {
   162  			f.Run(tt.expectedBits, func() { check(0x1, tt.expectedBits, tt.offset) })
   163  		}
   164  	})
   165  
   166  	f.Run("CheckWithPreceding", func() {
   167  		tests := []struct {
   168  			expectedBits string
   169  			offset       int64
   170  		}{
   171  			{"11111111", 0},
   172  			{"11111111", 1},
   173  			{"11111111", 2},
   174  			{"11111111", 3},
   175  			{"11111111", 4},
   176  			{"11111111", 5},
   177  			{"11111111", 6},
   178  			{"11111111", 7},
   179  		}
   180  		for _, tt := range tests {
   181  			f.Run(fmt.Sprintf("%d", tt.offset), func() { check(0xFF, tt.expectedBits, tt.offset) })
   182  		}
   183  	})
   184  }
   185  
   186  func (f *FirstTimeBitmapWriterSuite) TestAppendZeroBitsNoImpact() {
   187  	validBits := []byte{0x00}
   188  	wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(len(validBits)*8))
   189  	wr.AppendWord(0xFF, 0)
   190  	wr.AppendWord(0xFF, 0)
   191  	wr.AppendWord(0x01, 1)
   192  	wr.Finish()
   193  	f.Equal(uint8(0x2), validBits[0])
   194  }
   195  
   196  func (f *FirstTimeBitmapWriterSuite) TestAppendLessThanByte() {
   197  	{
   198  		validBits := make([]byte, 8)
   199  		wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8)
   200  		wr.AppendWord(0xB, 4)
   201  		wr.Finish()
   202  		f.Equal("01101000", bitmapToString(validBits, 8))
   203  	}
   204  	{
   205  		// test with all bits initially set
   206  		validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
   207  		wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8)
   208  		wr.AppendWord(0xB, 4)
   209  		wr.Finish()
   210  		f.Equal("11101000", bitmapToString(validBits, 8))
   211  	}
   212  }
   213  
   214  func (f *FirstTimeBitmapWriterSuite) TestAppendByteThenMore() {
   215  	{
   216  		validBits := make([]byte, 8)
   217  		wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9)
   218  		wr.AppendWord(0xC3, 8)
   219  		wr.AppendWord(0x01, 1)
   220  		wr.Finish()
   221  		f.Equal("110000111", bitmapToString(validBits, 9))
   222  	}
   223  	{
   224  		// test with all bits initially set
   225  		validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
   226  		wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9)
   227  		wr.AppendWord(0xC3, 8)
   228  		wr.AppendWord(0x01, 1)
   229  		wr.Finish()
   230  		f.Equal("110000111", bitmapToString(validBits, 9))
   231  	}
   232  }
   233  
   234  func (f *FirstTimeBitmapWriterSuite) TestAppendWordShiftBitsCorrectly() {
   235  	const pattern = 0x9A9A9A9A9A9A9A9A
   236  
   237  	tests := []struct {
   238  		leadingBits      string
   239  		middleBits       string
   240  		trailingBits     string
   241  		offset           int64
   242  		presetBufferBits bool
   243  	}{
   244  		{"01011001", "01011001", "00000000", 8, false},
   245  		{"00101100", "10101100", "10000000", 9, false},
   246  		{"00010110", "01010110", "01000000", 10, false},
   247  		{"00001011", "00101011", "00100000", 11, false},
   248  		{"00000101", "10010101", "10010000", 12, false},
   249  		{"00000010", "11001010", "11001000", 13, false},
   250  		{"00000001", "01100101", "01100100", 14, false},
   251  		{"00000000", "10110010", "10110010", 15, false},
   252  		{"01011001", "01011001", "11111111", 8, true},
   253  		{"10101100", "10101100", "10000000", 9, true},
   254  		{"11010110", "01010110", "01000000", 10, true},
   255  		{"11101011", "00101011", "00100000", 11, true},
   256  		{"11110101", "10010101", "10010000", 12, true},
   257  		{"11111010", "11001010", "11001000", 13, true},
   258  		{"11111101", "01100101", "01100100", 14, true},
   259  		{"11111110", "10110010", "10110010", 15, true},
   260  	}
   261  	for _, tt := range tests {
   262  		f.Run(tt.leadingBits, func() {
   263  			f.Require().GreaterOrEqual(tt.offset, int64(8))
   264  			validBits := make([]byte, 10)
   265  			if tt.presetBufferBits {
   266  				for idx := range validBits {
   267  					validBits[idx] = 0xFF
   268  				}
   269  			}
   270  
   271  			validBits[0] = 0x99
   272  			wr := utils.NewFirstTimeBitmapWriter(validBits, tt.offset, (9*int64(reflect.TypeOf(uint64(0)).Size()))-tt.offset)
   273  			wr.AppendWord(pattern, 64)
   274  			wr.Finish()
   275  			f.Equal(uint8(0x99), validBits[0])
   276  			f.Equal(tt.leadingBits, bitmapToString(validBits[1:], 8))
   277  			for x := 2; x < 9; x++ {
   278  				f.Equal(tt.middleBits, bitmapToString(validBits[x:], 8))
   279  			}
   280  			f.Equal(tt.trailingBits, bitmapToString(validBits[9:], 8))
   281  		})
   282  	}
   283  }
   284  
   285  func (f *FirstTimeBitmapWriterSuite) TestAppendWordOnlyAppropriateBytesWritten() {
   286  	validBits := []byte{0x00, 0x00}
   287  	bitmap := uint64(0x1FF)
   288  	{
   289  		wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits))-1)
   290  		wr.AppendWord(bitmap, 7)
   291  		wr.Finish()
   292  		f.Equal([]byte{0xFE, 0x00}, validBits)
   293  	}
   294  	{
   295  		wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits)-1))
   296  		wr.AppendWord(bitmap, 8)
   297  		wr.Finish()
   298  		f.Equal([]byte{0xFE, 0x03}, validBits)
   299  	}
   300  }
   301  
   302  func TestFirstTimeBitmapWriter(t *testing.T) {
   303  	suite.Run(t, new(FirstTimeBitmapWriterSuite))
   304  }