github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encoding/memo_table_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding_test
    18  
    19  import (
    20  	"math"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v14/arrow"
    24  	"github.com/apache/arrow/go/v14/arrow/array"
    25  	"github.com/apache/arrow/go/v14/arrow/memory"
    26  	"github.com/apache/arrow/go/v14/internal/hashing"
    27  	"github.com/apache/arrow/go/v14/parquet/internal/encoding"
    28  	"github.com/stretchr/testify/suite"
    29  )
    30  
// MemoTableTestSuite is the testify suite that holds the memo table tests.
// It has no fields of its own beyond the embedded suite.Suite, which supplies
// the assertion methods used by the helpers and test methods on this type.
type MemoTableTestSuite struct {
	suite.Suite
}
    34  
    35  func TestMemoTable(t *testing.T) {
    36  	suite.Run(t, new(MemoTableTestSuite))
    37  }
    38  
    39  func (m *MemoTableTestSuite) assertGetNotFound(table encoding.MemoTable, v interface{}) {
    40  	_, ok := table.Get(v)
    41  	m.False(ok)
    42  }
    43  
    44  func (m *MemoTableTestSuite) assertGet(table encoding.MemoTable, v interface{}, expected int) {
    45  	idx, ok := table.Get(v)
    46  	m.Equal(expected, idx)
    47  	m.True(ok)
    48  }
    49  
    50  func (m *MemoTableTestSuite) assertGetOrInsert(table encoding.MemoTable, v interface{}, expected int) {
    51  	idx, _, err := table.GetOrInsert(v)
    52  	m.NoError(err)
    53  	m.Equal(expected, idx)
    54  }
    55  
    56  func (m *MemoTableTestSuite) assertGetNullNotFound(table encoding.MemoTable) {
    57  	_, ok := table.GetNull()
    58  	m.False(ok)
    59  }
    60  
    61  func (m *MemoTableTestSuite) assertGetNull(table encoding.MemoTable, expected int) {
    62  	idx, ok := table.GetNull()
    63  	m.Equal(expected, idx)
    64  	m.True(ok)
    65  }
    66  
    67  func (m *MemoTableTestSuite) assertGetOrInsertNull(table encoding.MemoTable, expected int) {
    68  	idx, _ := table.GetOrInsertNull()
    69  	m.Equal(expected, idx)
    70  }
    71  
    72  func (m *MemoTableTestSuite) TestInt64() {
    73  	const (
    74  		A int64 = 1234
    75  		B int64 = 0
    76  		C int64 = -98765321
    77  		D int64 = 12345678901234
    78  		E int64 = -1
    79  		F int64 = 1
    80  		G int64 = 9223372036854775807
    81  		H int64 = -9223372036854775807 - 1
    82  	)
    83  
    84  	// table := encoding.NewInt64MemoTable(nil)
    85  	table := hashing.NewInt64MemoTable(0)
    86  	m.Zero(table.Size())
    87  	m.assertGetNotFound(table, A)
    88  	m.assertGetNullNotFound(table)
    89  	m.assertGetOrInsert(table, A, 0)
    90  	m.assertGetNotFound(table, B)
    91  	m.assertGetOrInsert(table, B, 1)
    92  	m.assertGetOrInsert(table, C, 2)
    93  	m.assertGetOrInsert(table, D, 3)
    94  	m.assertGetOrInsert(table, E, 4)
    95  	m.assertGetOrInsertNull(table, 5)
    96  
    97  	m.assertGet(table, A, 0)
    98  	m.assertGetOrInsert(table, A, 0)
    99  	m.assertGet(table, E, 4)
   100  	m.assertGetOrInsert(table, E, 4)
   101  
   102  	m.assertGetOrInsert(table, F, 6)
   103  	m.assertGetOrInsert(table, G, 7)
   104  	m.assertGetOrInsert(table, H, 8)
   105  
   106  	m.assertGetOrInsert(table, G, 7)
   107  	m.assertGetOrInsert(table, F, 6)
   108  	m.assertGetOrInsertNull(table, 5)
   109  	m.assertGetOrInsert(table, E, 4)
   110  	m.assertGetOrInsert(table, D, 3)
   111  	m.assertGetOrInsert(table, C, 2)
   112  	m.assertGetOrInsert(table, B, 1)
   113  	m.assertGetOrInsert(table, A, 0)
   114  
   115  	const sz int = 9
   116  	m.Equal(sz, table.Size())
   117  	m.Panics(func() {
   118  		values := make([]int32, sz)
   119  		table.CopyValues(values)
   120  	}, "should panic because wrong type")
   121  	m.Panics(func() {
   122  		values := make([]int64, sz-3)
   123  		table.CopyValues(values)
   124  	}, "should panic because out of bounds")
   125  
   126  	{
   127  		values := make([]int64, sz)
   128  		table.CopyValues(values)
   129  		m.Equal([]int64{A, B, C, D, E, 0, F, G, H}, values)
   130  	}
   131  	{
   132  		const offset = 3
   133  		values := make([]int64, sz-offset)
   134  		table.CopyValuesSubset(offset, values)
   135  		m.Equal([]int64{D, E, 0, F, G, H}, values)
   136  	}
   137  }
   138  
   139  func (m *MemoTableTestSuite) TestFloat64() {
   140  	const (
   141  		A float64 = 0.0
   142  		B float64 = 1.5
   143  		C float64 = -0.1
   144  	)
   145  	var (
   146  		D = math.Inf(1)
   147  		E = -D
   148  		F = math.NaN()                                       // uses Quiet NaN i.e. 0x7FF8000000000001
   149  		G = math.Float64frombits(uint64(0x7FF0000000000001)) // test Signalling NaN
   150  		H = math.Float64frombits(uint64(0xFFF7FFFFFFFFFFFF)) // other NaN bit pattern
   151  	)
   152  
   153  	// table := encoding.NewFloat64MemoTable(nil)
   154  	table := hashing.NewFloat64MemoTable(0)
   155  	m.Zero(table.Size())
   156  	m.assertGetNotFound(table, A)
   157  	m.assertGetNullNotFound(table)
   158  	m.assertGetOrInsert(table, A, 0)
   159  	m.assertGetNotFound(table, B)
   160  	m.assertGetOrInsert(table, B, 1)
   161  	m.assertGetOrInsert(table, C, 2)
   162  	m.assertGetOrInsert(table, D, 3)
   163  	m.assertGetOrInsert(table, E, 4)
   164  	m.assertGetOrInsert(table, F, 5)
   165  	m.assertGetOrInsert(table, G, 5)
   166  	m.assertGetOrInsert(table, H, 5)
   167  
   168  	m.assertGet(table, A, 0)
   169  	m.assertGetOrInsert(table, A, 0)
   170  	m.assertGetOrInsert(table, B, 1)
   171  	m.assertGetOrInsert(table, C, 2)
   172  	m.assertGetOrInsert(table, D, 3)
   173  	m.assertGet(table, E, 4)
   174  	m.assertGetOrInsert(table, E, 4)
   175  	m.assertGet(table, F, 5)
   176  	m.assertGetOrInsert(table, F, 5)
   177  	m.assertGet(table, G, 5)
   178  	m.assertGetOrInsert(table, G, 5)
   179  	m.assertGet(table, H, 5)
   180  	m.assertGetOrInsert(table, H, 5)
   181  
   182  	m.Equal(6, table.Size())
   183  	expected := []float64{A, B, C, D, E, F}
   184  	m.Panics(func() {
   185  		values := make([]int32, 6)
   186  		table.CopyValues(values)
   187  	}, "should panic because wrong type")
   188  	m.Panics(func() {
   189  		values := make([]float64, 3)
   190  		table.CopyValues(values)
   191  	}, "should panic because out of bounds")
   192  
   193  	values := make([]float64, len(expected))
   194  	table.CopyValues(values)
   195  	for idx, ex := range expected {
   196  		if math.IsNaN(ex) {
   197  			m.True(math.IsNaN(values[idx]))
   198  		} else {
   199  			m.Equal(ex, values[idx])
   200  		}
   201  	}
   202  }
   203  
   204  func (m *MemoTableTestSuite) TestBinaryBasics() {
   205  	const (
   206  		A = ""
   207  		B = "a"
   208  		C = "foo"
   209  		D = "bar"
   210  		E = "\000"
   211  		F = "\000trailing"
   212  	)
   213  
   214  	table := hashing.NewBinaryMemoTable(0, -1, array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary))
   215  	defer table.Release()
   216  
   217  	m.Zero(table.Size())
   218  	m.assertGetNotFound(table, A)
   219  	m.assertGetNullNotFound(table)
   220  	m.assertGetOrInsert(table, A, 0)
   221  	m.assertGetNotFound(table, B)
   222  	m.assertGetOrInsert(table, B, 1)
   223  	m.assertGetOrInsert(table, C, 2)
   224  	m.assertGetOrInsert(table, D, 3)
   225  	m.assertGetOrInsert(table, E, 4)
   226  	m.assertGetOrInsert(table, F, 5)
   227  	m.assertGetOrInsertNull(table, 6)
   228  
   229  	m.assertGet(table, A, 0)
   230  	m.assertGetOrInsert(table, A, 0)
   231  	m.assertGet(table, B, 1)
   232  	m.assertGetOrInsert(table, B, 1)
   233  	m.assertGetOrInsert(table, C, 2)
   234  	m.assertGetOrInsert(table, D, 3)
   235  	m.assertGetOrInsert(table, E, 4)
   236  	m.assertGet(table, F, 5)
   237  	m.assertGetOrInsert(table, F, 5)
   238  	m.assertGetNull(table, 6)
   239  	m.assertGetOrInsertNull(table, 6)
   240  
   241  	m.Equal(7, table.Size())
   242  	m.Equal(17, table.ValuesSize())
   243  
   244  	size := table.Size()
   245  	{
   246  		offsets := make([]int32, size+1)
   247  		table.CopyOffsets(offsets)
   248  		m.Equal([]int32{0, 0, 1, 4, 7, 8, 17, 17}, offsets)
   249  
   250  		expectedValues := "afoobar"
   251  		expectedValues += "\000"
   252  		expectedValues += "\000"
   253  		expectedValues += "trailing"
   254  		values := make([]byte, 17)
   255  		table.CopyValues(values)
   256  		m.Equal(expectedValues, string(values))
   257  	}
   258  
   259  	{
   260  		startOffset := 4
   261  		offsets := make([]int32, size+1-int(startOffset))
   262  		table.CopyOffsetsSubset(startOffset, offsets)
   263  		m.Equal([]int32{0, 1, 10, 10}, offsets)
   264  
   265  		expectedValues := ""
   266  		expectedValues += "\000"
   267  		expectedValues += "\000"
   268  		expectedValues += "trailing"
   269  
   270  		values := make([]byte, 10)
   271  		table.CopyValuesSubset(startOffset, values)
   272  		m.Equal(expectedValues, string(values))
   273  	}
   274  
   275  	{
   276  		startOffset := 1
   277  		values := make([]string, 0)
   278  		table.VisitValues(startOffset, func(b []byte) {
   279  			values = append(values, string(b))
   280  		})
   281  		m.Equal([]string{B, C, D, E, F, ""}, values)
   282  	}
   283  }
   284  
   285  func (m *MemoTableTestSuite) TestBinaryEmpty() {
   286  	table := encoding.NewBinaryMemoTable(memory.DefaultAllocator)
   287  	defer table.Release()
   288  
   289  	m.Zero(table.Size())
   290  	offsets := make([]int32, 1)
   291  	table.CopyOffsetsSubset(0, offsets)
   292  	m.Equal(int32(0), offsets[0])
   293  }