github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/encoding/memo_table_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding_test
    18  
    19  import (
    20  	"math"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v7/arrow/memory"
    24  	"github.com/apache/arrow/go/v7/parquet/internal/encoding"
    25  	"github.com/apache/arrow/go/v7/parquet/internal/hashing"
    26  	"github.com/stretchr/testify/suite"
    27  )
    28  
    29  type MemoTableTestSuite struct {
    30  	suite.Suite
    31  }
    32  
    33  func TestMemoTable(t *testing.T) {
    34  	suite.Run(t, new(MemoTableTestSuite))
    35  }
    36  
    37  func (m *MemoTableTestSuite) assertGetNotFound(table encoding.MemoTable, v interface{}) {
    38  	_, ok := table.Get(v)
    39  	m.False(ok)
    40  }
    41  
    42  func (m *MemoTableTestSuite) assertGet(table encoding.MemoTable, v interface{}, expected int) {
    43  	idx, ok := table.Get(v)
    44  	m.Equal(expected, idx)
    45  	m.True(ok)
    46  }
    47  
    48  func (m *MemoTableTestSuite) assertGetOrInsert(table encoding.MemoTable, v interface{}, expected int) {
    49  	idx, _, err := table.GetOrInsert(v)
    50  	m.NoError(err)
    51  	m.Equal(expected, idx)
    52  }
    53  
    54  func (m *MemoTableTestSuite) assertGetNullNotFound(table encoding.MemoTable) {
    55  	_, ok := table.GetNull()
    56  	m.False(ok)
    57  }
    58  
    59  func (m *MemoTableTestSuite) assertGetNull(table encoding.MemoTable, expected int) {
    60  	idx, ok := table.GetNull()
    61  	m.Equal(expected, idx)
    62  	m.True(ok)
    63  }
    64  
    65  func (m *MemoTableTestSuite) assertGetOrInsertNull(table encoding.MemoTable, expected int) {
    66  	idx, _ := table.GetOrInsertNull()
    67  	m.Equal(expected, idx)
    68  }
    69  
    70  func (m *MemoTableTestSuite) TestInt64() {
    71  	const (
    72  		A int64 = 1234
    73  		B int64 = 0
    74  		C int64 = -98765321
    75  		D int64 = 12345678901234
    76  		E int64 = -1
    77  		F int64 = 1
    78  		G int64 = 9223372036854775807
    79  		H int64 = -9223372036854775807 - 1
    80  	)
    81  
    82  	// table := encoding.NewInt64MemoTable(nil)
    83  	table := hashing.NewInt64MemoTable(0)
    84  	m.Zero(table.Size())
    85  	m.assertGetNotFound(table, A)
    86  	m.assertGetNullNotFound(table)
    87  	m.assertGetOrInsert(table, A, 0)
    88  	m.assertGetNotFound(table, B)
    89  	m.assertGetOrInsert(table, B, 1)
    90  	m.assertGetOrInsert(table, C, 2)
    91  	m.assertGetOrInsert(table, D, 3)
    92  	m.assertGetOrInsert(table, E, 4)
    93  	m.assertGetOrInsertNull(table, 5)
    94  
    95  	m.assertGet(table, A, 0)
    96  	m.assertGetOrInsert(table, A, 0)
    97  	m.assertGet(table, E, 4)
    98  	m.assertGetOrInsert(table, E, 4)
    99  
   100  	m.assertGetOrInsert(table, F, 6)
   101  	m.assertGetOrInsert(table, G, 7)
   102  	m.assertGetOrInsert(table, H, 8)
   103  
   104  	m.assertGetOrInsert(table, G, 7)
   105  	m.assertGetOrInsert(table, F, 6)
   106  	m.assertGetOrInsertNull(table, 5)
   107  	m.assertGetOrInsert(table, E, 4)
   108  	m.assertGetOrInsert(table, D, 3)
   109  	m.assertGetOrInsert(table, C, 2)
   110  	m.assertGetOrInsert(table, B, 1)
   111  	m.assertGetOrInsert(table, A, 0)
   112  
   113  	const sz int = 9
   114  	m.Equal(sz, table.Size())
   115  	m.Panics(func() {
   116  		values := make([]int32, sz)
   117  		table.CopyValues(values)
   118  	}, "should panic because wrong type")
   119  	m.Panics(func() {
   120  		values := make([]int64, sz-3)
   121  		table.CopyValues(values)
   122  	}, "should panic because out of bounds")
   123  
   124  	{
   125  		values := make([]int64, sz)
   126  		table.CopyValues(values)
   127  		m.Equal([]int64{A, B, C, D, E, 0, F, G, H}, values)
   128  	}
   129  	{
   130  		const offset = 3
   131  		values := make([]int64, sz-offset)
   132  		table.CopyValuesSubset(offset, values)
   133  		m.Equal([]int64{D, E, 0, F, G, H}, values)
   134  	}
   135  }
   136  
   137  func (m *MemoTableTestSuite) TestFloat64() {
   138  	const (
   139  		A float64 = 0.0
   140  		B float64 = 1.5
   141  		C float64 = -0.1
   142  	)
   143  	var (
   144  		D = math.Inf(1)
   145  		E = -D
   146  		F = math.NaN()                                       // uses Quiet NaN i.e. 0x7FF8000000000001
   147  		G = math.Float64frombits(uint64(0x7FF0000000000001)) // test Signalling NaN
   148  		H = math.Float64frombits(uint64(0xFFF7FFFFFFFFFFFF)) // other NaN bit pattern
   149  	)
   150  
   151  	// table := encoding.NewFloat64MemoTable(nil)
   152  	table := hashing.NewFloat64MemoTable(0)
   153  	m.Zero(table.Size())
   154  	m.assertGetNotFound(table, A)
   155  	m.assertGetNullNotFound(table)
   156  	m.assertGetOrInsert(table, A, 0)
   157  	m.assertGetNotFound(table, B)
   158  	m.assertGetOrInsert(table, B, 1)
   159  	m.assertGetOrInsert(table, C, 2)
   160  	m.assertGetOrInsert(table, D, 3)
   161  	m.assertGetOrInsert(table, E, 4)
   162  	m.assertGetOrInsert(table, F, 5)
   163  	m.assertGetOrInsert(table, G, 5)
   164  	m.assertGetOrInsert(table, H, 5)
   165  
   166  	m.assertGet(table, A, 0)
   167  	m.assertGetOrInsert(table, A, 0)
   168  	m.assertGetOrInsert(table, B, 1)
   169  	m.assertGetOrInsert(table, C, 2)
   170  	m.assertGetOrInsert(table, D, 3)
   171  	m.assertGet(table, E, 4)
   172  	m.assertGetOrInsert(table, E, 4)
   173  	m.assertGet(table, F, 5)
   174  	m.assertGetOrInsert(table, F, 5)
   175  	m.assertGet(table, G, 5)
   176  	m.assertGetOrInsert(table, G, 5)
   177  	m.assertGet(table, H, 5)
   178  	m.assertGetOrInsert(table, H, 5)
   179  
   180  	m.Equal(6, table.Size())
   181  	expected := []float64{A, B, C, D, E, F}
   182  	m.Panics(func() {
   183  		values := make([]int32, 6)
   184  		table.CopyValues(values)
   185  	}, "should panic because wrong type")
   186  	m.Panics(func() {
   187  		values := make([]float64, 3)
   188  		table.CopyValues(values)
   189  	}, "should panic because out of bounds")
   190  
   191  	values := make([]float64, len(expected))
   192  	table.CopyValues(values)
   193  	for idx, ex := range expected {
   194  		if math.IsNaN(ex) {
   195  			m.True(math.IsNaN(values[idx]))
   196  		} else {
   197  			m.Equal(ex, values[idx])
   198  		}
   199  	}
   200  }
   201  
   202  func (m *MemoTableTestSuite) TestBinaryBasics() {
   203  	const (
   204  		A = ""
   205  		B = "a"
   206  		C = "foo"
   207  		D = "bar"
   208  		E = "\000"
   209  		F = "\000trailing"
   210  	)
   211  
   212  	table := hashing.NewBinaryMemoTable(memory.DefaultAllocator, 0, -1)
   213  	defer table.Release()
   214  
   215  	m.Zero(table.Size())
   216  	m.assertGetNotFound(table, A)
   217  	m.assertGetNullNotFound(table)
   218  	m.assertGetOrInsert(table, A, 0)
   219  	m.assertGetNotFound(table, B)
   220  	m.assertGetOrInsert(table, B, 1)
   221  	m.assertGetOrInsert(table, C, 2)
   222  	m.assertGetOrInsert(table, D, 3)
   223  	m.assertGetOrInsert(table, E, 4)
   224  	m.assertGetOrInsert(table, F, 5)
   225  	m.assertGetOrInsertNull(table, 6)
   226  
   227  	m.assertGet(table, A, 0)
   228  	m.assertGetOrInsert(table, A, 0)
   229  	m.assertGet(table, B, 1)
   230  	m.assertGetOrInsert(table, B, 1)
   231  	m.assertGetOrInsert(table, C, 2)
   232  	m.assertGetOrInsert(table, D, 3)
   233  	m.assertGetOrInsert(table, E, 4)
   234  	m.assertGet(table, F, 5)
   235  	m.assertGetOrInsert(table, F, 5)
   236  	m.assertGetNull(table, 6)
   237  	m.assertGetOrInsertNull(table, 6)
   238  
   239  	m.Equal(7, table.Size())
   240  	m.Equal(17, table.ValuesSize())
   241  
   242  	size := table.Size()
   243  	{
   244  		offsets := make([]int8, size+1)
   245  		table.CopyOffsets(offsets)
   246  		m.Equal([]int8{0, 0, 1, 4, 7, 8, 17, 17}, offsets)
   247  
   248  		expectedValues := "afoobar"
   249  		expectedValues += "\000"
   250  		expectedValues += "\000"
   251  		expectedValues += "trailing"
   252  		values := make([]byte, 17)
   253  		table.CopyValues(values)
   254  		m.Equal(expectedValues, string(values))
   255  	}
   256  
   257  	{
   258  		startOffset := 4
   259  		offsets := make([]int8, size+1-int(startOffset))
   260  		table.CopyOffsetsSubset(startOffset, offsets)
   261  		m.Equal([]int8{0, 1, 10, 10}, offsets)
   262  
   263  		expectedValues := ""
   264  		expectedValues += "\000"
   265  		expectedValues += "\000"
   266  		expectedValues += "trailing"
   267  
   268  		values := make([]byte, 10)
   269  		table.CopyValuesSubset(startOffset, values)
   270  		m.Equal(expectedValues, string(values))
   271  	}
   272  
   273  	{
   274  		startOffset := 1
   275  		values := make([]string, 0)
   276  		table.VisitValues(startOffset, func(b []byte) {
   277  			values = append(values, string(b))
   278  		})
   279  		m.Equal([]string{B, C, D, E, F, ""}, values)
   280  	}
   281  }
   282  
   283  func (m *MemoTableTestSuite) TestBinaryEmpty() {
   284  	table := encoding.NewBinaryMemoTable(memory.DefaultAllocator)
   285  	defer table.Release()
   286  
   287  	m.Zero(table.Size())
   288  	offsets := make([]int8, 1)
   289  	table.CopyOffsetsSubset(0, offsets)
   290  	m.Equal(int8(0), offsets[0])
   291  }