github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encoding/memo_table_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package encoding_test 18 19 import ( 20 "math" 21 "testing" 22 23 "github.com/apache/arrow/go/v14/arrow" 24 "github.com/apache/arrow/go/v14/arrow/array" 25 "github.com/apache/arrow/go/v14/arrow/memory" 26 "github.com/apache/arrow/go/v14/internal/hashing" 27 "github.com/apache/arrow/go/v14/parquet/internal/encoding" 28 "github.com/stretchr/testify/suite" 29 ) 30 31 type MemoTableTestSuite struct { 32 suite.Suite 33 } 34 35 func TestMemoTable(t *testing.T) { 36 suite.Run(t, new(MemoTableTestSuite)) 37 } 38 39 func (m *MemoTableTestSuite) assertGetNotFound(table encoding.MemoTable, v interface{}) { 40 _, ok := table.Get(v) 41 m.False(ok) 42 } 43 44 func (m *MemoTableTestSuite) assertGet(table encoding.MemoTable, v interface{}, expected int) { 45 idx, ok := table.Get(v) 46 m.Equal(expected, idx) 47 m.True(ok) 48 } 49 50 func (m *MemoTableTestSuite) assertGetOrInsert(table encoding.MemoTable, v interface{}, expected int) { 51 idx, _, err := table.GetOrInsert(v) 52 m.NoError(err) 53 m.Equal(expected, idx) 54 } 55 56 func (m *MemoTableTestSuite) assertGetNullNotFound(table encoding.MemoTable) { 57 _, ok := table.GetNull() 58 m.False(ok) 59 } 60 61 func (m *MemoTableTestSuite) assertGetNull(table encoding.MemoTable, expected int) { 62 idx, ok := table.GetNull() 63 m.Equal(expected, idx) 64 m.True(ok) 65 } 66 67 func (m *MemoTableTestSuite) assertGetOrInsertNull(table encoding.MemoTable, expected int) { 68 idx, _ := table.GetOrInsertNull() 69 m.Equal(expected, idx) 70 } 71 72 func (m *MemoTableTestSuite) TestInt64() { 73 const ( 74 A int64 = 1234 75 B int64 = 0 76 C int64 = -98765321 77 D int64 = 12345678901234 78 E int64 = -1 79 F int64 = 1 80 G int64 = 9223372036854775807 81 H int64 = -9223372036854775807 - 1 82 ) 83 84 // table := encoding.NewInt64MemoTable(nil) 85 table := hashing.NewInt64MemoTable(0) 86 m.Zero(table.Size()) 87 m.assertGetNotFound(table, A) 88 m.assertGetNullNotFound(table) 89 m.assertGetOrInsert(table, A, 0) 90 m.assertGetNotFound(table, B) 91 m.assertGetOrInsert(table, B, 1) 92 m.assertGetOrInsert(table, C, 2) 93 m.assertGetOrInsert(table, D, 3) 94 m.assertGetOrInsert(table, E, 4) 95 m.assertGetOrInsertNull(table, 5) 96 97 m.assertGet(table, A, 0) 98 m.assertGetOrInsert(table, A, 0) 99 m.assertGet(table, E, 4) 100 m.assertGetOrInsert(table, E, 4) 101 102 m.assertGetOrInsert(table, F, 6) 103 m.assertGetOrInsert(table, G, 7) 104 m.assertGetOrInsert(table, H, 8) 105 106 m.assertGetOrInsert(table, G, 7) 107 m.assertGetOrInsert(table, F, 6) 108 m.assertGetOrInsertNull(table, 5) 109 m.assertGetOrInsert(table, E, 4) 110 m.assertGetOrInsert(table, D, 3) 111 m.assertGetOrInsert(table, C, 2) 112 m.assertGetOrInsert(table, B, 1) 113 m.assertGetOrInsert(table, A, 0) 114 115 const sz int = 9 116 m.Equal(sz, table.Size()) 117 m.Panics(func() { 118 values := make([]int32, sz) 119 table.CopyValues(values) 120 }, "should panic because wrong type") 121 m.Panics(func() { 122 values := make([]int64, sz-3) 123 table.CopyValues(values) 124 }, "should panic because out of bounds") 125 126 { 127 values := make([]int64, sz) 128 table.CopyValues(values) 129 m.Equal([]int64{A, B, C, D, E, 0, F, G, H}, values) 130 } 131 { 132 const offset = 3 133 values := make([]int64, sz-offset) 134 table.CopyValuesSubset(offset, values) 135 m.Equal([]int64{D, E, 0, F, G, H}, values) 136 } 137 } 138 139 func (m *MemoTableTestSuite) TestFloat64() { 140 const ( 141 A float64 = 0.0 142 B float64 = 1.5 143 C float64 = -0.1 144 ) 145 var ( 146 D = math.Inf(1) 147 E = -D 148 F = math.NaN() // uses Quiet NaN i.e. 0x7FF8000000000001 149 G = math.Float64frombits(uint64(0x7FF0000000000001)) // test Signalling NaN 150 H = math.Float64frombits(uint64(0xFFF7FFFFFFFFFFFF)) // other NaN bit pattern 151 ) 152 153 // table := encoding.NewFloat64MemoTable(nil) 154 table := hashing.NewFloat64MemoTable(0) 155 m.Zero(table.Size()) 156 m.assertGetNotFound(table, A) 157 m.assertGetNullNotFound(table) 158 m.assertGetOrInsert(table, A, 0) 159 m.assertGetNotFound(table, B) 160 m.assertGetOrInsert(table, B, 1) 161 m.assertGetOrInsert(table, C, 2) 162 m.assertGetOrInsert(table, D, 3) 163 m.assertGetOrInsert(table, E, 4) 164 m.assertGetOrInsert(table, F, 5) 165 m.assertGetOrInsert(table, G, 5) 166 m.assertGetOrInsert(table, H, 5) 167 168 m.assertGet(table, A, 0) 169 m.assertGetOrInsert(table, A, 0) 170 m.assertGetOrInsert(table, B, 1) 171 m.assertGetOrInsert(table, C, 2) 172 m.assertGetOrInsert(table, D, 3) 173 m.assertGet(table, E, 4) 174 m.assertGetOrInsert(table, E, 4) 175 m.assertGet(table, F, 5) 176 m.assertGetOrInsert(table, F, 5) 177 m.assertGet(table, G, 5) 178 m.assertGetOrInsert(table, G, 5) 179 m.assertGet(table, H, 5) 180 m.assertGetOrInsert(table, H, 5) 181 182 m.Equal(6, table.Size()) 183 expected := []float64{A, B, C, D, E, F} 184 m.Panics(func() { 185 values := make([]int32, 6) 186 table.CopyValues(values) 187 }, "should panic because wrong type") 188 m.Panics(func() { 189 values := make([]float64, 3) 190 table.CopyValues(values) 191 }, "should panic because out of bounds") 192 193 values := make([]float64, len(expected)) 194 table.CopyValues(values) 195 for idx, ex := range expected { 196 if math.IsNaN(ex) { 197 m.True(math.IsNaN(values[idx])) 198 } else { 199 m.Equal(ex, values[idx]) 200 } 201 } 202 } 203 204 func (m *MemoTableTestSuite) TestBinaryBasics() { 205 const ( 206 A = "" 207 B = "a" 208 C = "foo" 209 D = "bar" 210 E = "\000" 211 F = "\000trailing" 212 ) 213 214 table := hashing.NewBinaryMemoTable(0, -1, array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)) 215 defer table.Release() 216 217 m.Zero(table.Size()) 218 m.assertGetNotFound(table, A) 219 m.assertGetNullNotFound(table) 220 m.assertGetOrInsert(table, A, 0) 221 m.assertGetNotFound(table, B) 222 m.assertGetOrInsert(table, B, 1) 223 m.assertGetOrInsert(table, C, 2) 224 m.assertGetOrInsert(table, D, 3) 225 m.assertGetOrInsert(table, E, 4) 226 m.assertGetOrInsert(table, F, 5) 227 m.assertGetOrInsertNull(table, 6) 228 229 m.assertGet(table, A, 0) 230 m.assertGetOrInsert(table, A, 0) 231 m.assertGet(table, B, 1) 232 m.assertGetOrInsert(table, B, 1) 233 m.assertGetOrInsert(table, C, 2) 234 m.assertGetOrInsert(table, D, 3) 235 m.assertGetOrInsert(table, E, 4) 236 m.assertGet(table, F, 5) 237 m.assertGetOrInsert(table, F, 5) 238 m.assertGetNull(table, 6) 239 m.assertGetOrInsertNull(table, 6) 240 241 m.Equal(7, table.Size()) 242 m.Equal(17, table.ValuesSize()) 243 244 size := table.Size() 245 { 246 offsets := make([]int32, size+1) 247 table.CopyOffsets(offsets) 248 m.Equal([]int32{0, 0, 1, 4, 7, 8, 17, 17}, offsets) 249 250 expectedValues := "afoobar" 251 expectedValues += "\000" 252 expectedValues += "\000" 253 expectedValues += "trailing" 254 values := make([]byte, 17) 255 table.CopyValues(values) 256 m.Equal(expectedValues, string(values)) 257 } 258 259 { 260 startOffset := 4 261 offsets := make([]int32, size+1-int(startOffset)) 262 table.CopyOffsetsSubset(startOffset, offsets) 263 m.Equal([]int32{0, 1, 10, 10}, offsets) 264 265 expectedValues := "" 266 expectedValues += "\000" 267 expectedValues += "\000" 268 expectedValues += "trailing" 269 270 values := make([]byte, 10) 271 table.CopyValuesSubset(startOffset, values) 272 m.Equal(expectedValues, string(values)) 273 } 274 275 { 276 startOffset := 1 277 values := make([]string, 0) 278 table.VisitValues(startOffset, func(b []byte) { 279 values = append(values, string(b)) 280 }) 281 m.Equal([]string{B, C, D, E, F, ""}, values) 282 } 283 } 284 285 func (m *MemoTableTestSuite) TestBinaryEmpty() { 286 table := encoding.NewBinaryMemoTable(memory.DefaultAllocator) 287 defer table.Release() 288 289 m.Zero(table.Size()) 290 offsets := make([]int32, 1) 291 table.CopyOffsetsSubset(0, offsets) 292 m.Equal(int32(0), offsets[0]) 293 }