github.com/apache/arrow/go/v14@v14.0.2/parquet/metadata/stat_compare_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package metadata 18 19 import ( 20 "encoding/binary" 21 "testing" 22 23 "github.com/apache/arrow/go/v14/parquet" 24 "github.com/apache/arrow/go/v14/parquet/schema" 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 ) 28 29 func TestSignedByteArrayCompare(t *testing.T) { 30 s := ByteArrayStatistics{ 31 statistics: statistics{ 32 order: schema.SortSIGNED, 33 }, 34 } 35 36 // signed byte array comparison is only used for Decimal comparison. 37 // when decimals are encoded as byte arrays they use twos compliment 38 // big-endian encoded values. Comparisons of byte arrays of unequal 39 // types need to handle sign extension. 40 41 tests := []struct { 42 b []byte 43 order int 44 }{ 45 {[]byte{0x80, 0x80, 0, 0}, 0}, 46 {[]byte{ /*0xFF,*/ 0x80, 0, 0}, 1}, 47 {[]byte{0xFF, 0x80, 0, 0}, 1}, 48 {[]byte{ /*0xFF,*/ 0xFF, 0x01, 0}, 2}, 49 {[]byte{ /*0xFF, 0xFF,*/ 0x80, 0}, 3}, 50 {[]byte{ /*0xFF,*/ 0xFF, 0x80, 0}, 3}, 51 {[]byte{0xFF, 0xFF, 0x80, 0}, 3}, 52 {[]byte{ /*0xFF,0xFF,0xFF,*/ 0x80}, 4}, 53 {[]byte{ /*0xFF,0xFF,0xFF*/ 0xFF}, 5}, 54 {[]byte{ /*0, 0,*/ 0x01, 0x01}, 6}, 55 {[]byte{ /*0,*/ 0, 0x01, 0x01}, 6}, 56 {[]byte{0, 0, 0x01, 0x01}, 6}, 57 {[]byte{ /*0,*/ 0x01, 0x01, 0}, 7}, 58 {[]byte{0x01, 0x01, 0, 0}, 8}, 59 } 60 61 for i, tt := range tests { 62 // empty array is always the smallest 63 assert.Truef(t, s.less(parquet.ByteArray{}, parquet.ByteArray(tt.b)), "case: %d", i) 64 assert.Falsef(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray{}), "case: %d", i) 65 // equals is always false 66 assert.Falsef(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray(tt.b)), "case: %d", i) 67 68 for j, case2 := range tests { 69 var fn func(assert.TestingT, bool, string, ...interface{}) bool 70 if tt.order < case2.order { 71 fn = assert.Truef 72 } else { 73 fn = assert.Falsef 74 } 75 fn(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray(case2.b)), 76 "%d (order: %d) %d (order: %d)", i, tt.order, j, case2.order) 77 } 78 } 79 } 80 81 func TestUnsignedByteArrayCompare(t *testing.T) { 82 s := ByteArrayStatistics{ 83 statistics: statistics{ 84 order: schema.SortUNSIGNED, 85 }, 86 } 87 88 s1ba := parquet.ByteArray("arrange") 89 s2ba := parquet.ByteArray("arrangement") 90 assert.True(t, s.less(s1ba, s2ba)) 91 92 // multi-byte utf-8 characters 93 s1ba = parquet.ByteArray("braten") 94 s2ba = parquet.ByteArray("bügeln") 95 assert.True(t, s.less(s1ba, s2ba)) 96 97 s1ba = parquet.ByteArray("ünk123456") // ü = 252 98 s2ba = parquet.ByteArray("ănk123456") // ă = 259 99 assert.True(t, s.less(s1ba, s2ba)) 100 } 101 102 func TestSignedCompareFLBA(t *testing.T) { 103 s := FixedLenByteArrayStatistics{ 104 statistics: statistics{order: schema.SortSIGNED}, 105 } 106 107 values := []parquet.FixedLenByteArray{ 108 []byte{0x80, 0, 0, 0}, 109 []byte{0xFF, 0xFF, 0x01, 0}, 110 []byte{0xFF, 0xFF, 0x80, 0}, 111 []byte{0xFF, 0xFF, 0xFF, 0x80}, 112 []byte{0xFF, 0xFF, 0xFF, 0xFF}, 113 []byte{0, 0, 0x01, 0x01}, 114 []byte{0, 0x01, 0x01, 0}, 115 []byte{0x01, 0x01, 0, 0}, 116 } 117 118 for i, v := range values { 119 assert.Falsef(t, s.less(v, v), "%d", i) 120 for j, v2 := range values[i+1:] { 121 assert.Truef(t, s.less(v, v2), "%d %d", i, j) 122 assert.Falsef(t, s.less(v2, v), "%d %d", j, i) 123 } 124 } 125 } 126 127 func TestUnsignedCompareFLBA(t *testing.T) { 128 s := FixedLenByteArrayStatistics{ 129 statistics: statistics{order: schema.SortUNSIGNED}, 130 } 131 132 s1flba := parquet.FixedLenByteArray("Anti123456") 133 s2flba := parquet.FixedLenByteArray("Bunkd123456") 134 assert.True(t, s.less(s1flba, s2flba)) 135 136 s1flba = parquet.FixedLenByteArray("Bunk123456") 137 s2flba = parquet.FixedLenByteArray("Bünk123456") 138 assert.True(t, s.less(s1flba, s2flba)) 139 } 140 141 func TestSignedCompareInt96(t *testing.T) { 142 s := Int96Statistics{ 143 statistics: statistics{order: schema.SortSIGNED}, 144 } 145 146 val := -14 147 148 var ( 149 a = parquet.NewInt96([3]uint32{1, 41, 14}) 150 b = parquet.NewInt96([3]uint32{1, 41, 42}) 151 aa = parquet.NewInt96([3]uint32{1, 41, 14}) 152 bb = parquet.NewInt96([3]uint32{1, 41, 14}) 153 aaa = parquet.NewInt96([3]uint32{1, 41, uint32(val)}) 154 bbb = parquet.NewInt96([3]uint32{1, 41, 42}) 155 ) 156 157 assert.True(t, s.less(a, b)) 158 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa)) 159 assert.True(t, s.less(aaa, bbb)) 160 } 161 162 func TestUnsignedCompareInt96(t *testing.T) { 163 s := Int96Statistics{ 164 statistics: statistics{order: schema.SortUNSIGNED}, 165 } 166 167 valb := -41 168 valbb := -14 169 170 var ( 171 a = parquet.NewInt96([3]uint32{1, 41, 14}) 172 b = parquet.NewInt96([3]uint32{1, uint32(valb), 42}) 173 aa = parquet.NewInt96([3]uint32{1, 41, 14}) 174 bb = parquet.NewInt96([3]uint32{1, 41, uint32(valbb)}) 175 aaa parquet.Int96 176 bbb parquet.Int96 177 ) 178 179 assert.True(t, s.less(a, b)) 180 assert.True(t, s.less(aa, bb)) 181 182 binary.LittleEndian.PutUint32(aaa[8:], 2451545) // 2000-01-01 183 binary.LittleEndian.PutUint32(bbb[8:], 2451546) // 2000-01-02 184 // 12 hours + 34 minutes + 56 seconds 185 aaa.SetNanoSeconds(45296000000000) 186 // 12 hours + 34 minutes + 50 seconds 187 bbb.SetNanoSeconds(45290000000000) 188 assert.True(t, s.less(aaa, bbb)) 189 190 binary.LittleEndian.PutUint32(aaa[8:], 2451545) // 2000-01-01 191 binary.LittleEndian.PutUint32(bbb[8:], 2451545) // 2000-01-01 192 // 11 hours + 34 minutes + 56 seconds 193 aaa.SetNanoSeconds(41696000000000) 194 // 12 hours + 34 minutes + 50 seconds 195 bbb.SetNanoSeconds(45290000000000) 196 assert.True(t, s.less(aaa, bbb)) 197 198 binary.LittleEndian.PutUint32(aaa[8:], 2451545) // 2000-01-01 199 binary.LittleEndian.PutUint32(bbb[8:], 2451545) // 2000-01-01 200 // 12 hours + 34 minutes + 55 seconds 201 aaa.SetNanoSeconds(45295000000000) 202 // 12 hours + 34 minutes + 56 seconds 203 bbb.SetNanoSeconds(45296000000000) 204 assert.True(t, s.less(aaa, bbb)) 205 } 206 207 func TestCompareSignedInt64(t *testing.T) { 208 var ( 209 a int64 = 1 210 b int64 = 4 211 aa int64 = 1 212 bb int64 = 1 213 aaa int64 = -1 214 bbb int64 = 1 215 ) 216 217 n := schema.NewInt64Node("signedint64", parquet.Repetitions.Required, -1) 218 descr := schema.NewColumn(n, 0, 0) 219 s := NewStatistics(descr, nil).(*Int64Statistics) 220 221 assert.True(t, s.less(a, b)) 222 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa)) 223 assert.True(t, s.less(aaa, bbb)) 224 } 225 226 func TestCompareUnsignedInt64(t *testing.T) { 227 var ( 228 a int64 = 1 229 b int64 = 4 230 aa int64 = 1 231 bb int64 = 1 232 aaa int64 = 1 233 bbb int64 = -1 234 ) 235 236 n, err := schema.NewPrimitiveNodeConverted("unsigned int64", parquet.Repetitions.Required, parquet.Types.Int64, schema.ConvertedTypes.Uint64, 0, 0, 0, 0) 237 require.NoError(t, err) 238 descr := schema.NewColumn(n, 0, 0) 239 240 assert.Equal(t, schema.SortUNSIGNED, descr.SortOrder()) 241 s := NewStatistics(descr, nil).(*Int64Statistics) 242 243 assert.True(t, s.less(a, b)) 244 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa)) 245 assert.True(t, s.less(aaa, bbb)) 246 } 247 248 func TestCompareUnsignedInt32(t *testing.T) { 249 var ( 250 a int32 = 1 251 b int32 = 4 252 aa int32 = 1 253 bb int32 = 1 254 aaa int32 = 1 255 bbb int32 = -1 256 ) 257 258 n, err := schema.NewPrimitiveNodeConverted("unsigned int32", parquet.Repetitions.Required, parquet.Types.Int32, schema.ConvertedTypes.Uint32, 0, 0, 0, 0) 259 require.NoError(t, err) 260 descr := schema.NewColumn(n, 0, 0) 261 262 assert.Equal(t, schema.SortUNSIGNED, descr.SortOrder()) 263 s := NewStatistics(descr, nil).(*Int32Statistics) 264 265 assert.True(t, s.less(a, b)) 266 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa)) 267 assert.True(t, s.less(aaa, bbb)) 268 }