github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/common/hll_test.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package common 16 17 import ( 18 "fmt" 19 "github.com/onsi/ginkgo" 20 . "github.com/onsi/gomega" 21 memCom "github.com/uber/aresdb/memstore/common" 22 "io/ioutil" 23 "unsafe" 24 ) 25 26 var _ = ginkgo.Describe("hll", func() { 27 hllData := [DenseDataLength + 28]byte{} 28 hllData[12] = 1 29 hllData[13] = 1 30 31 ginkgo.It("CalculateSizes should work", func() { 32 hllData := HLLData{ 33 DimIndexes: make([]int, 7), 34 DataTypes: make([]memCom.DataType, 7), 35 PaddedRawDimValuesVectorLength: 100, 36 ResultSize: 10, 37 PaddedHLLVectorLength: DenseDataLength + 32, 38 } 39 headerSize, totalSize := hllData.CalculateSizes() 40 Ω(headerSize).Should(BeEquivalentTo(56)) 41 Ω(totalSize).Should(BeEquivalentTo(16596)) 42 43 hllData.EnumDicts = map[int][]string{ 44 1: {"a", "b", "c", "d"}, // 4 + 8 + 4 + 8 = 24 45 2: {}, // 8 46 } 47 48 headerSize, totalSize = hllData.CalculateSizes() 49 Ω(headerSize).Should(BeEquivalentTo(88)) 50 Ω(totalSize).Should(BeEquivalentTo(16628)) 51 }) 52 53 ginkgo.It("CalculateEnumCasesBytes should work", func() { 54 Ω(CalculateEnumCasesBytes([]string{"ss", "a", "b"})).Should(BeEquivalentTo(16)) 55 Ω(CalculateEnumCasesBytes([]string{"ss"})).Should(BeEquivalentTo(8)) 56 Ω(CalculateEnumCasesBytes([]string{})).Should(BeEquivalentTo(0)) 57 }) 58 59 ginkgo.It("readHLL should work", func() { 60 counts := []uint16{3, DenseDataLength, 4, DenseDataLength, 5} 61 hllVector := [2*DenseDataLength + 48]byte{} 62 63 hllVector[12] = 1 64 hllVector[13] = 1 65 66 var currentOffset int64 67 var hllData HLL 68 // Sparse 69 hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[0], ¤tOffset) 70 Ω(currentOffset).Should(BeEquivalentTo(12)) 71 Ω(hllData.SparseData).ShouldNot(BeNil()) 72 Ω(hllData.DenseData).Should(BeNil()) 73 Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(3)) 74 75 // Dense 76 hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[1], ¤tOffset) 77 Ω(currentOffset).Should(BeEquivalentTo(12 + DenseDataLength)) 78 Ω(hllData.SparseData).Should(BeNil()) 79 Ω(hllData.DenseData).ShouldNot(BeNil()) 80 Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(2)) 81 82 // Sparse 83 hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[2], ¤tOffset) 84 Ω(currentOffset).Should(BeEquivalentTo(28 + DenseDataLength)) 85 Ω(hllData.SparseData).ShouldNot(BeNil()) 86 Ω(hllData.DenseData).Should(BeNil()) 87 Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(4)) 88 89 // Dense 90 hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[3], ¤tOffset) 91 Ω(currentOffset).Should(BeEquivalentTo(28 + 2*DenseDataLength)) 92 Ω(hllData.SparseData).Should(BeNil()) 93 Ω(hllData.DenseData).ShouldNot(BeNil()) 94 Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(0)) 95 96 // Sparse 97 hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[4], ¤tOffset) 98 Ω(currentOffset).Should(BeEquivalentTo(48 + 2*DenseDataLength)) 99 Ω(hllData.SparseData).ShouldNot(BeNil()) 100 Ω(hllData.DenseData).Should(BeNil()) 101 Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(5)) 102 }) 103 104 ginkgo.It("NewTimeSeriesHLLResult should work", func() { 105 data, err := ioutil.ReadFile("../../testing/data/query/hll") 106 Ω(err).Should(BeNil()) 107 108 expected := AQLQueryResult{ 109 "NULL": map[string]interface{}{ 110 "NULL": map[string]interface{}{ 111 "NULL": HLL{NonZeroRegisters: 3, 112 SparseData: []HLLRegister{{Index: 1, Rho: 255}, {Index: 2, Rho: 254}, {Index: 3, Rho: 253}}, 113 }, 114 }}, 115 "1": map[string]interface{}{ 116 "c": map[string]interface{}{ 117 "2": HLL{NonZeroRegisters: 2, DenseData: hllData[12 : 12+DenseDataLength]}, 118 }, 119 }, 120 "4294967295": map[string]interface{}{ 121 "d": map[string]interface{}{ 122 "514": HLL{NonZeroRegisters: 4, SparseData: []HLLRegister{{Index: 255, Rho: 1}, {Index: 254, Rho: 2}, {Index: 253, Rho: 3}, {Index: 252, Rho: 4}}}, 123 }, 124 }} 125 126 res, err := NewTimeSeriesHLLResult(data, HLLDataHeader) 127 Ω(err).Should(BeNil()) 128 Ω(res).Should(Equal(expected)) 129 }) 130 131 ginkgo.It("ParseHLLQueryResults should work", func() { 132 data, err := ioutil.ReadFile("../../testing/data/query/hll_query_results") 133 Ω(err).Should(BeNil()) 134 results, errs, err := ParseHLLQueryResults(data) 135 Ω(errs).Should(HaveLen(2)) 136 Ω(results).Should(HaveLen(2)) 137 Ω(errs[1].Error()).Should(Equal("test")) 138 Ω(results[0]).Should(Equal(AQLQueryResult{ 139 "NULL": map[string]interface{}{ 140 "NULL": map[string]interface{}{ 141 "NULL": HLL{NonZeroRegisters: 3, 142 SparseData: []HLLRegister{{Index: 1, Rho: 255}, {Index: 2, Rho: 254}, {Index: 3, Rho: 253}}, 143 }, 144 }}, 145 "1": map[string]interface{}{ 146 "c": map[string]interface{}{ 147 "2": HLL{NonZeroRegisters: 2, DenseData: hllData[12 : 12+DenseDataLength]}, 148 }, 149 }, 150 "4294967295": map[string]interface{}{ 151 "d": map[string]interface{}{ 152 "514": HLL{NonZeroRegisters: 4, SparseData: []HLLRegister{{Index: 255, Rho: 1}, {Index: 254, Rho: 2}, {Index: 253, Rho: 3}, {Index: 252, Rho: 4}}}, 153 }, 154 }})) 155 }) 156 157 ginkgo.It("Computes hll correctly", func() { 158 h := HLL{ 159 SparseData: []HLLRegister{ 160 { 161 100, 162 1, 163 }, { 164 200, 2, 165 }, 166 }, 167 NonZeroRegisters: 2, 168 } 169 Ω(h.Compute()).Should(Equal(2.0)) 170 }) 171 172 ginkgo.It("Parse empty hll result", func() { 173 data, err := ioutil.ReadFile("../../testing/data/query/hll_empty_results") 174 Ω(err).Should(BeNil()) 175 results, errs, err := ParseHLLQueryResults(data) 176 fmt.Println(errs, err) 177 Ω(results).Should(Equal([]AQLQueryResult{{}})) 178 Ω(errs).Should(Equal([]error{nil})) 179 Ω(err).Should(BeNil()) 180 }) 181 182 ginkgo.It("encodes and decodes", func() { 183 h1 := HLL{ 184 SparseData: []HLLRegister{ 185 { 186 Index: 100, 187 Rho: 1, 188 }, 189 { 190 Index: 200, 191 Rho: 2, 192 }, 193 }, 194 NonZeroRegisters: 2, 195 } 196 197 var h2 HLL 198 h2.Decode(h1.Encode()) 199 Ω(h2).Should(Equal(h1)) 200 201 hllDenseData := make([]byte, 1<<hllP) 202 hllDenseData[100] = 1 203 hllDenseData[200] = 2 204 h1 = HLL{ 205 DenseData: hllDenseData, 206 NonZeroRegisters: 2, 207 } 208 h2 = HLL{} 209 h2.Decode(h1.Encode()) 210 Ω(h2).Should(Equal(h1)) 211 }) 212 213 ginkgo.It("stores data in sparse or dense format", func() { 214 var h HLL 215 h.Set(100, 1) 216 h.Set(200, 2) 217 Ω(h).Should(Equal(HLL{ 218 SparseData: []HLLRegister{ 219 {Index: 100, Rho: 1}, 220 {Index: 200, Rho: 2}, 221 }, 222 DenseData: nil, 223 NonZeroRegisters: 2, 224 })) 225 226 for i := 201; i < 4300; i++ { 227 h.Set(uint16(i), 3) 228 } 229 Ω(h.SparseData).Should(BeNil()) 230 Ω(len(h.DenseData)).Should(Equal(0x4000)) 231 Ω(h.DenseData[100]).Should(Equal(byte(1))) 232 Ω(h.DenseData[200]).Should(Equal(byte(2))) 233 Ω(h.DenseData[201]).Should(Equal(byte(3))) 234 Ω(h.DenseData[4299]).Should(Equal(byte(3))) 235 Ω(h.DenseData[4300]).Should(Equal(byte(0))) 236 Ω(h.NonZeroRegisters).Should(Equal(uint16(4101))) 237 }) 238 })