github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/common/hll_test.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package common
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/onsi/ginkgo"
    20  	. "github.com/onsi/gomega"
    21  	memCom "github.com/uber/aresdb/memstore/common"
    22  	"io/ioutil"
    23  	"unsafe"
    24  )
    25  
    26  var _ = ginkgo.Describe("hll", func() {
    27  	hllData := [DenseDataLength + 28]byte{}
    28  	hllData[12] = 1
    29  	hllData[13] = 1
    30  
    31  	ginkgo.It("CalculateSizes should work", func() {
    32  		hllData := HLLData{
    33  			DimIndexes:                     make([]int, 7),
    34  			DataTypes:                      make([]memCom.DataType, 7),
    35  			PaddedRawDimValuesVectorLength: 100,
    36  			ResultSize:                     10,
    37  			PaddedHLLVectorLength:          DenseDataLength + 32,
    38  		}
    39  		headerSize, totalSize := hllData.CalculateSizes()
    40  		Ω(headerSize).Should(BeEquivalentTo(56))
    41  		Ω(totalSize).Should(BeEquivalentTo(16596))
    42  
    43  		hllData.EnumDicts = map[int][]string{
    44  			1: {"a", "b", "c", "d"}, // 4 + 8 + 4 + 8 = 24
    45  			2: {},                   // 8
    46  		}
    47  
    48  		headerSize, totalSize = hllData.CalculateSizes()
    49  		Ω(headerSize).Should(BeEquivalentTo(88))
    50  		Ω(totalSize).Should(BeEquivalentTo(16628))
    51  	})
    52  
    53  	ginkgo.It("CalculateEnumCasesBytes should work", func() {
    54  		Ω(CalculateEnumCasesBytes([]string{"ss", "a", "b"})).Should(BeEquivalentTo(16))
    55  		Ω(CalculateEnumCasesBytes([]string{"ss"})).Should(BeEquivalentTo(8))
    56  		Ω(CalculateEnumCasesBytes([]string{})).Should(BeEquivalentTo(0))
    57  	})
    58  
    59  	ginkgo.It("readHLL should work", func() {
    60  		counts := []uint16{3, DenseDataLength, 4, DenseDataLength, 5}
    61  		hllVector := [2*DenseDataLength + 48]byte{}
    62  
    63  		hllVector[12] = 1
    64  		hllVector[13] = 1
    65  
    66  		var currentOffset int64
    67  		var hllData HLL
    68  		// Sparse
    69  		hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[0], &currentOffset)
    70  		Ω(currentOffset).Should(BeEquivalentTo(12))
    71  		Ω(hllData.SparseData).ShouldNot(BeNil())
    72  		Ω(hllData.DenseData).Should(BeNil())
    73  		Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(3))
    74  
    75  		// Dense
    76  		hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[1], &currentOffset)
    77  		Ω(currentOffset).Should(BeEquivalentTo(12 + DenseDataLength))
    78  		Ω(hllData.SparseData).Should(BeNil())
    79  		Ω(hllData.DenseData).ShouldNot(BeNil())
    80  		Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(2))
    81  
    82  		// Sparse
    83  		hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[2], &currentOffset)
    84  		Ω(currentOffset).Should(BeEquivalentTo(28 + DenseDataLength))
    85  		Ω(hllData.SparseData).ShouldNot(BeNil())
    86  		Ω(hllData.DenseData).Should(BeNil())
    87  		Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(4))
    88  
    89  		// Dense
    90  		hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[3], &currentOffset)
    91  		Ω(currentOffset).Should(BeEquivalentTo(28 + 2*DenseDataLength))
    92  		Ω(hllData.SparseData).Should(BeNil())
    93  		Ω(hllData.DenseData).ShouldNot(BeNil())
    94  		Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(0))
    95  
    96  		// Sparse
    97  		hllData = readHLL(unsafe.Pointer(&hllVector[0]), counts[4], &currentOffset)
    98  		Ω(currentOffset).Should(BeEquivalentTo(48 + 2*DenseDataLength))
    99  		Ω(hllData.SparseData).ShouldNot(BeNil())
   100  		Ω(hllData.DenseData).Should(BeNil())
   101  		Ω(hllData.NonZeroRegisters).Should(BeEquivalentTo(5))
   102  	})
   103  
   104  	ginkgo.It("NewTimeSeriesHLLResult should work", func() {
   105  		data, err := ioutil.ReadFile("../../testing/data/query/hll")
   106  		Ω(err).Should(BeNil())
   107  
   108  		expected := AQLQueryResult{
   109  			"NULL": map[string]interface{}{
   110  				"NULL": map[string]interface{}{
   111  					"NULL": HLL{NonZeroRegisters: 3,
   112  						SparseData: []HLLRegister{{Index: 1, Rho: 255}, {Index: 2, Rho: 254}, {Index: 3, Rho: 253}},
   113  					},
   114  				}},
   115  			"1": map[string]interface{}{
   116  				"c": map[string]interface{}{
   117  					"2": HLL{NonZeroRegisters: 2, DenseData: hllData[12 : 12+DenseDataLength]},
   118  				},
   119  			},
   120  			"4294967295": map[string]interface{}{
   121  				"d": map[string]interface{}{
   122  					"514": HLL{NonZeroRegisters: 4, SparseData: []HLLRegister{{Index: 255, Rho: 1}, {Index: 254, Rho: 2}, {Index: 253, Rho: 3}, {Index: 252, Rho: 4}}},
   123  				},
   124  			}}
   125  
   126  		res, err := NewTimeSeriesHLLResult(data, HLLDataHeader)
   127  		Ω(err).Should(BeNil())
   128  		Ω(res).Should(Equal(expected))
   129  	})
   130  
   131  	ginkgo.It("ParseHLLQueryResults should work", func() {
   132  		data, err := ioutil.ReadFile("../../testing/data/query/hll_query_results")
   133  		Ω(err).Should(BeNil())
   134  		results, errs, err := ParseHLLQueryResults(data)
   135  		Ω(errs).Should(HaveLen(2))
   136  		Ω(results).Should(HaveLen(2))
   137  		Ω(errs[1].Error()).Should(Equal("test"))
   138  		Ω(results[0]).Should(Equal(AQLQueryResult{
   139  			"NULL": map[string]interface{}{
   140  				"NULL": map[string]interface{}{
   141  					"NULL": HLL{NonZeroRegisters: 3,
   142  						SparseData: []HLLRegister{{Index: 1, Rho: 255}, {Index: 2, Rho: 254}, {Index: 3, Rho: 253}},
   143  					},
   144  				}},
   145  			"1": map[string]interface{}{
   146  				"c": map[string]interface{}{
   147  					"2": HLL{NonZeroRegisters: 2, DenseData: hllData[12 : 12+DenseDataLength]},
   148  				},
   149  			},
   150  			"4294967295": map[string]interface{}{
   151  				"d": map[string]interface{}{
   152  					"514": HLL{NonZeroRegisters: 4, SparseData: []HLLRegister{{Index: 255, Rho: 1}, {Index: 254, Rho: 2}, {Index: 253, Rho: 3}, {Index: 252, Rho: 4}}},
   153  				},
   154  			}}))
   155  	})
   156  
   157  	ginkgo.It("Computes hll correctly", func() {
   158  		h := HLL{
   159  			SparseData: []HLLRegister{
   160  				{
   161  					100,
   162  					1,
   163  				}, {
   164  					200, 2,
   165  				},
   166  			},
   167  			NonZeroRegisters: 2,
   168  		}
   169  		Ω(h.Compute()).Should(Equal(2.0))
   170  	})
   171  
   172  	ginkgo.It("Parse empty hll result", func() {
   173  		data, err := ioutil.ReadFile("../../testing/data/query/hll_empty_results")
   174  		Ω(err).Should(BeNil())
   175  		results, errs, err := ParseHLLQueryResults(data)
   176  		fmt.Println(errs, err)
   177  		Ω(results).Should(Equal([]AQLQueryResult{{}}))
   178  		Ω(errs).Should(Equal([]error{nil}))
   179  		Ω(err).Should(BeNil())
   180  	})
   181  
   182  	ginkgo.It("encodes and decodes", func() {
   183  		h1 := HLL{
   184  			SparseData: []HLLRegister{
   185  				{
   186  					Index: 100,
   187  					Rho:   1,
   188  				},
   189  				{
   190  					Index: 200,
   191  					Rho:   2,
   192  				},
   193  			},
   194  			NonZeroRegisters: 2,
   195  		}
   196  
   197  		var h2 HLL
   198  		h2.Decode(h1.Encode())
   199  		Ω(h2).Should(Equal(h1))
   200  
   201  		hllDenseData := make([]byte, 1<<hllP)
   202  		hllDenseData[100] = 1
   203  		hllDenseData[200] = 2
   204  		h1 = HLL{
   205  			DenseData:        hllDenseData,
   206  			NonZeroRegisters: 2,
   207  		}
   208  		h2 = HLL{}
   209  		h2.Decode(h1.Encode())
   210  		Ω(h2).Should(Equal(h1))
   211  	})
   212  
   213  	ginkgo.It("stores data in sparse or dense format", func() {
   214  		var h HLL
   215  		h.Set(100, 1)
   216  		h.Set(200, 2)
   217  		Ω(h).Should(Equal(HLL{
   218  			SparseData: []HLLRegister{
   219  				{Index: 100, Rho: 1},
   220  				{Index: 200, Rho: 2},
   221  			},
   222  			DenseData:        nil,
   223  			NonZeroRegisters: 2,
   224  		}))
   225  
   226  		for i := 201; i < 4300; i++ {
   227  			h.Set(uint16(i), 3)
   228  		}
   229  		Ω(h.SparseData).Should(BeNil())
   230  		Ω(len(h.DenseData)).Should(Equal(0x4000))
   231  		Ω(h.DenseData[100]).Should(Equal(byte(1)))
   232  		Ω(h.DenseData[200]).Should(Equal(byte(2)))
   233  		Ω(h.DenseData[201]).Should(Equal(byte(3)))
   234  		Ω(h.DenseData[4299]).Should(Equal(byte(3)))
   235  		Ω(h.DenseData[4300]).Should(Equal(byte(0)))
   236  		Ω(h.NonZeroRegisters).Should(Equal(uint16(4101)))
   237  	})
   238  })