github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/hll.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  import (
    18  	"bytes"
    19  	memCom "github.com/uber/aresdb/memstore/common"
    20  	"github.com/uber/aresdb/memutils"
    21  	queryCom "github.com/uber/aresdb/query/common"
    22  	"github.com/uber/aresdb/utils"
    23  	"math"
    24  	"time"
    25  	"unsafe"
    26  )
    27  
// HLLQueryResults accumulates the serialized form of multiple HLL query results
// or errors in a single in-memory buffer. Entries are appended with WriteResult
// and WriteError, and the accumulated bytes are read back with GetBytes.
type HLLQueryResults struct {
	// buffer holds every byte written so far; NewHLLQueryResults seeds it with
	// the header and padding before any entry is appended.
	buffer bytes.Buffer
}
    32  
    33  // NewHLLQueryResults returns a new NewHLLQueryResults and writes the magical header and
    34  // padding to underlying buffer.
    35  func NewHLLQueryResults() *HLLQueryResults {
    36  	r := &HLLQueryResults{}
    37  	header := queryCom.HLLDataHeader
    38  	r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&header)))[:])
    39  	// Padding.
    40  	var bs [4]byte
    41  	r.buffer.Write(bs[:])
    42  	return r
    43  }
    44  
    45  // WriteResult write result to the buffer.
    46  func (r *HLLQueryResults) WriteResult(result []byte) {
    47  	totalSize := uint32(len(result))
    48  	// Write total size.
    49  	r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&totalSize)))[:])
    50  	// 0 stands for result.
    51  	r.buffer.WriteByte(byte(0))
    52  	// Padding.
    53  	var bs [3]byte
    54  	r.buffer.Write(bs[:])
    55  	r.buffer.Write(result)
    56  }
    57  
    58  // WriteError write error to the buffer.
    59  func (r *HLLQueryResults) WriteError(err error) {
    60  	totalSize := len(err.Error())
    61  	// Write total size.
    62  	r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&totalSize)))[:])
    63  	// 1 stands for error.
    64  	r.buffer.WriteByte(byte(1))
    65  	// Padding.
    66  	var bs [3]byte
    67  	r.buffer.Write(bs[:])
    68  	strErr := err.Error()
    69  	padding := (8 - (len(strErr) & 7)) & 8
    70  	r.buffer.Write([]byte(strErr))
    71  	if padding > 0 {
    72  		paddingBytes := make([]byte, padding)
    73  		r.buffer.Write(paddingBytes)
    74  	}
    75  }
    76  
    77  // GetBytes returns the underlying bytes.
    78  func (r *HLLQueryResults) GetBytes() []byte {
    79  	return r.buffer.Bytes()
    80  }
    81  
// HLLDataWriter serializes HLL data into a byte buffer. It embeds
// queryCom.HLLData, whose fields supply the metadata written by
// SerializeHeader, and carries the destination buffer.
type HLLDataWriter struct {
	// Embedded metadata; its fields are read directly by SerializeHeader.
	queryCom.HLLData
	// buffer is the destination SerializeHeader writes into. SerializeHLL
	// allocates it before serializing.
	buffer []byte
}
    87  
// SerializeHLL allocates a host buffer sized by the HLL metadata, writes the
// header into it and then copies the HLL query output from the device into the
// space after the header.
//
// Buffer layout after the header (offsets relative to headerSize):
//   - dimension values vector, at offset 0;
//   - ResultSize*2 bytes copied from hllDimRegIDCountD, at offset
//     paddedRawDimValuesVectorLength;
//   - hllVectorSize bytes copied from hllVectorD, at offset
//     paddedRawDimValuesVectorLength+paddedCountLength.
//
// dataTypes and enumDicts are stored in the header metadata unchanged.
// timeDimensions lists the dimension indexes whose uint32 values are adjusted
// in place when the query's fixed timezone is not UTC.
//
// It returns the filled buffer. The only error it returns is the one produced
// by SerializeHeader, in which case the returned slice is nil.
func (qc *AQLQueryContext) SerializeHLL(dataTypes []memCom.DataType,
	enumDicts map[int][]string, timeDimensions []int) ([]byte, error) {
	oopkContext := qc.OOPK
	// Each section length is rounded up to a multiple of 8 bytes.
	paddedRawDimValuesVectorLength := (uint32(dimValResVectorSize(oopkContext.ResultSize, oopkContext.NumDimsPerDimWidth)) + 7) / 8 * 8
	paddedCountLength := uint32(2*oopkContext.ResultSize+7) / 8 * 8
	paddedHLLVectorLength := (qc.OOPK.hllVectorSize + 7) / 8 * 8
	builder := HLLDataWriter{
		HLLData: queryCom.HLLData{
			ResultSize:                     uint32(oopkContext.ResultSize),
			NumDimsPerDimWidth:             oopkContext.NumDimsPerDimWidth,
			DimIndexes:                     oopkContext.DimensionVectorIndex,
			DataTypes:                      dataTypes,
			EnumDicts:                      enumDicts,
			PaddedRawDimValuesVectorLength: paddedRawDimValuesVectorLength,
			PaddedHLLVectorLength:          paddedHLLVectorLength,
		},
	}

	headerSize, totalSize := builder.CalculateSizes()
	builder.buffer = make([]byte, totalSize)
	if err := builder.SerializeHeader(); err != nil {
		return nil, err
	}

	// Copy dim values vector from device.
	// NOTE(review): indexing buffer[headerSize] requires totalSize > headerSize;
	// presumably callers never serialize an empty result - confirm.
	dimVectorH := unsafe.Pointer(&builder.buffer[headerSize])
	asyncCopyDimensionVector(dimVectorH, oopkContext.currentBatch.dimensionVectorD[0].getPointer(),
		oopkContext.ResultSize, 0, oopkContext.NumDimsPerDimWidth, oopkContext.ResultSize, oopkContext.currentBatch.resultCapacity,
		memutils.AsyncCopyDeviceToHost, qc.cudaStreams[0], qc.Device)

	// Copy ResultSize*2 bytes from hllDimRegIDCountD right after the padded dim values.
	memutils.AsyncCopyDeviceToHost(unsafe.Pointer(&builder.buffer[headerSize+paddedRawDimValuesVectorLength]),
		oopkContext.hllDimRegIDCountD.getPointer(), oopkContext.ResultSize*2, qc.cudaStreams[0], qc.Device)

	// Copy hllVectorSize bytes from hllVectorD right after the padded counts.
	memutils.AsyncCopyDeviceToHost(unsafe.Pointer(&builder.buffer[headerSize+paddedRawDimValuesVectorLength+paddedCountLength]),
		oopkContext.hllVectorD.getPointer(), int(qc.OOPK.hllVectorSize), qc.cudaStreams[0], qc.Device)
	// All three copies were issued on cudaStreams[0]; presumably this blocks until
	// they have completed, so the host buffer can be read below - confirm.
	memutils.WaitForCudaStream(qc.cudaStreams[0], qc.Device)

	// Fix time dimension by subtracting the timezone.
	if len(timeDimensions) > 0 && qc.fixedTimezone.String() != time.UTC.String() {
		// length is equal to length of timeDimensions
		// Each entry holds {pointer to values, pointer to null flags} for one time dimension.
		dimPtrs := make([][2]unsafe.Pointer, len(timeDimensions))

		for i := 0; i < len(timeDimensions); i++ {
			dimIndex := timeDimensions[i]
			dimVectorIndex := qc.OOPK.DimensionVectorIndex[dimIndex]
			valueOffset, nullOffset := queryCom.GetDimensionStartOffsets(oopkContext.NumDimsPerDimWidth, dimVectorIndex, int(qc.OOPK.ResultSize))
			dimPtrs[i] = [2]unsafe.Pointer{utils.MemAccess(dimVectorH, valueOffset), utils.MemAccess(dimVectorH, nullOffset)}
		}

		for rowNumber := 0; rowNumber < oopkContext.ResultSize; rowNumber++ {
			for i := 0; i < len(timeDimensions); i++ {
				valueStart, nullStart := dimPtrs[i][0], dimPtrs[i][1]
				// We don't need to do anything for null.
				// A zero byte at the row's position in the null vector marks the row as null.
				if *(*uint8)(utils.MemAccess(nullStart, rowNumber)) == 0 {
					continue
				}

				// Time values are accessed as 4-byte uint32 slots, hence rowNumber*4.
				valuePtr := (*uint32)(utils.MemAccess(valueStart, rowNumber*4))
				// Don't need to check type of time dimension, they should be guaranteed by AQL Compiler.

				newVal := int64(*valuePtr)
				// The value is only adjusted when fromTime is set; otherwise it is written back unchanged.
				// NOTE(review): qc.toTime is dereferenced whenever qc.fromTime is non-nil;
				// presumably both are always set together - confirm.
				// The two Zone() offsets do not depend on the row or the dimension.
				if qc.fromTime != nil {
					_, fromOffset := qc.fromTime.Time.Zone()
					_, toOffset := qc.toTime.Time.Zone()
					newVal = utils.AdjustOffset(fromOffset, toOffset, qc.dstswitch, int64(*valuePtr))
				}

				// Clamp to the uint32 range before storing the value back.
				if newVal >= math.MaxUint32 {
					newVal = math.MaxUint32
				}

				if newVal <= 0 {
					newVal = 0
				}
				*valuePtr = uint32(newVal)
			}
		}
	}

	return builder.buffer, nil
}
   170  
   171  // SerializeHeader serialize HLL header
   172  //	-----------query result 0-------------------
   173  //	 <header>
   174  //	 [uint8] num_enum_columns [uint8] bytes per dim ... [padding for 8 bytes]
   175  //	 [uint32] result_size [uint32] raw_dim_values_vector_length
   176  //	 [uint8] dim_index_0... [uint8] dim_index_n [padding for 8 bytes]
   177  //	 [uint32] data_type_0...[uint32] data_type_n [padding for 8 bytes]
   178  //
   179  //	 <enum cases 0>
   180  //	 [uint32_t] number of bytes of enum cases [uint16] column_index [2 bytes: padding]
   181  //	 <enum values 0> delimited by "\u0000\n" [padding for 8 bytes]
   182  //
   183  // 	 <end of header>
   184  func (builder *HLLDataWriter) SerializeHeader() error {
   185  	writer := utils.NewBufferWriter(builder.buffer)
   186  
   187  	// num_enum_columns
   188  	if err := writer.AppendUint8(uint8(len(builder.EnumDicts))); err != nil {
   189  		return err
   190  	}
   191  
   192  	// bytes per dim
   193  	if err := writer.Append([]byte(builder.NumDimsPerDimWidth[:])); err != nil {
   194  		return err
   195  	}
   196  	writer.AlignBytes(8)
   197  
   198  	// result_size
   199  	if err := writer.AppendUint32(builder.ResultSize); err != nil {
   200  		return err
   201  	}
   202  
   203  	// raw_dim_values_vector_length
   204  	if err := writer.AppendUint32(builder.PaddedRawDimValuesVectorLength); err != nil {
   205  		return err
   206  	}
   207  
   208  	// dim_indexes
   209  	for _, dimIndex := range builder.DimIndexes {
   210  		if err := writer.AppendUint8(uint8(dimIndex)); err != nil {
   211  			return err
   212  		}
   213  	}
   214  	writer.AlignBytes(8)
   215  
   216  	// data_types
   217  	for _, dataType := range builder.DataTypes {
   218  		if err := writer.AppendUint32(uint32(dataType)); err != nil {
   219  			return err
   220  		}
   221  	}
   222  	writer.AlignBytes(8)
   223  
   224  	// Write enum cases.
   225  	for columnID, enumCases := range builder.EnumDicts {
   226  		enumCasesBytes := queryCom.CalculateEnumCasesBytes(enumCases)
   227  		if err := writer.AppendUint32(enumCasesBytes); err != nil {
   228  			return err
   229  		}
   230  
   231  		if err := writer.AppendUint16(uint16(columnID)); err != nil {
   232  			return err
   233  		}
   234  
   235  		// padding
   236  		writer.SkipBytes(2)
   237  
   238  		var enumCaseBytesWritten uint32
   239  		for _, enumCase := range enumCases {
   240  			if err := writer.Append([]byte(enumCase)); err != nil {
   241  				return err
   242  			}
   243  
   244  			if err := writer.Append([]byte(queryCom.EnumDelimiter)); err != nil {
   245  				return err
   246  			}
   247  
   248  			enumCaseBytesWritten += uint32(len(enumCase)) + 2
   249  		}
   250  
   251  		writer.SkipBytes(int(enumCasesBytes - enumCaseBytesWritten))
   252  	}
   253  	return nil
   254  }