github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/hll.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package query 16 17 import ( 18 "bytes" 19 memCom "github.com/uber/aresdb/memstore/common" 20 "github.com/uber/aresdb/memutils" 21 queryCom "github.com/uber/aresdb/query/common" 22 "github.com/uber/aresdb/utils" 23 "math" 24 "time" 25 "unsafe" 26 ) 27 28 // HLLQueryResults holds the buffer to store multiple hll query results or errors. 29 type HLLQueryResults struct { 30 buffer bytes.Buffer 31 } 32 33 // NewHLLQueryResults returns a new NewHLLQueryResults and writes the magical header and 34 // padding to underlying buffer. 35 func NewHLLQueryResults() *HLLQueryResults { 36 r := &HLLQueryResults{} 37 header := queryCom.HLLDataHeader 38 r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&header)))[:]) 39 // Padding. 40 var bs [4]byte 41 r.buffer.Write(bs[:]) 42 return r 43 } 44 45 // WriteResult write result to the buffer. 46 func (r *HLLQueryResults) WriteResult(result []byte) { 47 totalSize := uint32(len(result)) 48 // Write total size. 49 r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&totalSize)))[:]) 50 // 0 stands for result. 51 r.buffer.WriteByte(byte(0)) 52 // Padding. 53 var bs [3]byte 54 r.buffer.Write(bs[:]) 55 r.buffer.Write(result) 56 } 57 58 // WriteError write error to the buffer. 59 func (r *HLLQueryResults) WriteError(err error) { 60 totalSize := len(err.Error()) 61 // Write total size. 62 r.buffer.Write((*(*[4]byte)(unsafe.Pointer(&totalSize)))[:]) 63 // 1 stands for error. 64 r.buffer.WriteByte(byte(1)) 65 // Padding. 66 var bs [3]byte 67 r.buffer.Write(bs[:]) 68 strErr := err.Error() 69 padding := (8 - (len(strErr) & 7)) & 8 70 r.buffer.Write([]byte(strErr)) 71 if padding > 0 { 72 paddingBytes := make([]byte, padding) 73 r.buffer.Write(paddingBytes) 74 } 75 } 76 77 // GetBytes returns the underlying bytes. 78 func (r *HLLQueryResults) GetBytes() []byte { 79 return r.buffer.Bytes() 80 } 81 82 // HLLDataWriter is the struct to serialize HLL Data struct. 83 type HLLDataWriter struct { 84 queryCom.HLLData 85 buffer []byte 86 } 87 88 // SerializeHLL allocates buffer based on the metadata and then serializes hll data into the buffer. 89 func (qc *AQLQueryContext) SerializeHLL(dataTypes []memCom.DataType, 90 enumDicts map[int][]string, timeDimensions []int) ([]byte, error) { 91 oopkContext := qc.OOPK 92 paddedRawDimValuesVectorLength := (uint32(dimValResVectorSize(oopkContext.ResultSize, oopkContext.NumDimsPerDimWidth)) + 7) / 8 * 8 93 paddedCountLength := uint32(2*oopkContext.ResultSize+7) / 8 * 8 94 paddedHLLVectorLength := (qc.OOPK.hllVectorSize + 7) / 8 * 8 95 builder := HLLDataWriter{ 96 HLLData: queryCom.HLLData{ 97 ResultSize: uint32(oopkContext.ResultSize), 98 NumDimsPerDimWidth: oopkContext.NumDimsPerDimWidth, 99 DimIndexes: oopkContext.DimensionVectorIndex, 100 DataTypes: dataTypes, 101 EnumDicts: enumDicts, 102 PaddedRawDimValuesVectorLength: paddedRawDimValuesVectorLength, 103 PaddedHLLVectorLength: paddedHLLVectorLength, 104 }, 105 } 106 107 headerSize, totalSize := builder.CalculateSizes() 108 builder.buffer = make([]byte, totalSize) 109 if err := builder.SerializeHeader(); err != nil { 110 return nil, err 111 } 112 113 // Copy dim values vector from device. 114 dimVectorH := unsafe.Pointer(&builder.buffer[headerSize]) 115 asyncCopyDimensionVector(dimVectorH, oopkContext.currentBatch.dimensionVectorD[0].getPointer(), 116 oopkContext.ResultSize, 0, oopkContext.NumDimsPerDimWidth, oopkContext.ResultSize, oopkContext.currentBatch.resultCapacity, 117 memutils.AsyncCopyDeviceToHost, qc.cudaStreams[0], qc.Device) 118 119 memutils.AsyncCopyDeviceToHost(unsafe.Pointer(&builder.buffer[headerSize+paddedRawDimValuesVectorLength]), 120 oopkContext.hllDimRegIDCountD.getPointer(), oopkContext.ResultSize*2, qc.cudaStreams[0], qc.Device) 121 122 memutils.AsyncCopyDeviceToHost(unsafe.Pointer(&builder.buffer[headerSize+paddedRawDimValuesVectorLength+paddedCountLength]), 123 oopkContext.hllVectorD.getPointer(), int(qc.OOPK.hllVectorSize), qc.cudaStreams[0], qc.Device) 124 memutils.WaitForCudaStream(qc.cudaStreams[0], qc.Device) 125 126 // Fix time dimension by substracting the timezone. 127 if len(timeDimensions) > 0 && qc.fixedTimezone.String() != time.UTC.String() { 128 // length is equal to length of timeDimensions 129 dimPtrs := make([][2]unsafe.Pointer, len(timeDimensions)) 130 131 for i := 0; i < len(timeDimensions); i++ { 132 dimIndex := timeDimensions[i] 133 dimVectorIndex := qc.OOPK.DimensionVectorIndex[dimIndex] 134 valueOffset, nullOffset := queryCom.GetDimensionStartOffsets(oopkContext.NumDimsPerDimWidth, dimVectorIndex, int(qc.OOPK.ResultSize)) 135 dimPtrs[i] = [2]unsafe.Pointer{utils.MemAccess(dimVectorH, valueOffset), utils.MemAccess(dimVectorH, nullOffset)} 136 } 137 138 for rowNumber := 0; rowNumber < oopkContext.ResultSize; rowNumber++ { 139 for i := 0; i < len(timeDimensions); i++ { 140 valueStart, nullStart := dimPtrs[i][0], dimPtrs[i][1] 141 // We don't need to do anything for null. 142 if *(*uint8)(utils.MemAccess(nullStart, rowNumber)) == 0 { 143 continue 144 } 145 146 valuePtr := (*uint32)(utils.MemAccess(valueStart, rowNumber*4)) 147 // Don't need to check type of time dimension, they should be guaranteed by AQL Compiler. 148 149 newVal := int64(*valuePtr) 150 if qc.fromTime != nil { 151 _, fromOffset := qc.fromTime.Time.Zone() 152 _, toOffset := qc.toTime.Time.Zone() 153 newVal = utils.AdjustOffset(fromOffset, toOffset, qc.dstswitch, int64(*valuePtr)) 154 } 155 156 if newVal >= math.MaxUint32 { 157 newVal = math.MaxUint32 158 } 159 160 if newVal <= 0 { 161 newVal = 0 162 } 163 *valuePtr = uint32(newVal) 164 } 165 } 166 } 167 168 return builder.buffer, nil 169 } 170 171 // SerializeHeader serialize HLL header 172 // -----------query result 0------------------- 173 // <header> 174 // [uint8] num_enum_columns [uint8] bytes per dim ... [padding for 8 bytes] 175 // [uint32] result_size [uint32] raw_dim_values_vector_length 176 // [uint8] dim_index_0... [uint8] dim_index_n [padding for 8 bytes] 177 // [uint32] data_type_0...[uint32] data_type_n [padding for 8 bytes] 178 // 179 // <enum cases 0> 180 // [uint32_t] number of bytes of enum cases [uint16] column_index [2 bytes: padding] 181 // <enum values 0> delimited by "\u0000\n" [padding for 8 bytes] 182 // 183 // <end of header> 184 func (builder *HLLDataWriter) SerializeHeader() error { 185 writer := utils.NewBufferWriter(builder.buffer) 186 187 // num_enum_columns 188 if err := writer.AppendUint8(uint8(len(builder.EnumDicts))); err != nil { 189 return err 190 } 191 192 // bytes per dim 193 if err := writer.Append([]byte(builder.NumDimsPerDimWidth[:])); err != nil { 194 return err 195 } 196 writer.AlignBytes(8) 197 198 // result_size 199 if err := writer.AppendUint32(builder.ResultSize); err != nil { 200 return err 201 } 202 203 // raw_dim_values_vector_length 204 if err := writer.AppendUint32(builder.PaddedRawDimValuesVectorLength); err != nil { 205 return err 206 } 207 208 // dim_indexes 209 for _, dimIndex := range builder.DimIndexes { 210 if err := writer.AppendUint8(uint8(dimIndex)); err != nil { 211 return err 212 } 213 } 214 writer.AlignBytes(8) 215 216 // data_types 217 for _, dataType := range builder.DataTypes { 218 if err := writer.AppendUint32(uint32(dataType)); err != nil { 219 return err 220 } 221 } 222 writer.AlignBytes(8) 223 224 // Write enum cases. 225 for columnID, enumCases := range builder.EnumDicts { 226 enumCasesBytes := queryCom.CalculateEnumCasesBytes(enumCases) 227 if err := writer.AppendUint32(enumCasesBytes); err != nil { 228 return err 229 } 230 231 if err := writer.AppendUint16(uint16(columnID)); err != nil { 232 return err 233 } 234 235 // padding 236 writer.SkipBytes(2) 237 238 var enumCaseBytesWritten uint32 239 for _, enumCase := range enumCases { 240 if err := writer.Append([]byte(enumCase)); err != nil { 241 return err 242 } 243 244 if err := writer.Append([]byte(queryCom.EnumDelimiter)); err != nil { 245 return err 246 } 247 248 enumCaseBytesWritten += uint32(len(enumCase)) + 2 249 } 250 251 writer.SkipBytes(int(enumCasesBytes - enumCaseBytesWritten)) 252 } 253 return nil 254 }