github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/stats.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package query 16 17 import ( 18 "encoding/json" 19 "fmt" 20 "math" 21 "sort" 22 "time" 23 "unsafe" 24 25 "github.com/uber/aresdb/memutils" 26 "github.com/uber/aresdb/utils" 27 ) 28 29 // stageName represents each query stage. 30 type stageName string 31 32 const ( 33 prepareForeignTableTiming stageName = "prepareForeignTable" 34 transferTiming = "transfer" 35 prepareForFilteringTiming = "prepareForFiltering" 36 initIndexVectorTiming = "initIndexVector" 37 filterEvalTiming = "filterEval" 38 prepareForeignRecordIDsTiming = "prepareForeignRecordIDs" 39 foreignTableFilterEvalTiming = "foreignTableFilterEval" 40 geoIntersectEvalTiming = "geoIntersectEval" 41 prepareForDimAndMeasureTiming = "prepareForDimAndMeasure" 42 dimEvalTiming = "dimEval" 43 measureEvalTiming = "measureEval" 44 hllEvalTiming = "hllEval" 45 sortEvalTiming = "sortEval" 46 reduceEvalTiming = "reduceEval" 47 expandEvalTiming = "expandEval" 48 cleanupTiming = "cleanUpEval" 49 resultTransferTiming = "resultTransfer" 50 finalCleanupTiming = "finalCleanUp" 51 ) 52 53 // oopkBatchStats stores stats for a single batch execution. 54 type oopkBatchStats struct { 55 // Store timings for each stage of a single batch. 56 timings map[stageName]float64 57 // totalTiming for this batch. 58 totalTiming float64 59 batchID int32 60 batchSize int 61 bytesTransferred int 62 numTransferCalls int 63 } 64 65 // oopkStageSummaryStats stores running info for each stage. 66 type oopkStageSummaryStats struct { 67 name stageName 68 max float64 69 min float64 70 avg float64 71 count int 72 total float64 73 percentage float64 74 } 75 76 // MarshalJSON marshals the message to JSON in a custom way. 77 func (s *oopkStageSummaryStats) MarshalJSON() ([]byte, error) { 78 return json.Marshal(s.total) 79 } 80 81 type stageSummaryStatsSlice []*oopkStageSummaryStats 82 83 // Len implements sort.Sort interface for stageSummaryStatsSlice. 84 func (s stageSummaryStatsSlice) Len() int { 85 return len(s) 86 } 87 88 // Swap implements sort.Sort interface for stageSummaryStatsSlice. 89 func (s stageSummaryStatsSlice) Swap(i, j int) { 90 s[i], s[j] = s[j], s[i] 91 } 92 93 // Less implements sort.Sort interface for stageSummaryStatsSlice. 94 func (s stageSummaryStatsSlice) Less(i, j int) bool { 95 return s[i].total < s[j].total 96 } 97 98 // oopkQueryStats stores the overall stats for a query. 99 type oopkQueryStats struct { 100 // stats for each stage. Sorted by total time. 101 stageStats []*oopkStageSummaryStats 102 // mapping from stage name to stage stats. 103 Name2Stage map[stageName]*oopkStageSummaryStats `json:"stages"` 104 105 // Total timing for all query stages **including transfer**. 106 TotalTiming float64 `json:"latency"` 107 108 // Total number of batches. 109 NumBatches int `json:"batches"` 110 // Total number of records processed on GPU. 111 // A record could represent multiple data record if firstColumn is compressed. 112 NumRecords int `json:"records"` 113 114 // For archive batch, we skip process empty batch. For live batch, we will skip it 115 // if its min or max value does not pass main table filters or time filters. 116 NumBatchSkipped int `json:"numBatchSkipped"` 117 118 // Stats for input data transferred via PCIe. 119 BytesTransferred int `json:"tranBytes"` 120 NumTransferCalls int `json:"tranCalls"` 121 } 122 123 // NumRows implements the utils.TableDataSource for stats. 124 func (stats oopkQueryStats) NumRows() int { 125 return len(stats.stageStats) 126 } 127 128 // GetValue implements the utils.TableDataSource for stats. **Notes** row boundary 129 // are not checked! 130 func (stats oopkQueryStats) GetValue(row, col int) interface{} { 131 rowValue := stats.stageStats[row] 132 switch col { 133 case 0: 134 return rowValue.name 135 case 1: 136 return rowValue.avg 137 case 2: 138 return rowValue.max 139 case 3: 140 return rowValue.min 141 case 4: 142 return rowValue.count 143 case 5: 144 return rowValue.total 145 case 6: 146 return fmt.Sprintf("%.2f%%", rowValue.percentage*100) 147 } 148 return nil 149 } 150 151 // ColumnHeaders implements the utils.TableDataSource for stats. 152 func (stats oopkQueryStats) ColumnHeaders() []string { 153 return []string{"stage", "avg", "max", "minCallName", "count", "total", "percentage"} 154 } 155 156 // reportTimingForCurrentBatch will first wait for current cuda stream if the debug mode is set and change the timing stat accordingly. 157 // It will add to the total timing as well. Therefore this function should only be called one time for each stage. 158 func (qc *AQLQueryContext) reportTimingForCurrentBatch(stream unsafe.Pointer, start *time.Time, name stageName) { 159 if qc.Debug { 160 memutils.WaitForCudaStream(stream, qc.Device) 161 now := utils.Now() 162 value := now.Sub(*start).Seconds() * 1000 163 qc.OOPK.currentBatch.stats.timings[name] = value 164 qc.OOPK.currentBatch.stats.totalTiming += value 165 *start = now 166 } 167 } 168 169 // reportTimingForCurrentBatch is similar to reportTimingForCurrentBatch except that it modifies the query stats for the 170 // whole query. It's usually should be called once for each stage 171 func (qc *AQLQueryContext) reportTiming(stream unsafe.Pointer, start *time.Time, name stageName) { 172 if qc.Debug { 173 if stream != nil { 174 memutils.WaitForCudaStream(stream, qc.Device) 175 } 176 now := utils.Now() 177 value := now.Sub(*start).Seconds() * 1000 178 queryStats := &qc.OOPK.LiveBatchStats 179 queryStats.applyStageStats(name, value) 180 *start = now 181 } 182 } 183 184 // applyStageStats applies the stage stats to the overall query stats and compute max,minCallName and total for that 185 // stage. 186 func (stats *oopkQueryStats) applyStageStats(name stageName, value float64) { 187 if _, ok := stats.Name2Stage[name]; !ok { 188 stats.Name2Stage[name] = &oopkStageSummaryStats{name: name, max: -1, min: math.MaxFloat64} 189 } 190 stageStats := stats.Name2Stage[name] 191 stageStats.max = math.Max(stageStats.max, value) 192 stageStats.min = math.Min(stageStats.max, value) 193 stageStats.count++ 194 stageStats.total += value 195 stats.TotalTiming += value 196 } 197 198 // applyBatchStats applies the current batch stats onto the overall query stats. It computes information 199 // like max, minCallName, average for each stage as well as the percentage. 200 func (stats *oopkQueryStats) applyBatchStats(batchStats oopkBatchStats) { 201 for name, value := range batchStats.timings { 202 stats.applyStageStats(name, value) 203 } 204 stats.NumBatches++ 205 stats.NumRecords += batchStats.batchSize 206 stats.BytesTransferred += batchStats.bytesTransferred 207 stats.NumTransferCalls += batchStats.numTransferCalls 208 } 209 210 // writeToLog writes the summary stats for this query in a tabular format to logger. 211 func (stats *oopkQueryStats) writeToLog() { 212 if stats.NumBatches+stats.NumBatchSkipped > 0 { 213 // Compute average and percentage. 214 stats.stageStats = make([]*oopkStageSummaryStats, 0, len(stats.Name2Stage)) 215 for _, oopkStageStats := range stats.Name2Stage { 216 oopkStageStats.avg = oopkStageStats.total / float64(stats.NumBatches) 217 oopkStageStats.percentage = oopkStageStats.total / stats.TotalTiming 218 stats.stageStats = append(stats.stageStats, oopkStageStats) 219 } 220 sort.Sort(sort.Reverse(stageSummaryStatsSlice(stats.stageStats))) 221 222 utils.GetQueryLogger().Infof("Total timing: %f", stats.TotalTiming) 223 utils.GetQueryLogger().Infof("Num batches: %d", stats.NumBatches) 224 utils.GetQueryLogger().Infof("Num batches skipped: %d", stats.NumBatchSkipped) 225 // Create tabular output. 226 summary := utils.WriteTable(stats) 227 utils.GetQueryLogger().Info("\n" + summary) 228 } 229 } 230 231 // reportBatch will report OOPK batch related stats to the query logger. 232 func (qc *AQLQueryContext) reportBatch(isArchiveBatch bool) { 233 if qc.Debug { 234 batchType := "live batch" 235 if isArchiveBatch { 236 batchType = "archive batch" 237 } 238 stats := qc.OOPK.currentBatch.stats 239 utils.GetQueryLogger(). 240 With( 241 "timings", stats.timings, 242 "total", stats.totalTiming, 243 "batchID", stats.batchID, 244 "batchSize", stats.batchSize, 245 "batchType", batchType, 246 ).Infof("Query stats") 247 if isArchiveBatch { 248 qc.OOPK.ArchiveBatchStats.applyBatchStats(stats) 249 } else { 250 qc.OOPK.LiveBatchStats.applyBatchStats(stats) 251 } 252 } 253 }