github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/stats.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  import (
    18  	"encoding/json"
    19  	"fmt"
    20  	"math"
    21  	"sort"
    22  	"time"
    23  	"unsafe"
    24  
    25  	"github.com/uber/aresdb/memutils"
    26  	"github.com/uber/aresdb/utils"
    27  )
    28  
    29  // stageName represents each query stage.
    30  type stageName string
    31  
    32  const (
    33  	prepareForeignTableTiming     stageName = "prepareForeignTable"
    34  	transferTiming                          = "transfer"
    35  	prepareForFilteringTiming               = "prepareForFiltering"
    36  	initIndexVectorTiming                   = "initIndexVector"
    37  	filterEvalTiming                        = "filterEval"
    38  	prepareForeignRecordIDsTiming           = "prepareForeignRecordIDs"
    39  	foreignTableFilterEvalTiming            = "foreignTableFilterEval"
    40  	geoIntersectEvalTiming                  = "geoIntersectEval"
    41  	prepareForDimAndMeasureTiming           = "prepareForDimAndMeasure"
    42  	dimEvalTiming                           = "dimEval"
    43  	measureEvalTiming                       = "measureEval"
    44  	hllEvalTiming                           = "hllEval"
    45  	sortEvalTiming                          = "sortEval"
    46  	reduceEvalTiming                        = "reduceEval"
    47  	expandEvalTiming                        = "expandEval"
    48  	cleanupTiming                           = "cleanUpEval"
    49  	resultTransferTiming                    = "resultTransfer"
    50  	finalCleanupTiming                      = "finalCleanUp"
    51  )
    52  
    53  // oopkBatchStats stores stats for a single batch execution.
    54  type oopkBatchStats struct {
    55  	// Store timings for each stage of a single batch.
    56  	timings map[stageName]float64
    57  	// totalTiming for this batch.
    58  	totalTiming      float64
    59  	batchID          int32
    60  	batchSize        int
    61  	bytesTransferred int
    62  	numTransferCalls int
    63  }
    64  
    65  // oopkStageSummaryStats stores running info for each stage.
    66  type oopkStageSummaryStats struct {
    67  	name       stageName
    68  	max        float64
    69  	min        float64
    70  	avg        float64
    71  	count      int
    72  	total      float64
    73  	percentage float64
    74  }
    75  
    76  // MarshalJSON marshals the message to JSON in a custom way.
    77  func (s *oopkStageSummaryStats) MarshalJSON() ([]byte, error) {
    78  	return json.Marshal(s.total)
    79  }
    80  
    81  type stageSummaryStatsSlice []*oopkStageSummaryStats
    82  
    83  // Len implements sort.Sort interface for stageSummaryStatsSlice.
    84  func (s stageSummaryStatsSlice) Len() int {
    85  	return len(s)
    86  }
    87  
    88  // Swap implements sort.Sort interface for stageSummaryStatsSlice.
    89  func (s stageSummaryStatsSlice) Swap(i, j int) {
    90  	s[i], s[j] = s[j], s[i]
    91  }
    92  
    93  // Less implements sort.Sort interface for stageSummaryStatsSlice.
    94  func (s stageSummaryStatsSlice) Less(i, j int) bool {
    95  	return s[i].total < s[j].total
    96  }
    97  
    98  // oopkQueryStats stores the overall stats for a query.
    99  type oopkQueryStats struct {
   100  	// stats for each stage. Sorted by total time.
   101  	stageStats []*oopkStageSummaryStats
   102  	// mapping from stage name to stage stats.
   103  	Name2Stage map[stageName]*oopkStageSummaryStats `json:"stages"`
   104  
   105  	// Total timing for all query stages **including transfer**.
   106  	TotalTiming float64 `json:"latency"`
   107  
   108  	// Total number of batches.
   109  	NumBatches int `json:"batches"`
   110  	// Total number of records processed on GPU.
   111  	// A record could represent multiple data record if firstColumn is compressed.
   112  	NumRecords int `json:"records"`
   113  
   114  	// For archive batch, we skip process empty batch. For live batch, we will skip it
   115  	// if its min or max value does not pass main table filters or time filters.
   116  	NumBatchSkipped int `json:"numBatchSkipped"`
   117  
   118  	// Stats for input data transferred via PCIe.
   119  	BytesTransferred int `json:"tranBytes"`
   120  	NumTransferCalls int `json:"tranCalls"`
   121  }
   122  
   123  // NumRows implements the utils.TableDataSource for stats.
   124  func (stats oopkQueryStats) NumRows() int {
   125  	return len(stats.stageStats)
   126  }
   127  
   128  // GetValue implements the utils.TableDataSource for stats. **Notes** row boundary
   129  // are not checked!
   130  func (stats oopkQueryStats) GetValue(row, col int) interface{} {
   131  	rowValue := stats.stageStats[row]
   132  	switch col {
   133  	case 0:
   134  		return rowValue.name
   135  	case 1:
   136  		return rowValue.avg
   137  	case 2:
   138  		return rowValue.max
   139  	case 3:
   140  		return rowValue.min
   141  	case 4:
   142  		return rowValue.count
   143  	case 5:
   144  		return rowValue.total
   145  	case 6:
   146  		return fmt.Sprintf("%.2f%%", rowValue.percentage*100)
   147  	}
   148  	return nil
   149  }
   150  
   151  // ColumnHeaders implements the utils.TableDataSource for stats.
   152  func (stats oopkQueryStats) ColumnHeaders() []string {
   153  	return []string{"stage", "avg", "max", "minCallName", "count", "total", "percentage"}
   154  }
   155  
   156  // reportTimingForCurrentBatch will first wait for current cuda stream if the debug mode is set and change the timing stat accordingly.
   157  // It will add to the total timing as well. Therefore this function should only be called one time for each stage.
   158  func (qc *AQLQueryContext) reportTimingForCurrentBatch(stream unsafe.Pointer, start *time.Time, name stageName) {
   159  	if qc.Debug {
   160  		memutils.WaitForCudaStream(stream, qc.Device)
   161  		now := utils.Now()
   162  		value := now.Sub(*start).Seconds() * 1000
   163  		qc.OOPK.currentBatch.stats.timings[name] = value
   164  		qc.OOPK.currentBatch.stats.totalTiming += value
   165  		*start = now
   166  	}
   167  }
   168  
   169  // reportTimingForCurrentBatch is similar to reportTimingForCurrentBatch except that it modifies the query stats for the
   170  // whole query. It's usually should be called once for each stage
   171  func (qc *AQLQueryContext) reportTiming(stream unsafe.Pointer, start *time.Time, name stageName) {
   172  	if qc.Debug {
   173  		if stream != nil {
   174  			memutils.WaitForCudaStream(stream, qc.Device)
   175  		}
   176  		now := utils.Now()
   177  		value := now.Sub(*start).Seconds() * 1000
   178  		queryStats := &qc.OOPK.LiveBatchStats
   179  		queryStats.applyStageStats(name, value)
   180  		*start = now
   181  	}
   182  }
   183  
   184  // applyStageStats applies the stage stats to the overall query stats and compute max,minCallName and total for that
   185  // stage.
   186  func (stats *oopkQueryStats) applyStageStats(name stageName, value float64) {
   187  	if _, ok := stats.Name2Stage[name]; !ok {
   188  		stats.Name2Stage[name] = &oopkStageSummaryStats{name: name, max: -1, min: math.MaxFloat64}
   189  	}
   190  	stageStats := stats.Name2Stage[name]
   191  	stageStats.max = math.Max(stageStats.max, value)
   192  	stageStats.min = math.Min(stageStats.max, value)
   193  	stageStats.count++
   194  	stageStats.total += value
   195  	stats.TotalTiming += value
   196  }
   197  
   198  // applyBatchStats applies the current batch stats onto the overall query stats. It computes information
   199  // like max, minCallName, average for each stage as well as the percentage.
   200  func (stats *oopkQueryStats) applyBatchStats(batchStats oopkBatchStats) {
   201  	for name, value := range batchStats.timings {
   202  		stats.applyStageStats(name, value)
   203  	}
   204  	stats.NumBatches++
   205  	stats.NumRecords += batchStats.batchSize
   206  	stats.BytesTransferred += batchStats.bytesTransferred
   207  	stats.NumTransferCalls += batchStats.numTransferCalls
   208  }
   209  
   210  // writeToLog writes the summary stats for this query in a tabular format to logger.
   211  func (stats *oopkQueryStats) writeToLog() {
   212  	if stats.NumBatches+stats.NumBatchSkipped > 0 {
   213  		// Compute average and percentage.
   214  		stats.stageStats = make([]*oopkStageSummaryStats, 0, len(stats.Name2Stage))
   215  		for _, oopkStageStats := range stats.Name2Stage {
   216  			oopkStageStats.avg = oopkStageStats.total / float64(stats.NumBatches)
   217  			oopkStageStats.percentage = oopkStageStats.total / stats.TotalTiming
   218  			stats.stageStats = append(stats.stageStats, oopkStageStats)
   219  		}
   220  		sort.Sort(sort.Reverse(stageSummaryStatsSlice(stats.stageStats)))
   221  
   222  		utils.GetQueryLogger().Infof("Total timing: %f", stats.TotalTiming)
   223  		utils.GetQueryLogger().Infof("Num batches: %d", stats.NumBatches)
   224  		utils.GetQueryLogger().Infof("Num batches skipped: %d", stats.NumBatchSkipped)
   225  		// Create tabular output.
   226  		summary := utils.WriteTable(stats)
   227  		utils.GetQueryLogger().Info("\n" + summary)
   228  	}
   229  }
   230  
   231  // reportBatch will report OOPK batch related stats to the query logger.
   232  func (qc *AQLQueryContext) reportBatch(isArchiveBatch bool) {
   233  	if qc.Debug {
   234  		batchType := "live batch"
   235  		if isArchiveBatch {
   236  			batchType = "archive batch"
   237  		}
   238  		stats := qc.OOPK.currentBatch.stats
   239  		utils.GetQueryLogger().
   240  			With(
   241  				"timings", stats.timings,
   242  				"total", stats.totalTiming,
   243  				"batchID", stats.batchID,
   244  				"batchSize", stats.batchSize,
   245  				"batchType", batchType,
   246  			).Infof("Query stats")
   247  		if isArchiveBatch {
   248  			qc.OOPK.ArchiveBatchStats.applyBatchStats(stats)
   249  		} else {
   250  			qc.OOPK.LiveBatchStats.applyBatchStats(stats)
   251  		}
   252  	}
   253  }