github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/aql_batchexecutor.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/uber/aresdb/memutils"
    20  	queryCom "github.com/uber/aresdb/query/common"
    21  	"time"
    22  	"unsafe"
    23  )
    24  
    25  // BatchExecutor is batch executor interface for both Non-aggregation query and Aggregation query
    26  type BatchExecutor interface {
    27  	// filter operation
    28  	filter()
    29  	// join operation
    30  	join()
    31  	// project of measure/select columns
    32  	project()
    33  	// reduce to sort and aggregate result
    34  	reduce()
    35  	// prepare work before execution
    36  	preExec(lastBatch bool, start time.Time)
    37  	// post execution after execution
    38  	postExec(start time.Time)
    39  }
    40  
    41  // DummyBatchExecutorImpl is a dummy executor which do nothing
    42  type DummyBatchExecutorImpl struct {
    43  }
    44  
    45  // NewDummyBatchExecutor create a dummy BatchExecutor
    46  func NewDummyBatchExecutor() BatchExecutor {
    47  	return &DummyBatchExecutorImpl{}
    48  }
    49  
    50  func (e *DummyBatchExecutorImpl) filter() {
    51  }
    52  
    53  func (e *DummyBatchExecutorImpl) join() {
    54  }
    55  
    56  func (e *DummyBatchExecutorImpl) project() {
    57  }
    58  
    59  func (e *DummyBatchExecutorImpl) reduce() {
    60  }
    61  
    62  func (e *DummyBatchExecutorImpl) preExec(lastBatch bool, start time.Time) {
    63  }
    64  
    65  func (e *DummyBatchExecutorImpl) postExec(start time.Time) {
    66  }
    67  
    68  // BatchExecutorImpl is batch executor implementation for original aggregation query
    69  type BatchExecutorImpl struct {
    70  	qc                  *AQLQueryContext
    71  	batchID             int32
    72  	isLastBatch         bool
    73  	customFilterFunc    customFilterExecutor
    74  	stream              unsafe.Pointer
    75  	start               time.Time
    76  	sizeBeforeGeoFilter int
    77  }
    78  
    79  // NewBatchExecutor is to create a BatchExecutor.
    80  func NewBatchExecutor(qc *AQLQueryContext, batchID int32, customFilterFunc customFilterExecutor, stream unsafe.Pointer) BatchExecutor {
    81  	if qc.isNonAggregationQuery {
    82  		return &NonAggrBatchExecutorImpl{
    83  			BatchExecutorImpl: &BatchExecutorImpl{
    84  				qc:               qc,
    85  				batchID:          batchID,
    86  				customFilterFunc: customFilterFunc,
    87  				stream:           stream,
    88  			},
    89  		}
    90  	}
    91  
    92  	return &BatchExecutorImpl{
    93  		qc:               qc,
    94  		batchID:          batchID,
    95  		customFilterFunc: customFilterFunc,
    96  		stream:           stream,
    97  	}
    98  }
    99  
   100  // filter
   101  func (e *BatchExecutorImpl) filter() {
   102  	// process main table common filter
   103  	e.qc.doProfile(func() {
   104  		for _, filter := range e.qc.OOPK.MainTableCommonFilters {
   105  			e.qc.OOPK.currentBatch.processExpression(filter, nil,
   106  				e.qc.TableScanners, e.qc.OOPK.foreignTables, e.stream, e.qc.Device, e.qc.OOPK.currentBatch.filterAction)
   107  		}
   108  		e.customFilterFunc(e.stream)
   109  		e.qc.reportTimingForCurrentBatch(e.stream, &e.start, filterEvalTiming)
   110  	}, "filters", e.stream)
   111  }
   112  
   113  // join
   114  func (e *BatchExecutorImpl) join() {
   115  	e.qc.doProfile(func() {
   116  		// join foreign tables
   117  		for joinTableID, foreignTable := range e.qc.OOPK.foreignTables {
   118  			if foreignTable != nil {
   119  				// prepare foreign table recordIDs
   120  				// Note:
   121  				// RecordID {
   122  				//   int32_t batchID
   123  				// 	 uint32_t index
   124  				// }
   125  				// takes up 8 bytes
   126  				e.qc.OOPK.currentBatch.foreignTableRecordIDsD = append(e.qc.OOPK.currentBatch.foreignTableRecordIDsD, deviceAllocate(8*e.qc.OOPK.currentBatch.size, e.qc.Device))
   127  				mainTableJoinColumnIndex := e.qc.TableScanners[0].ColumnsByIDs[foreignTable.remoteJoinColumn.ColumnID]
   128  				// perform hash lookup
   129  				e.qc.OOPK.currentBatch.prepareForeignRecordIDs(mainTableJoinColumnIndex, joinTableID, *foreignTable, e.stream, e.qc.Device)
   130  			}
   131  		}
   132  		e.qc.reportTimingForCurrentBatch(e.stream, &e.start, prepareForeignRecordIDsTiming)
   133  	}, "joins", e.stream)
   134  
   135  	e.qc.doProfile(func() {
   136  		// process filters that involves foreign table columns if any
   137  		for _, filter := range e.qc.OOPK.ForeignTableCommonFilters {
   138  			e.qc.OOPK.currentBatch.processExpression(filter, nil,
   139  				e.qc.TableScanners, e.qc.OOPK.foreignTables, e.stream, e.qc.Device, e.qc.OOPK.currentBatch.filterAction)
   140  		}
   141  		e.qc.reportTimingForCurrentBatch(e.stream, &e.start, foreignTableFilterEvalTiming)
   142  	}, "filters", e.stream)
   143  
   144  	if e.qc.OOPK.geoIntersection != nil {
   145  		// allocate two predicate vector for geo intersect
   146  		numWords := (e.qc.OOPK.geoIntersection.numShapes + 31) / 32
   147  		e.qc.OOPK.currentBatch.geoPredicateVectorD = deviceAllocate(e.qc.OOPK.currentBatch.size*4*numWords, e.qc.Device)
   148  	}
   149  
   150  	e.sizeBeforeGeoFilter = e.qc.OOPK.currentBatch.size
   151  	e.qc.doProfile(func() {
   152  		if e.qc.OOPK.geoIntersection != nil {
   153  			pointColumnIndex := e.qc.TableScanners[e.qc.OOPK.geoIntersection.pointTableID].
   154  				ColumnsByIDs[e.qc.OOPK.geoIntersection.pointColumnID]
   155  			e.qc.OOPK.currentBatch.geoIntersect(
   156  				e.qc.OOPK.geoIntersection,
   157  				pointColumnIndex,
   158  				e.qc.OOPK.foreignTables,
   159  				e.qc.OOPK.currentBatch.geoPredicateVectorD,
   160  				e.stream, e.qc.Device)
   161  		}
   162  		e.qc.reportTimingForCurrentBatch(e.stream, &e.start, geoIntersectEvalTiming)
   163  	}, "geo_intersect", e.stream)
   164  }
   165  
   166  // evalMeasures is to fill measure values
   167  func (e *BatchExecutorImpl) evalMeasures() {
   168  	// measure evaluation.
   169  	e.qc.doProfile(func() {
   170  		measureExprRootAction := e.qc.OOPK.currentBatch.makeWriteToMeasureVectorAction(e.qc.OOPK.AggregateType, e.qc.OOPK.MeasureBytes)
   171  		e.qc.OOPK.currentBatch.processExpression(e.qc.OOPK.Measure, nil, e.qc.TableScanners, e.qc.OOPK.foreignTables, e.stream, e.qc.Device, measureExprRootAction)
   172  		e.qc.reportTimingForCurrentBatch(e.stream, &e.start, measureEvalTiming)
   173  	}, "measure", e.stream)
   174  }
   175  
   176  // evalDimensions is to fill dimension values
   177  func (e *BatchExecutorImpl) evalDimensions(prevResultSize int) {
   178  	// dimension expression evaluation.
   179  	for dimIndex, dimension := range e.qc.OOPK.Dimensions {
   180  		e.qc.doProfile(func() {
   181  			dimVectorIndex := e.qc.OOPK.DimensionVectorIndex[dimIndex]
   182  			dimValueOffset, dimNullOffset := queryCom.GetDimensionStartOffsets(e.qc.OOPK.NumDimsPerDimWidth, dimVectorIndex, e.qc.OOPK.currentBatch.resultCapacity)
   183  			if e.qc.OOPK.geoIntersection != nil && e.qc.OOPK.geoIntersection.dimIndex == dimIndex {
   184  				e.qc.OOPK.currentBatch.writeGeoShapeDim(
   185  					e.qc.OOPK.geoIntersection, e.qc.OOPK.currentBatch.geoPredicateVectorD,
   186  					dimValueOffset, dimNullOffset, e.sizeBeforeGeoFilter, prevResultSize, e.stream, e.qc.Device)
   187  			} else {
   188  				dimensionExprRootAction := e.qc.OOPK.currentBatch.makeWriteToDimensionVectorAction(dimValueOffset, dimNullOffset, prevResultSize)
   189  				e.qc.OOPK.currentBatch.processExpression(dimension, nil,
   190  					e.qc.TableScanners, e.qc.OOPK.foreignTables, e.stream, e.qc.Device, dimensionExprRootAction)
   191  			}
   192  		}, fmt.Sprintf("dim%d", dimIndex), e.stream)
   193  	}
   194  
   195  	e.qc.reportTimingForCurrentBatch(e.stream, &e.start, dimEvalTiming)
   196  }
   197  
   198  // project is to generate dimension and measure values
   199  func (e *BatchExecutorImpl) project() {
   200  	// Prepare for dimension and measure evaluation.
   201  	e.qc.OOPK.currentBatch.prepareForDimAndMeasureEval(e.qc.OOPK.DimRowBytes, e.qc.OOPK.MeasureBytes, e.qc.OOPK.NumDimsPerDimWidth, e.qc.OOPK.IsHLL(), e.stream)
   202  
   203  	e.qc.reportTimingForCurrentBatch(e.stream, &e.start, prepareForDimAndMeasureTiming)
   204  
   205  	e.evalDimensions(e.qc.OOPK.currentBatch.resultSize)
   206  
   207  	e.evalMeasures()
   208  
   209  	// wait for stream to clean up non used buffer before final aggregation
   210  	memutils.WaitForCudaStream(e.stream, e.qc.Device)
   211  	e.qc.OOPK.currentBatch.cleanupBeforeAggregation()
   212  }
   213  
   214  // reduce is to aggregate measures based on dimensions and aggregation function
   215  func (e *BatchExecutorImpl) reduce() {
   216  	// init dimIndexVectorD for sorting and reducing
   217  	if e.qc.OOPK.IsHLL() {
   218  		initIndexVector(e.qc.OOPK.currentBatch.dimIndexVectorD[0].getPointer(), 0, e.qc.OOPK.currentBatch.resultSize, e.stream, e.qc.Device)
   219  		initIndexVector(e.qc.OOPK.currentBatch.dimIndexVectorD[1].getPointer(), e.qc.OOPK.currentBatch.resultSize, e.qc.OOPK.currentBatch.resultSize+e.qc.OOPK.currentBatch.size, e.stream, e.qc.Device)
   220  	} else {
   221  		initIndexVector(e.qc.OOPK.currentBatch.dimIndexVectorD[0].getPointer(), 0, e.qc.OOPK.currentBatch.resultSize+e.qc.OOPK.currentBatch.size, e.stream, e.qc.Device)
   222  	}
   223  
   224  	if e.qc.OOPK.IsHLL() {
   225  		e.qc.doProfile(func() {
   226  			e.qc.OOPK.hllVectorD, e.qc.OOPK.hllDimRegIDCountD, e.qc.OOPK.hllVectorSize =
   227  				e.qc.OOPK.currentBatch.hll(e.qc.OOPK.NumDimsPerDimWidth, e.isLastBatch, e.stream, e.qc.Device)
   228  			e.qc.reportTimingForCurrentBatch(e.stream, &e.start, hllEvalTiming)
   229  		}, "hll", e.stream)
   230  	} else {
   231  		// sort by key.
   232  		e.qc.doProfile(func() {
   233  			e.qc.OOPK.currentBatch.sortByKey(e.qc.OOPK.NumDimsPerDimWidth, e.stream, e.qc.Device)
   234  			e.qc.reportTimingForCurrentBatch(e.stream, &e.start, sortEvalTiming)
   235  		}, "sort", e.stream)
   236  
   237  		// reduce by key.
   238  		e.qc.doProfile(func() {
   239  			e.qc.OOPK.currentBatch.reduceByKey(e.qc.OOPK.NumDimsPerDimWidth, e.qc.OOPK.MeasureBytes, e.qc.OOPK.AggregateType, e.stream, e.qc.Device)
   240  			e.qc.reportTimingForCurrentBatch(e.stream, &e.start, reduceEvalTiming)
   241  		}, "reduce", e.stream)
   242  	}
   243  	memutils.WaitForCudaStream(e.stream, e.qc.Device)
   244  }
   245  
   246  func (e *BatchExecutorImpl) preExec(isLastBatch bool, start time.Time) {
   247  	e.isLastBatch = isLastBatch
   248  	// initialize index vector.
   249  	if !e.qc.OOPK.currentBatch.indexVectorD.isNull() {
   250  		initIndexVector(e.qc.OOPK.currentBatch.indexVectorD.getPointer(), 0, e.qc.OOPK.currentBatch.size, e.stream, e.qc.Device)
   251  	}
   252  	e.qc.reportTimingForCurrentBatch(e.stream, &start, initIndexVectorTiming)
   253  }
   254  
   255  func (e *BatchExecutorImpl) postExec(start time.Time) {
   256  	// swap result buffer before next batch
   257  	e.qc.OOPK.currentBatch.swapResultBufferForNextBatch()
   258  	e.qc.reportTimingForCurrentBatch(e.stream, &start, cleanupTiming)
   259  	e.qc.reportBatch(e.batchID > 0)
   260  
   261  	// Only profile one batch.
   262  	e.qc.Profiling = ""
   263  }