github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/time_bucketizer.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/uber/aresdb/query/common"
    20  	"github.com/uber/aresdb/query/expr"
    21  	"github.com/uber/aresdb/utils"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  )
    26  
    27  // used to convert supported time units (string) to single char format
    28  var bucketSizeToNormalized = map[string]string{
    29  	"minutes": "m",
    30  	"minute":  "m",
    31  	"day":     "d",
    32  	"hours":   "h",
    33  	"hour":    "h",
    34  }
    35  
    36  // mapping from bucketizer to the functor to get the start of the bucketizer.
    37  var irregularBucketizer2Functor = map[string]expr.Token{
    38  	"month":   expr.GET_MONTH_START,
    39  	"quarter": expr.GET_QUARTER_START,
    40  	"year":    expr.GET_YEAR_START,
    41  	"week":    expr.GET_WEEK_START,
    42  }
    43  
    44  // regularRecurringTimeBucketizer is in the format of "x of y" where y is a regular time interval of which number of
    45  // seconds is fixed, e.g (week, day, hour). Y is called the bucket Size and x is called base Unit.
    46  type regularRecurringTimeBucketizer struct {
    47  	baseUnit   int
    48  	bucketSize int
    49  }
    50  
    51  // mapping from regular recurring time bucketizer str to base Unit and bucket Size.
    52  var tbStr2regularRecurringTimeBucketizer = map[string]regularRecurringTimeBucketizer{
    53  	"time of day":  {baseUnit: 1, bucketSize: common.SecondsPerDay},
    54  	"hour of day":  {baseUnit: common.SecondsPerHour, bucketSize: common.SecondsPerDay},
    55  	"hour of week": {baseUnit: common.SecondsPerHour, bucketSize: common.SecondsPerWeek},
    56  	"day of week":  {baseUnit: common.SecondsPerDay, bucketSize: common.SecondsPerWeek},
    57  }
    58  
    59  //
    60  var irregularRecurringBucketizer2Functor = map[string]expr.Token{
    61  	"day of month":    expr.GET_DAY_OF_MONTH,
    62  	"day of year":     expr.GET_DAY_OF_YEAR,
    63  	"month of year":   expr.GET_MONTH_OF_YEAR,
    64  	"quarter of year": expr.GET_QUARTER_OF_YEAR,
    65  }
    66  
    67  // buildTimeDimensionExpr constructs sub ast based on several query params:
    68  // the time bucketizer string and the timezone string.
    69  // we parse time bucketizer into bucketInSeconds, for timezone string:
    70  // if fixed (non-UTC) timezone is passed in, we extend the ast to `(timeColumn CONVERT_TZ fixed_timezone_offset) FLOOR bucketInSeconds`
    71  // if timezoneColumn exists, we extend the ast to `(timeColumn CONVERT_TZ timezoneColumn) FLOOR bucketInSeconds`
    72  func (qc *AQLQueryContext) buildTimeDimensionExpr(timeBucketizerString string, timeColumn expr.Expr) (expr.Expr, error) {
    73  	var bucketizerExpr expr.Expr
    74  	var err error
    75  	timeColumnWithOffsetExpr := timeColumn
    76  
    77  	// construct TimeSeriesBucketizer expr
    78  	if qc.timezoneTable.tableColumn != "" {
    79  		var timezoneTableID, timezoneColumnID int
    80  		timezoneColumn := fmt.Sprintf("%s.%s", qc.timezoneTable.tableAlias, qc.timezoneTable.tableColumn)
    81  		timezoneTableID = qc.TableIDByAlias[qc.timezoneTable.tableAlias]
    82  		timezoneColumnID = qc.TableScanners[timezoneTableID].Schema.ColumnIDs[qc.timezoneTable.tableColumn]
    83  		// expand ast by offsetting timezone column
    84  		timeColumnWithOffsetExpr = &expr.BinaryExpr{
    85  			Op:  expr.CONVERT_TZ,
    86  			LHS: timeColumn,
    87  			RHS: &expr.VarRef{
    88  				Val:      timezoneColumn,
    89  				TableID:  timezoneTableID,
    90  				ColumnID: timezoneColumnID,
    91  			},
    92  		}
    93  	} else if qc.fixedTimezone.String() != time.UTC.String() {
    94  		_, fromOffset := qc.fromTime.Time.Zone()
    95  		_, toOffset := qc.toTime.Time.Zone()
    96  		if fromOffset != toOffset {
    97  			offsetDiff := fromOffset - toOffset
    98  			switchTs, err := utils.CalculateDSTSwitchTs(qc.fromTime.Time.Unix(), qc.toTime.Time.Unix(), qc.fixedTimezone)
    99  			if err != nil {
   100  				return nil, err
   101  			}
   102  			qc.dstswitch = switchTs
   103  			// simulate IF statement. sub ast: timeCol + fromOffset + (timeCol > switchTs) * offsetDiff
   104  			// where (timeCol > switchTs) will return 1 or 0
   105  			timeColumnWithOffsetExpr = &expr.BinaryExpr{
   106  				Op:  expr.ADD,
   107  				LHS: timeColumn,
   108  				RHS: &expr.BinaryExpr{
   109  					Op: expr.ADD,
   110  					LHS: &expr.NumberLiteral{
   111  						Expr:     strconv.Itoa(fromOffset),
   112  						Int:      fromOffset,
   113  						ExprType: expr.Signed,
   114  					},
   115  					RHS: &expr.BinaryExpr{
   116  						Op: expr.MUL,
   117  						LHS: &expr.NumberLiteral{
   118  							Expr:     strconv.Itoa(offsetDiff),
   119  							Int:      offsetDiff,
   120  							ExprType: expr.Signed,
   121  						},
   122  						RHS: &expr.BinaryExpr{
   123  							Op:  expr.GTE,
   124  							LHS: timeColumn,
   125  							RHS: &expr.NumberLiteral{
   126  								Expr: strconv.Itoa(int(switchTs)),
   127  								Int:  int(switchTs),
   128  							},
   129  							ExprType: expr.Boolean,
   130  						},
   131  						ExprType: expr.Signed,
   132  					},
   133  				},
   134  			}
   135  		} else {
   136  			timeColumnWithOffsetExpr = &expr.BinaryExpr{
   137  				Op:  expr.CONVERT_TZ,
   138  				LHS: timeColumn,
   139  				RHS: &expr.NumberLiteral{
   140  					Expr: strconv.Itoa(fromOffset),
   141  					Int:  fromOffset,
   142  				},
   143  			}
   144  		}
   145  
   146  	}
   147  
   148  	bucketizerExpr, err = parseRecurringTimeBucketizer(timeBucketizerString, timeColumnWithOffsetExpr)
   149  	if err != nil || bucketizerExpr != nil {
   150  		return bucketizerExpr, err
   151  	}
   152  
   153  	if bucketizerExpr = parseIrregularTimeBucketizer(timeBucketizerString, timeColumnWithOffsetExpr); bucketizerExpr != nil {
   154  		return bucketizerExpr, nil
   155  	}
   156  
   157  	timeBucket, err := common.ParseRegularTimeBucketizer(timeBucketizerString)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  	bucketInSeconds := timeBucket.Size * common.BucketSizeToseconds[timeBucket.Unit]
   162  
   163  	bucketizerExpr = &expr.BinaryExpr{
   164  		Op:  expr.FLOOR,
   165  		LHS: timeColumnWithOffsetExpr,
   166  		RHS: &expr.NumberLiteral{
   167  			Expr:     strconv.Itoa(bucketInSeconds),
   168  			Int:      bucketInSeconds,
   169  			ExprType: expr.Unsigned,
   170  		},
   171  	}
   172  
   173  	return bucketizerExpr, nil
   174  }
   175  
   176  // getRegularRecurringTimeBucketizer converts a time bucketizer string to a regularRecurringTimeBucketizer struct.
   177  // Nil means it does not match.
   178  func getRegularRecurringTimeBucketizer(tbStr string) (*regularRecurringTimeBucketizer, error) {
   179  	if strings.HasSuffix(tbStr, "minutes of day") {
   180  		comps := strings.Fields(tbStr)
   181  		if len(comps) < 4 {
   182  			return nil, utils.StackError(nil, "Must put number before minutes of day: got %s", tbStr)
   183  		}
   184  		n, err := strconv.Atoi(comps[0])
   185  		if err != nil {
   186  			return nil, utils.StackError(err, "Cannot parse the number before minutes of day: got %s", tbStr)
   187  		}
   188  
   189  		if n < 2 || n > 30 || 30%n != 0 {
   190  			return nil, utils.StackError(err, "Only {2,3,4,5,6,10,15,20,30} minutes of day are allowed :"+
   191  				" got %s", tbStr)
   192  		}
   193  		return &regularRecurringTimeBucketizer{baseUnit: 60 * n, bucketSize: common.SecondsPerDay}, nil
   194  	}
   195  
   196  	if tb, ok := tbStr2regularRecurringTimeBucketizer[tbStr]; ok {
   197  		return &tb, nil
   198  	}
   199  	return nil, nil
   200  }
   201  
   202  // parseRecurringTimeBucketizer parses the time bucketizer string into a composite expression tree if it's a recurring
   203  // time bucketizer. The tree will be like floor((timeColumn % bucketSize), unitSize) if it's a regular recurring
   204  // time bucketizer, e.g.(each individual time duration contains the same amount of seconds). Otherwise it will
   205  // return a AST of a special function call. E.g. getDayOfMonth.
   206  func parseRecurringTimeBucketizer(timeBucketizerString string, timeColumnExpr expr.Expr) (expr.Expr, error) {
   207  	tb, err := getRegularRecurringTimeBucketizer(timeBucketizerString)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  
   212  	if tb != nil {
   213  		var e expr.Expr
   214  		if tb.baseUnit > 1 {
   215  			adjustedTimeExpr := timeColumnExpr
   216  			// if bucket size is equal to week, we need to adjust it to Monday by subtracting number of seconds per
   217  			// 4 days (since 1970-01-01 is a Thursday).
   218  			if tb.bucketSize == common.SecondsPerWeek {
   219  				adjustedTimeExpr = &expr.BinaryExpr{
   220  					Op:  expr.SUB,
   221  					LHS: adjustedTimeExpr,
   222  					RHS: &expr.NumberLiteral{
   223  						Expr:     strconv.Itoa(common.SecondsPer4Day),
   224  						Int:      common.SecondsPer4Day,
   225  						ExprType: expr.Unsigned,
   226  					},
   227  				}
   228  			}
   229  
   230  			e = &expr.BinaryExpr{
   231  				Op: expr.FLOOR,
   232  				LHS: &expr.BinaryExpr{
   233  					Op:  expr.MOD,
   234  					LHS: adjustedTimeExpr,
   235  					RHS: &expr.NumberLiteral{
   236  						Expr:     strconv.Itoa(tb.bucketSize),
   237  						Int:      tb.bucketSize,
   238  						ExprType: expr.Unsigned,
   239  					},
   240  				},
   241  				RHS: &expr.NumberLiteral{
   242  					Expr:     strconv.Itoa(tb.baseUnit),
   243  					Int:      tb.baseUnit,
   244  					ExprType: expr.Unsigned,
   245  				},
   246  			}
   247  		} else {
   248  			e = &expr.BinaryExpr{
   249  				Op:  expr.MOD,
   250  				LHS: timeColumnExpr,
   251  				RHS: &expr.NumberLiteral{
   252  					Expr:     strconv.Itoa(tb.bucketSize),
   253  					Int:      tb.bucketSize,
   254  					ExprType: expr.Unsigned,
   255  				},
   256  			}
   257  		}
   258  
   259  		// if base unit >= day, we need to divide it by the base unit.
   260  		if tb.baseUnit >= common.SecondsPerDay {
   261  			// For division, everything is converted to float.
   262  			val := float64(tb.baseUnit)
   263  			e = &expr.BinaryExpr{
   264  				Op:  expr.DIV,
   265  				LHS: e,
   266  				RHS: &expr.NumberLiteral{
   267  					Expr:     strconv.FormatFloat(val, 'f', 2, 64),
   268  					Val:      val,
   269  					ExprType: expr.Float,
   270  				},
   271  			}
   272  		}
   273  		return e, nil
   274  	}
   275  
   276  	if functorToken, ok := irregularRecurringBucketizer2Functor[timeBucketizerString]; ok {
   277  		return &expr.UnaryExpr{
   278  			Op:       functorToken,
   279  			Expr:     timeColumnExpr,
   280  			ExprType: expr.Unsigned,
   281  		}, nil
   282  	}
   283  
   284  	return nil, nil
   285  }
   286  
   287  // parseIrregularTimeBucketizer parses the time bucketizer into a UnaryExpr with the corresponding functor as the OP
   288  // node and original time column expression as the call argument. Return nil if it is not a irregular time series
   289  // bucketizer.
   290  func parseIrregularTimeBucketizer(timeBucketizerString string, timeColumn expr.Expr) expr.Expr {
   291  	if functorToken, ok := irregularBucketizer2Functor[timeBucketizerString]; ok {
   292  		return &expr.UnaryExpr{
   293  			Op:       functorToken,
   294  			Expr:     timeColumn,
   295  			ExprType: expr.Unsigned,
   296  		}
   297  	}
   298  	return nil
   299  }