github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/querytracker/snhasher.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package querytracker
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  	"strings"
    23  
    24  	"github.com/cespare/xxhash"
    25  	"github.com/siglens/siglens/pkg/segment/structs"
    26  	"github.com/siglens/siglens/pkg/segment/utils"
    27  )
    28  
    29  /*
    30  	  ALGORITHM for creating a unique hash
    31  
    32  	   1. In each struct maintain a hid (hashid); it is a pre-determined way of creating a hash out of
    33  		  the elements of that struct, in the sequence they are defined in the go files
    34  	   2. If a struct has sub-structs then we recursively call the hashid func
    35  	   3. if the hid is already present then we use it else we calculate it for that data type
    36  	   4. we keep doing recursive until we get base data types of go like int, bool, string, etc...
    37  	   5. This is a pretty standard way of creating id borrowed from the java world
    38  */
    39  func GetHashForQuery(n *structs.SearchNode) string {
    40  	return fmt.Sprintf("%v", getHashForSearchNode(n))
    41  }
    42  
    43  func GetHashForAggs(a *structs.QueryAggregators) string {
    44  	return fmt.Sprintf("%v", getHashForAggregators(a))
    45  }
    46  
    47  func getHashForAggregators(a *structs.QueryAggregators) uint64 {
    48  
    49  	if a == nil {
    50  		return 0
    51  	}
    52  
    53  	// the only parts of aggs we need to hash are the groupby configs
    54  	// the bucketing/sorting/early exiting does not change pqs vs not, agileTree vs not
    55  	val := fmt.Sprintf("%v:%v:%v",
    56  		getHashForGroupBy(a.GroupByRequest),
    57  		getHashForSegmentStats(a.MeasureOperations),
    58  		getHashForTimeHistogram(a.TimeHistogram),
    59  	)
    60  
    61  	return xxhash.Sum64String(val)
    62  }
    63  
    64  func getHashForSearchNode(sn *structs.SearchNode) uint64 {
    65  
    66  	if sn == nil {
    67  		return 0
    68  	}
    69  
    70  	val := fmt.Sprintf("%v:%v:%v",
    71  		getHashForSearchCondition(sn.AndSearchConditions),
    72  		getHashForSearchCondition(sn.OrSearchConditions),
    73  		getHashForSearchCondition(sn.ExclusionSearchConditions),
    74  	)
    75  
    76  	return xxhash.Sum64String(val)
    77  }
    78  
    79  func getHashForSearchCondition(sc *structs.SearchCondition) uint64 {
    80  
    81  	if sc == nil {
    82  		return 0
    83  	}
    84  
    85  	sqhids := make([]uint64, len(sc.SearchQueries))
    86  	for _, sq := range sc.SearchQueries {
    87  		sqhids = append(sqhids, getHashForSearchQuery(sq))
    88  	}
    89  	sort.Slice(sqhids, func(i, j int) bool { return sqhids[i] < sqhids[j] })
    90  
    91  	snhids := make([]uint64, len(sc.SearchNode))
    92  	for _, sn := range sc.SearchNode {
    93  		snhids = append(snhids, getHashForSearchNode(sn))
    94  	}
    95  	sort.Slice(snhids, func(i, j int) bool { return snhids[i] < snhids[j] })
    96  
    97  	var sb strings.Builder
    98  	for _, entry := range sqhids {
    99  		sb.WriteString(fmt.Sprintf("%v:", entry))
   100  	}
   101  
   102  	for _, entry := range snhids {
   103  		sb.WriteString(fmt.Sprintf("%v:", entry))
   104  	}
   105  
   106  	return xxhash.Sum64String(sb.String())
   107  }
   108  
   109  func getHashForSearchQuery(sq *structs.SearchQuery) uint64 {
   110  
   111  	if sq == nil {
   112  		return 0
   113  	}
   114  
   115  	val := fmt.Sprintf("%v:%v:%v:%v",
   116  		getHashForSearchExpression(sq.ExpressionFilter),
   117  		getHashForMatchFilter(sq.MatchFilter),
   118  		sq.SearchType,
   119  		getHashForQueryInfo(sq.QueryInfo))
   120  	return xxhash.Sum64String(val)
   121  }
   122  
   123  func getHashForSearchExpression(se *structs.SearchExpression) uint64 {
   124  
   125  	if se == nil {
   126  		return 0
   127  	}
   128  
   129  	val := fmt.Sprintf("%v:%v:%v:%v",
   130  		getHashForSearchExpressionInput(se.LeftSearchInput),
   131  		se.FilterOp,
   132  		getHashForSearchExpressionInput(se.RightSearchInput),
   133  		getHashForSearchInfo(se.SearchInfo))
   134  	return xxhash.Sum64String(val)
   135  }
   136  
   137  func getHashForMatchFilter(mf *structs.MatchFilter) uint64 {
   138  
   139  	if mf == nil {
   140  		return 0
   141  	}
   142  
   143  	mwords := make([]string, len(mf.MatchWords))
   144  	for _, w := range mf.MatchWords {
   145  		mwords = append(mwords, string(w))
   146  	}
   147  
   148  	sort.Strings(mwords)
   149  
   150  	val := fmt.Sprintf("%v:%v:%v:%v:%v",
   151  		mf.MatchColumn,
   152  		mwords,
   153  		mf.MatchOperator,
   154  		mf.MatchPhrase,
   155  		mf.MatchType)
   156  
   157  	return xxhash.Sum64String(val)
   158  }
   159  
   160  func getHashForQueryInfo(qi *structs.QueryInfo) uint64 {
   161  
   162  	if qi == nil {
   163  		return 0
   164  	}
   165  
   166  	val := fmt.Sprintf("%v:%v",
   167  		qi.ColName,
   168  		getHashForDtypeEnclosure(qi.QValDte))
   169  
   170  	return xxhash.Sum64String(val)
   171  }
   172  
   173  func getHashForSearchInfo(si *structs.SearchInfo) uint64 {
   174  
   175  	if si == nil {
   176  		return 0
   177  	}
   178  
   179  	val := fmt.Sprintf("%v:%v",
   180  		si.ColEncoding,
   181  		getHashForDtypeEnclosure(si.QValDte))
   182  
   183  	return xxhash.Sum64String(val)
   184  }
   185  
   186  func getHashForDtypeEnclosure(dte *utils.DtypeEnclosure) uint64 {
   187  
   188  	if dte == nil {
   189  		return 0
   190  	}
   191  
   192  	var val string
   193  	switch dte.Dtype {
   194  	case utils.SS_DT_BOOL:
   195  		val = fmt.Sprintf("%v:%v", dte.Dtype, dte.BoolVal)
   196  	case utils.SS_DT_STRING:
   197  		val = fmt.Sprintf("%v:%v", dte.Dtype, dte.StringVal)
   198  	case utils.SS_DT_UNSIGNED_NUM:
   199  		val = fmt.Sprintf("%v:%v", dte.Dtype, dte.UnsignedVal)
   200  	case utils.SS_DT_SIGNED_NUM:
   201  		val = fmt.Sprintf("%v:%v", dte.Dtype, dte.SignedVal)
   202  	case utils.SS_DT_FLOAT:
   203  		val = fmt.Sprintf("%v:%v", dte.Dtype, dte.FloatVal)
   204  	}
   205  
   206  	return xxhash.Sum64String(val)
   207  }
   208  
   209  func getHashForSearchExpressionInput(sei *structs.SearchExpressionInput) uint64 {
   210  
   211  	if sei == nil {
   212  		return 0
   213  	}
   214  
   215  	val := fmt.Sprintf("%v:%v:%v",
   216  		sei.ColumnName,
   217  		getHashForExpression(sei.ComplexRelation),
   218  		getHashForDtypeEnclosure(sei.ColumnValue))
   219  
   220  	return xxhash.Sum64String(val)
   221  }
   222  
   223  func getHashForExpression(e *structs.Expression) uint64 {
   224  
   225  	if e == nil {
   226  		return 0
   227  	}
   228  
   229  	val := fmt.Sprintf("%v:%v:%v",
   230  		getHashForExpressionInput(e.LeftInput),
   231  		e.ExpressionOp,
   232  		getHashForExpressionInput(e.RightInput))
   233  
   234  	return xxhash.Sum64String(val)
   235  }
   236  
   237  func getHashForExpressionInput(ei *structs.ExpressionInput) uint64 {
   238  
   239  	if ei == nil {
   240  		return 0
   241  	}
   242  
   243  	val := fmt.Sprintf("%v:%v",
   244  		getHashForDtypeEnclosure(ei.ColumnValue),
   245  		ei.ColumnName)
   246  
   247  	return xxhash.Sum64String(val)
   248  }
   249  
   250  func getHashForGroupBy(r *structs.GroupByRequest) uint64 {
   251  	if r == nil {
   252  		return 0
   253  	}
   254  
   255  	val := fmt.Sprintf("%v:%v",
   256  		getHashForGroupByColumns(r.GroupByColumns),
   257  		getHashForMeasureOperations(r.MeasureOperations))
   258  	return xxhash.Sum64String(val)
   259  }
   260  
   261  func getHashForSegmentStats(mOps []*structs.MeasureAggregator) uint64 {
   262  	return getHashForMeasureOperations(mOps)
   263  }
   264  
   265  func getHashForTimeHistogram(tb *structs.TimeBucket) uint64 {
   266  	if tb == nil {
   267  		return 0
   268  	}
   269  
   270  	var sb strings.Builder
   271  	sb.WriteString(fmt.Sprintf("%v:", tb.EndTime))
   272  	sb.WriteString(fmt.Sprintf("%v:", tb.StartTime))
   273  	sb.WriteString(fmt.Sprintf("%v", tb.IntervalMillis))
   274  	return xxhash.Sum64String(sb.String())
   275  }
   276  
   277  func getHashForGroupByColumns(cols []string) uint64 {
   278  	if len(cols) == 0 {
   279  		return 0
   280  	}
   281  
   282  	sort.Strings(cols)
   283  	var sb strings.Builder
   284  	for _, entry := range cols {
   285  		sb.WriteString(fmt.Sprintf("%v:", entry))
   286  	}
   287  	return xxhash.Sum64String(sb.String())
   288  }
   289  
   290  func getHashForMeasureOperations(measureOps []*structs.MeasureAggregator) uint64 {
   291  	if len(measureOps) == 0 {
   292  		return 0
   293  	}
   294  
   295  	temp := make([]string, len(measureOps))
   296  	for idx, m := range measureOps {
   297  		temp[idx] = fmt.Sprintf("%+v-%+v", m.MeasureCol, m.MeasureFunc.String())
   298  	}
   299  	sort.Strings(temp)
   300  	var sb strings.Builder
   301  	for _, entry := range temp {
   302  		sb.WriteString(fmt.Sprintf("%v:", entry))
   303  	}
   304  	return xxhash.Sum64String(sb.String())
   305  }