github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/querytracker/snhasher.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package querytracker 18 19 import ( 20 "fmt" 21 "sort" 22 "strings" 23 24 "github.com/cespare/xxhash" 25 "github.com/siglens/siglens/pkg/segment/structs" 26 "github.com/siglens/siglens/pkg/segment/utils" 27 ) 28 29 /* 30 ALGORITHM for creating a unique hash 31 32 1. In each struct maintain a hid (hashid), it is pre-determined way of creating hash out of 33 of elements of that struct in the sequence they are defined in the go files 34 2. If a struct has sub-structs then we recursively call the hashid func 35 3. if the hid is already present then we use it else we calculate it for that data type 36 4. we keep doing recursive until we get base data types of go like int, bool, string, etc... 37 5. This is a pretty standard way of creating id borrowed from the java world 38 */ 39 func GetHashForQuery(n *structs.SearchNode) string { 40 return fmt.Sprintf("%v", getHashForSearchNode(n)) 41 } 42 43 func GetHashForAggs(a *structs.QueryAggregators) string { 44 return fmt.Sprintf("%v", getHashForAggregators(a)) 45 } 46 47 func getHashForAggregators(a *structs.QueryAggregators) uint64 { 48 49 if a == nil { 50 return 0 51 } 52 53 // the only parts of aggs we need to hash are the groupby configs 54 // the bucketing/sorting/early exiting does not change pqs vs not, agileTree vs not 55 val := fmt.Sprintf("%v:%v:%v", 56 getHashForGroupBy(a.GroupByRequest), 57 getHashForSegmentStats(a.MeasureOperations), 58 getHashForTimeHistogram(a.TimeHistogram), 59 ) 60 61 return xxhash.Sum64String(val) 62 } 63 64 func getHashForSearchNode(sn *structs.SearchNode) uint64 { 65 66 if sn == nil { 67 return 0 68 } 69 70 val := fmt.Sprintf("%v:%v:%v", 71 getHashForSearchCondition(sn.AndSearchConditions), 72 getHashForSearchCondition(sn.OrSearchConditions), 73 getHashForSearchCondition(sn.ExclusionSearchConditions), 74 ) 75 76 return xxhash.Sum64String(val) 77 } 78 79 func getHashForSearchCondition(sc *structs.SearchCondition) uint64 { 80 81 if sc == nil { 82 return 0 83 } 84 85 sqhids := make([]uint64, len(sc.SearchQueries)) 86 for _, sq := range sc.SearchQueries { 87 sqhids = append(sqhids, getHashForSearchQuery(sq)) 88 } 89 sort.Slice(sqhids, func(i, j int) bool { return sqhids[i] < sqhids[j] }) 90 91 snhids := make([]uint64, len(sc.SearchNode)) 92 for _, sn := range sc.SearchNode { 93 snhids = append(snhids, getHashForSearchNode(sn)) 94 } 95 sort.Slice(snhids, func(i, j int) bool { return snhids[i] < snhids[j] }) 96 97 var sb strings.Builder 98 for _, entry := range sqhids { 99 sb.WriteString(fmt.Sprintf("%v:", entry)) 100 } 101 102 for _, entry := range snhids { 103 sb.WriteString(fmt.Sprintf("%v:", entry)) 104 } 105 106 return xxhash.Sum64String(sb.String()) 107 } 108 109 func getHashForSearchQuery(sq *structs.SearchQuery) uint64 { 110 111 if sq == nil { 112 return 0 113 } 114 115 val := fmt.Sprintf("%v:%v:%v:%v", 116 getHashForSearchExpression(sq.ExpressionFilter), 117 getHashForMatchFilter(sq.MatchFilter), 118 sq.SearchType, 119 getHashForQueryInfo(sq.QueryInfo)) 120 return xxhash.Sum64String(val) 121 } 122 123 func getHashForSearchExpression(se *structs.SearchExpression) uint64 { 124 125 if se == nil { 126 return 0 127 } 128 129 val := fmt.Sprintf("%v:%v:%v:%v", 130 getHashForSearchExpressionInput(se.LeftSearchInput), 131 se.FilterOp, 132 getHashForSearchExpressionInput(se.RightSearchInput), 133 getHashForSearchInfo(se.SearchInfo)) 134 return xxhash.Sum64String(val) 135 } 136 137 func getHashForMatchFilter(mf *structs.MatchFilter) uint64 { 138 139 if mf == nil { 140 return 0 141 } 142 143 mwords := make([]string, len(mf.MatchWords)) 144 for _, w := range mf.MatchWords { 145 mwords = append(mwords, string(w)) 146 } 147 148 sort.Strings(mwords) 149 150 val := fmt.Sprintf("%v:%v:%v:%v:%v", 151 mf.MatchColumn, 152 mwords, 153 mf.MatchOperator, 154 mf.MatchPhrase, 155 mf.MatchType) 156 157 return xxhash.Sum64String(val) 158 } 159 160 func getHashForQueryInfo(qi *structs.QueryInfo) uint64 { 161 162 if qi == nil { 163 return 0 164 } 165 166 val := fmt.Sprintf("%v:%v", 167 qi.ColName, 168 getHashForDtypeEnclosure(qi.QValDte)) 169 170 return xxhash.Sum64String(val) 171 } 172 173 func getHashForSearchInfo(si *structs.SearchInfo) uint64 { 174 175 if si == nil { 176 return 0 177 } 178 179 val := fmt.Sprintf("%v:%v", 180 si.ColEncoding, 181 getHashForDtypeEnclosure(si.QValDte)) 182 183 return xxhash.Sum64String(val) 184 } 185 186 func getHashForDtypeEnclosure(dte *utils.DtypeEnclosure) uint64 { 187 188 if dte == nil { 189 return 0 190 } 191 192 var val string 193 switch dte.Dtype { 194 case utils.SS_DT_BOOL: 195 val = fmt.Sprintf("%v:%v", dte.Dtype, dte.BoolVal) 196 case utils.SS_DT_STRING: 197 val = fmt.Sprintf("%v:%v", dte.Dtype, dte.StringVal) 198 case utils.SS_DT_UNSIGNED_NUM: 199 val = fmt.Sprintf("%v:%v", dte.Dtype, dte.UnsignedVal) 200 case utils.SS_DT_SIGNED_NUM: 201 val = fmt.Sprintf("%v:%v", dte.Dtype, dte.SignedVal) 202 case utils.SS_DT_FLOAT: 203 val = fmt.Sprintf("%v:%v", dte.Dtype, dte.FloatVal) 204 } 205 206 return xxhash.Sum64String(val) 207 } 208 209 func getHashForSearchExpressionInput(sei *structs.SearchExpressionInput) uint64 { 210 211 if sei == nil { 212 return 0 213 } 214 215 val := fmt.Sprintf("%v:%v:%v", 216 sei.ColumnName, 217 getHashForExpression(sei.ComplexRelation), 218 getHashForDtypeEnclosure(sei.ColumnValue)) 219 220 return xxhash.Sum64String(val) 221 } 222 223 func getHashForExpression(e *structs.Expression) uint64 { 224 225 if e == nil { 226 return 0 227 } 228 229 val := fmt.Sprintf("%v:%v:%v", 230 getHashForExpressionInput(e.LeftInput), 231 e.ExpressionOp, 232 getHashForExpressionInput(e.RightInput)) 233 234 return xxhash.Sum64String(val) 235 } 236 237 func getHashForExpressionInput(ei *structs.ExpressionInput) uint64 { 238 239 if ei == nil { 240 return 0 241 } 242 243 val := fmt.Sprintf("%v:%v", 244 getHashForDtypeEnclosure(ei.ColumnValue), 245 ei.ColumnName) 246 247 return xxhash.Sum64String(val) 248 } 249 250 func getHashForGroupBy(r *structs.GroupByRequest) uint64 { 251 if r == nil { 252 return 0 253 } 254 255 val := fmt.Sprintf("%v:%v", 256 getHashForGroupByColumns(r.GroupByColumns), 257 getHashForMeasureOperations(r.MeasureOperations)) 258 return xxhash.Sum64String(val) 259 } 260 261 func getHashForSegmentStats(mOps []*structs.MeasureAggregator) uint64 { 262 return getHashForMeasureOperations(mOps) 263 } 264 265 func getHashForTimeHistogram(tb *structs.TimeBucket) uint64 { 266 if tb == nil { 267 return 0 268 } 269 270 var sb strings.Builder 271 sb.WriteString(fmt.Sprintf("%v:", tb.EndTime)) 272 sb.WriteString(fmt.Sprintf("%v:", tb.StartTime)) 273 sb.WriteString(fmt.Sprintf("%v", tb.IntervalMillis)) 274 return xxhash.Sum64String(sb.String()) 275 } 276 277 func getHashForGroupByColumns(cols []string) uint64 { 278 if len(cols) == 0 { 279 return 0 280 } 281 282 sort.Strings(cols) 283 var sb strings.Builder 284 for _, entry := range cols { 285 sb.WriteString(fmt.Sprintf("%v:", entry)) 286 } 287 return xxhash.Sum64String(sb.String()) 288 } 289 290 func getHashForMeasureOperations(measureOps []*structs.MeasureAggregator) uint64 { 291 if len(measureOps) == 0 { 292 return 0 293 } 294 295 temp := make([]string, len(measureOps)) 296 for idx, m := range measureOps { 297 temp[idx] = fmt.Sprintf("%+v-%+v", m.MeasureCol, m.MeasureFunc.String()) 298 } 299 sort.Strings(temp) 300 var sb strings.Builder 301 for _, entry := range temp { 302 sb.WriteString(fmt.Sprintf("%v:", entry)) 303 } 304 return xxhash.Sum64String(sb.String()) 305 }