github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/structs/segsearchstructs.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package structs 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "regexp" 24 "strings" 25 26 "github.com/bits-and-blooms/bloom/v3" 27 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 28 "github.com/siglens/siglens/pkg/config" 29 "github.com/siglens/siglens/pkg/segment/utils" 30 . "github.com/siglens/siglens/pkg/segment/utils" 31 log "github.com/sirupsen/logrus" 32 ) 33 34 type SearchQueryType uint8 35 36 const ( 37 MatchAll SearchQueryType = iota // match all data 38 MatchWords // match words in a single column 39 MatchWordsAllColumns // match words in any column 40 SimpleExpression // simple expression has one column name an operator and a value to compare 41 RegexExpression // regex expression has one column and a regex string column value 42 RegexExpressionAllColumns // apply RegexExpression on all columns 43 SimpleExpressionAllColumns // apply SimpleExpression on all columns 44 ComplexExpression // complex expression relates multiple columns 45 MatchDictArraySingleColumn 46 MatchDictArrayAllColumns 47 ) 48 49 type SegType uint8 50 51 const ( 52 UNKNOWN SegType = iota 53 RAW_SEARCH 54 PQS 55 UNROTATED_PQS 56 UNROTATED_RAW_SEARCH 57 SEGMENT_STATS_SEARCH 58 UNROTATED_SEGMENT_STATS_SEARCH 59 METRICS_SEARCH 60 UNROTATED_METRICS_SEARCH 61 ) 62 63 func (s SegType) String() string { 64 switch s { 65 case RAW_SEARCH: 66 return "RAW_SEARCH" 67 case PQS: 68 return "PQS" 69 case UNROTATED_PQS: 70 return "UNROTATED_PQS" 71 case UNROTATED_RAW_SEARCH: 72 return "UNROTATED_RAW_SEARCH" 73 case SEGMENT_STATS_SEARCH: 74 return "SEGMENT_STATS_SEARCH" 75 case UNROTATED_SEGMENT_STATS_SEARCH: 76 return "UNROTATED_SEGMENT_STATS_SEARCH" 77 case METRICS_SEARCH: 78 return "METRICS_SEARCH" 79 case UNROTATED_METRICS_SEARCH: 80 return "UNROTATED_METRICS_SEARCH" 81 default: 82 return "UNKNOWN" 83 } 84 } 85 86 // A flattened expression input used for searching 87 // TODO: flatten SearchExpressionInput with just []byte input 88 type SearchExpressionInput struct { 89 ColumnName string // columnName to search for 90 ComplexRelation *Expression // complex relations that have columns defined in both sides 91 ColumnValue *DtypeEnclosure // column value: "0", "abc", "abcd*", "0.213" 92 } 93 94 // A flattened expression used for searching 95 // leftSearchInput will always be defined, rightSearchInput may not be depending on filterOp 96 type SearchExpression struct { 97 LeftSearchInput *SearchExpressionInput 98 FilterOp FilterOperator 99 RightSearchInput *SearchExpressionInput 100 SearchInfo *SearchInfo 101 } 102 103 type SearchInfo struct { 104 ColEncoding []byte 105 QValDte *DtypeEnclosure 106 } 107 108 type SearchMetadataHolder struct { 109 BlockSummaries []*BlockSummary 110 BlockSummariesFile string 111 SearchTotalMemory uint64 // total memory that this search would take, BlockSummaries + raw search buffers 112 } 113 114 type BlockMetadataHolder struct { 115 BlkNum uint16 116 ColumnBlockOffset map[string]int64 117 ColumnBlockLen map[string]uint32 118 } 119 120 // a struct for raw search to apply search on specific blocks within a file 121 type SegmentSearchRequest struct { 122 SegmentKey string 123 SearchMetadata *SearchMetadataHolder 124 AllBlocksToSearch map[uint16]*BlockMetadataHolder // maps all blocks needed to search to the BlockMetadataHolder needed to read 125 VirtualTableName string 126 AllPossibleColumns map[string]bool // all possible columns for the segKey 127 LatestEpochMS uint64 // latest epoch time - used for query planning 128 SType SegType 129 CmiPassedCnames map[uint16]map[string]bool // maps blkNum -> colName -> true that have passed the cmi check 130 HasMatchedRrc bool // flag to denote matches, so that we decide whether to send a websocket update 131 } 132 133 // a holder struct for holding a cmi for a single block. Based on CmiType, either Bf or Ranges will be defined 134 type CmiContainer struct { 135 CmiType uint8 136 Loaded bool 137 Bf *bloom.BloomFilter 138 Ranges map[string]*Numbers 139 } 140 141 // even if only one block will be searched and parallelism=10, we will spawn 10 buffers, although 9 wont be used 142 // TODO: more accurate block summaries and colmeta sizing 143 func (ssr *SegmentSearchRequest) GetMaxSearchMemorySize(sNode *SearchNode, parallelismPerFile int64, bitsetMinSize uint16) uint64 { 144 145 // bitset size worst case is min(15000*num blocks, total record count) 146 var totalBits uint64 147 for i := 0; i < len(ssr.SearchMetadata.BlockSummaries); i++ { 148 if _, ok := ssr.AllBlocksToSearch[uint16(i)]; !ok { 149 continue 150 } 151 if ssr.SearchMetadata.BlockSummaries[i].RecCount > bitsetMinSize { 152 totalBits += uint64(ssr.SearchMetadata.BlockSummaries[i].RecCount) 153 } else { 154 totalBits += uint64(bitsetMinSize) 155 } 156 } 157 totalSize := uint64(totalBits / 8) 158 159 // for raw search & aggs its hard to calculate as memory for multi readers comes from a pool, 160 // hence we assume that there will be enough memory in the pool & in the buffer 161 if ssr.SearchMetadata == nil { 162 return uint64(totalSize) 163 } 164 165 totalSize += ssr.SearchMetadata.SearchTotalMemory 166 return totalSize 167 } 168 169 // function used to nil out block sum and colmeta 170 func (ssr *SegmentSearchRequest) CleanSearchMetadata() { 171 if ssr.SearchMetadata == nil { 172 return 173 } 174 ssr.SearchMetadata.BlockSummaries = nil 175 } 176 177 /* 178 * 179 180 Logical operator only dictates how the block numbers should be resolved 181 182 the CMIPassed names will always be unioned. 183 184 * 185 */ 186 func (ssr *SegmentSearchRequest) JoinRequest(toJoin *SegmentSearchRequest, op LogicalOperator) { 187 // merge blocksearch info 188 if op == And { 189 for blockNum := range ssr.AllBlocksToSearch { 190 if _, ok := toJoin.AllBlocksToSearch[blockNum]; !ok { 191 delete(ssr.AllBlocksToSearch, blockNum) 192 delete(ssr.CmiPassedCnames, blockNum) 193 continue 194 } 195 for cname := range toJoin.CmiPassedCnames[blockNum] { 196 ssr.CmiPassedCnames[blockNum][cname] = true 197 } 198 } 199 } else { 200 for blockNum, blockMeta := range toJoin.AllBlocksToSearch { 201 ssr.AllBlocksToSearch[blockNum] = blockMeta 202 if _, ok := ssr.CmiPassedCnames[blockNum]; !ok { 203 ssr.CmiPassedCnames[blockNum] = make(map[string]bool) 204 } 205 206 for cname := range toJoin.CmiPassedCnames[blockNum] { 207 ssr.CmiPassedCnames[blockNum][cname] = true 208 } 209 } 210 } 211 // merge columns 212 ssr.JoinColumnInfo(toJoin) 213 } 214 215 // merges toJoin.SearchColumns with ssr.SearchColumns 216 func (ssr *SegmentSearchRequest) JoinColumnInfo(toJoin *SegmentSearchRequest) { 217 // merge columns 218 for col := range toJoin.AllPossibleColumns { 219 ssr.AllPossibleColumns[col] = true 220 } 221 } 222 223 func (searchExp *SearchExpression) IsMatchAll() bool { 224 225 if searchExp.FilterOp != Equals { 226 return false 227 } 228 if searchExp.LeftSearchInput == nil || searchExp.RightSearchInput == nil { 229 return false // both left and right need to be defined 230 } 231 232 var colName string 233 var colValue *DtypeEnclosure 234 if len(searchExp.LeftSearchInput.ColumnName) > 0 { 235 colName = searchExp.LeftSearchInput.ColumnName 236 } else { 237 colName = searchExp.RightSearchInput.ColumnName 238 } 239 240 if searchExp.LeftSearchInput.ColumnValue != nil { 241 colValue = searchExp.LeftSearchInput.ColumnValue 242 } else if searchExp.RightSearchInput != nil && searchExp.RightSearchInput.ColumnValue != nil { 243 colValue = searchExp.RightSearchInput.ColumnValue 244 } 245 if colValue == nil { 246 return false 247 } 248 249 return colName == "*" && colValue.IsFullWildcard() 250 } 251 252 func (searchExp *SearchExpression) GetExpressionType() SearchQueryType { 253 if searchExp.LeftSearchInput.ComplexRelation != nil { 254 return ComplexExpression 255 } 256 if searchExp.RightSearchInput != nil && searchExp.RightSearchInput.ComplexRelation != nil { 257 return ComplexExpression 258 } 259 // at this point, all expressions are some kind of expression 260 var colName string 261 var colVal *DtypeEnclosure 262 if len(searchExp.LeftSearchInput.ColumnName) > 0 { 263 colName = searchExp.LeftSearchInput.ColumnName 264 } else { 265 colName = searchExp.RightSearchInput.ColumnName 266 } 267 if searchExp.LeftSearchInput.ColumnValue != nil { 268 colVal = searchExp.LeftSearchInput.ColumnValue 269 } else { 270 colVal = searchExp.RightSearchInput.ColumnValue 271 } 272 wildcardColName := colName == "*" 273 if colVal == nil { 274 if wildcardColName { 275 return RegexExpression 276 } 277 return SimpleExpression 278 } 279 regexCol := colVal.IsRegex() 280 if wildcardColName { 281 if regexCol { 282 return RegexExpressionAllColumns 283 } else { 284 return SimpleExpressionAllColumns 285 } 286 } 287 if regexCol { 288 return RegexExpression 289 } else { 290 return SimpleExpression 291 } 292 } 293 294 // parse a FilterInput to a friendly SearchInput for raw searching/expression matching 295 func getSearchInputFromFilterInput(filter *FilterInput, qid uint64) *SearchExpressionInput { 296 297 searchInput := SearchExpressionInput{} 298 299 if filter == nil { 300 return &searchInput 301 } 302 303 if len(filter.SubtreeResult) > 0 { // if filterSubtree is defined, only literal in search input 304 val, err := CreateDtypeEnclosure(filter.SubtreeResult, qid) 305 if err != nil { 306 // TODO: handle error 307 log.Errorf("qid=%d, getSearchInputFromFilterInput: Error creating dtype enclosure: %v", qid, err) 308 } 309 searchInput.ColumnValue = val 310 return &searchInput 311 } 312 313 if filter.Expression.RightInput == nil { // rightInput is nil, meaning only left expressionInput is defined and only has columnName or 314 expInput := filter.Expression.LeftInput 315 316 if len(expInput.ColumnName) > 0 { 317 searchInput.ColumnName = expInput.ColumnName 318 } else { 319 searchInput.ColumnValue = expInput.ColumnValue 320 } 321 } else { 322 searchInput.ComplexRelation = filter.Expression 323 } 324 325 return &searchInput 326 } 327 328 func GetSearchQueryFromFilterCriteria(criteria *FilterCriteria, qid uint64) *SearchQuery { 329 330 if criteria.MatchFilter != nil { 331 return extractSearchQueryFromMatchFilter(criteria.MatchFilter) 332 } else { 333 sq := extractSearchQueryFromExpressionFilter(criteria.ExpressionFilter, qid) 334 335 var colVal *DtypeEnclosure 336 if sq.ExpressionFilter.LeftSearchInput.ColumnValue != nil { 337 colVal = sq.ExpressionFilter.LeftSearchInput.ColumnValue 338 } else if sq.ExpressionFilter.RightSearchInput.ColumnValue != nil { 339 colVal = sq.ExpressionFilter.RightSearchInput.ColumnValue 340 } 341 342 if colVal != nil && colVal.Dtype == SS_DT_STRING && colVal.StringVal == "*" { 343 sq.SearchType = MatchAll 344 } 345 return sq 346 } 347 } 348 349 func extractSearchQueryFromMatchFilter(match *MatchFilter) *SearchQuery { 350 var qType SearchQueryType 351 currQuery := &SearchQuery{ 352 MatchFilter: match, 353 } 354 if match.MatchType == MATCH_DICT_ARRAY { 355 if match.MatchColumn == "*" { 356 qType = MatchDictArrayAllColumns 357 } else { 358 qType = MatchDictArraySingleColumn 359 } 360 currQuery.SearchType = qType 361 } else if match.MatchColumn == "*" { 362 qType = MatchWordsAllColumns 363 if match.MatchOperator == And { 364 if len(match.MatchWords) == 1 && bytes.Equal(match.MatchWords[0], STAR_BYTE) { 365 qType = MatchAll 366 } 367 } else if match.MatchOperator == Or { 368 for _, word := range match.MatchWords { 369 if bytes.Equal(word, STAR_BYTE) { 370 qType = MatchAll 371 break 372 } 373 } 374 } 375 currQuery.SearchType = qType 376 } else { 377 currQuery.SearchType = MatchWords 378 } 379 if match.MatchPhrase != nil && bytes.Contains(match.MatchPhrase, []byte("*")) { 380 cval := dtu.ReplaceWildcardStarWithRegex(string(match.MatchPhrase)) 381 rexpC, err := regexp.Compile(cval) 382 if err != nil { 383 log.Errorf("extractSearchQueryFromMatchFilter: regexp compile failed, err=%v", err) 384 } else { 385 currQuery.MatchFilter.Regexp = rexpC 386 } 387 } 388 389 return currQuery 390 } 391 392 func extractSearchQueryFromExpressionFilter(exp *ExpressionFilter, qid uint64) *SearchQuery { 393 leftSearchInput := getSearchInputFromFilterInput(exp.LeftInput, qid) 394 rightSearchInput := getSearchInputFromFilterInput(exp.RightInput, qid) 395 sq := &SearchQuery{ 396 ExpressionFilter: &SearchExpression{ 397 LeftSearchInput: leftSearchInput, 398 FilterOp: exp.FilterOperator, 399 RightSearchInput: rightSearchInput, 400 }, 401 } 402 expType := getSearchTypeFromSearchExpression(sq.ExpressionFilter) 403 sq.SearchType = expType 404 405 if sq.SearchType == RegexExpression || sq.SearchType == RegexExpressionAllColumns { 406 if sq.ExpressionFilter.LeftSearchInput.ColumnValue != nil && 407 sq.ExpressionFilter.LeftSearchInput.ColumnValue.Dtype == SS_DT_STRING { 408 409 cval := dtu.ReplaceWildcardStarWithRegex(sq.ExpressionFilter.LeftSearchInput.ColumnValue.StringVal) 410 rexpC, err := regexp.Compile(cval) 411 if err != nil { 412 log.Errorf("extractSearchQueryFromExpressionFilter: regexp compile failed, err=%v", err) 413 } else { 414 sq.ExpressionFilter.LeftSearchInput.ColumnValue.SetRegexp(rexpC) 415 } 416 } 417 } 418 return sq 419 } 420 421 func getSearchTypeFromSearchExpression(searchExp *SearchExpression) SearchQueryType { 422 423 if searchExp.IsMatchAll() { 424 return MatchAll 425 } 426 return searchExp.GetExpressionType() 427 } 428 429 // extract all columns from SearchInput 430 // ex: SearchExpressionInput{columnName="abc"} -> abc 431 // ex: SearchExpressionInput{complexRelation={literal=2,op=mult,columnName="def"}} -> "def" 432 func (search *SearchExpressionInput) getAllColumnsInSearch() map[string]string { 433 434 allColumns := make(map[string]string) 435 436 if len(search.ColumnName) > 0 { 437 allColumns[string(search.ColumnName)] = "" 438 } 439 440 if search.ComplexRelation != nil { 441 exp := search.ComplexRelation 442 if exp.LeftInput != nil && len(exp.LeftInput.ColumnName) > 0 { 443 allColumns[exp.LeftInput.ColumnName] = "" 444 } 445 446 if exp.RightInput != nil && len(exp.RightInput.ColumnName) > 0 { 447 allColumns[exp.RightInput.ColumnName] = "" 448 } 449 } 450 return allColumns 451 } 452 453 func (searchExp *SearchExpression) getAllColumnsInSearch() map[string]string { 454 455 allColumns := searchExp.LeftSearchInput.getAllColumnsInSearch() 456 457 if searchExp.RightSearchInput != nil { 458 rightColumns := searchExp.RightSearchInput.getAllColumnsInSearch() 459 460 for key, val := range rightColumns { 461 allColumns[key] = val 462 } 463 } 464 465 return allColumns 466 } 467 468 // returns a map with keys, a boolean, and error 469 // the map will contain only non wildcarded keys, 470 // if bool is true, the searchExpression contained a wildcard 471 func (searchExp *SearchExpression) GetAllBlockBloomKeysToSearch() (map[string]bool, bool, error) { 472 if searchExp.FilterOp != Equals { 473 return nil, false, errors.New("relation is not simple key1:value1") 474 } 475 if searchExp.LeftSearchInput != nil && searchExp.LeftSearchInput.ComplexRelation != nil { 476 // complex relations are not supported for blockbloom 477 return nil, false, errors.New("relation is not simple key1:value1") 478 } 479 if searchExp.RightSearchInput != nil && searchExp.RightSearchInput.ComplexRelation != nil { 480 return nil, false, errors.New("relation is not simple key1:value1") 481 } 482 allKeys := make(map[string]bool) 483 var colVal *DtypeEnclosure 484 if searchExp.LeftSearchInput != nil && searchExp.LeftSearchInput.ColumnValue != nil { 485 colVal = searchExp.LeftSearchInput.ColumnValue 486 } else if searchExp.RightSearchInput != nil && searchExp.RightSearchInput.ColumnValue != nil { 487 colVal = searchExp.RightSearchInput.ColumnValue 488 } 489 490 if colVal == nil { 491 return nil, false, errors.New("unable to extract column name and value from request") 492 } 493 494 if colVal.IsRegex() { 495 return allKeys, true, nil 496 } 497 if len(colVal.StringVal) == 0 { 498 return allKeys, false, errors.New("unable to extract column name and value from request") 499 } 500 allKeys[colVal.StringVal] = true 501 return allKeys, false, nil 502 } 503 504 func (match *MatchFilter) GetAllBlockBloomKeysToSearch() (map[string]bool, bool, LogicalOperator) { 505 allKeys := make(map[string]bool) 506 wildcardExists := false 507 if match.MatchType == MATCH_DICT_ARRAY { 508 mKey := match.MatchDictArray.MatchKey 509 mVal := match.MatchDictArray.MatchValue 510 var mValStr string 511 switch mVal.Dtype { 512 case utils.SS_DT_BOOL: 513 mValStr = fmt.Sprintf("%v", mVal.BoolVal) 514 case utils.SS_DT_STRING: 515 mValStr = fmt.Sprintf("%v", mVal.StringVal) 516 case utils.SS_DT_UNSIGNED_NUM: 517 mValStr = fmt.Sprintf("%v", mVal.UnsignedVal) 518 case utils.SS_DT_SIGNED_NUM: 519 mValStr = fmt.Sprintf("%v", mVal.SignedVal) 520 case utils.SS_DT_FLOAT: 521 mValStr = fmt.Sprintf("%v", mVal.FloatVal) 522 } 523 524 allKeys[string(mKey)] = true 525 allKeys[mValStr] = true 526 return allKeys, wildcardExists, And 527 } else { 528 for _, literal := range match.MatchWords { 529 530 if strings.Contains(string(literal), "*") { 531 wildcardExists = true 532 continue 533 } 534 allKeys[string(literal)] = true 535 } 536 // if only one matchWord then do And so that CMI logic will only pass blocks that pass 537 // bloom check 538 if len(allKeys) == 1 { 539 return allKeys, wildcardExists, And 540 } 541 } 542 return allKeys, wildcardExists, match.MatchOperator 543 } 544 545 func (ef *SearchExpression) IsTimeRangeFilter() bool { 546 if ef.IsMatchAll() { 547 return true 548 } 549 if ef.LeftSearchInput != nil && len(ef.LeftSearchInput.ColumnName) > 0 { 550 if ef.LeftSearchInput.ColumnName != config.GetTimeStampKey() { 551 return false 552 } 553 } 554 if ef.RightSearchInput != nil && len(ef.RightSearchInput.ColumnName) > 0 { 555 if ef.RightSearchInput.ColumnName != config.GetTimeStampKey() { 556 return false 557 } 558 } 559 return true 560 }