github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/querystatus.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package query 18 19 import ( 20 "fmt" 21 "math" 22 "sync" 23 "time" 24 25 "github.com/siglens/siglens/pkg/segment/results/blockresults" 26 "github.com/siglens/siglens/pkg/segment/results/segresults" 27 "github.com/siglens/siglens/pkg/segment/structs" 28 "github.com/siglens/siglens/pkg/segment/utils" 29 log "github.com/sirupsen/logrus" 30 ) 31 32 type QueryState int 33 34 var numStates = 4 35 36 const MAX_GRP_BUCKS = 3000 37 const CANCEL_QUERY_AFTER_SECONDS = 5 * 60 // If 0, the query will never timeout 38 39 type QueryUpdateType int 40 41 const ( 42 QUERY_UPDATE_LOCAL QueryUpdateType = iota + 1 43 QUERY_UPDATE_REMOTE 44 ) 45 46 type QueryUpdate struct { 47 QUpdate QueryUpdateType 48 SegKeyEnc uint16 49 RemoteID string 50 } 51 52 type QueryStateChanData struct { 53 StateName QueryState 54 QueryUpdate *QueryUpdate 55 PercentComplete float64 56 } 57 58 const ( 59 RUNNING QueryState = iota + 1 60 QUERY_UPDATE // flush segment counts & aggs & records (if matched) 61 COMPLETE 62 TIMEOUT 63 ERROR 64 ) 65 66 func (qs QueryState) String() string { 67 switch qs { 68 case RUNNING: 69 return "RUNNING" 70 case QUERY_UPDATE: 71 return "QUERY_UPDATE" 72 case COMPLETE: 73 return "COMPLETE" 74 case TIMEOUT: 75 return "TIMEOUT" 76 case ERROR: 77 return "ERROR" 78 default: 79 return fmt.Sprintf("UNKNOWN_QUERYSTATE_%d", qs) 80 } 81 } 82 83 type RunningQueryState struct { 84 isAsync bool 85 isCancelled bool 86 StateChan chan *QueryStateChanData // channel to send state changes of query 87 searchRes *segresults.SearchResults 88 rawRecords []*utils.RecordResultContainer 89 queryCount *structs.QueryCount 90 aggs *structs.QueryAggregators 91 searchHistogram map[string]*structs.AggregationResult 92 QType structs.QueryType 93 rqsLock *sync.Mutex 94 dqs *DistributedQueryService 95 totalSegments uint64 96 finishedSegments uint64 97 totalRecsSearched uint64 98 } 99 100 var allRunningQueries = map[uint64]*RunningQueryState{} 101 var arqMapLock *sync.RWMutex = &sync.RWMutex{} 102 103 func (rQuery *RunningQueryState) IsAsync() bool { 104 rQuery.rqsLock.Lock() 105 defer rQuery.rqsLock.Unlock() 106 return rQuery.isAsync 107 } 108 109 func (rQuery *RunningQueryState) SendQueryStateComplete() { 110 rQuery.StateChan <- &QueryStateChanData{StateName: COMPLETE} 111 } 112 113 // Starts tracking the query state. If async is true, the RunningQueryState.StateChan will be defined & will be sent updates 114 // If async, updates will be sent for any update to RunningQueryState. Caller is responsible to call DeleteQuery 115 func StartQuery(qid uint64, async bool) (*RunningQueryState, error) { 116 arqMapLock.Lock() 117 defer arqMapLock.Unlock() 118 if _, ok := allRunningQueries[qid]; ok { 119 log.Errorf("StartQuery: qid %+v already exists!", qid) 120 return nil, fmt.Errorf("qid has already been started") 121 } 122 123 var stateChan chan *QueryStateChanData 124 if async { 125 stateChan = make(chan *QueryStateChanData, numStates) 126 stateChan <- &QueryStateChanData{StateName: RUNNING} 127 } 128 129 // If the query runs too long, cancel it. 130 if CANCEL_QUERY_AFTER_SECONDS != 0 { 131 go func() { 132 time.Sleep(time.Duration(CANCEL_QUERY_AFTER_SECONDS) * time.Second) 133 134 arqMapLock.RLock() 135 rQuery, ok := allRunningQueries[qid] 136 arqMapLock.RUnlock() 137 138 if ok { 139 log.Infof("qid: %v Canceling query due to timeout (%v seconds)", qid, CANCEL_QUERY_AFTER_SECONDS) 140 rQuery.StateChan <- &QueryStateChanData{StateName: TIMEOUT} 141 CancelQuery(qid) 142 } 143 }() 144 } 145 146 runningState := &RunningQueryState{ 147 StateChan: stateChan, 148 rqsLock: &sync.Mutex{}, 149 isAsync: async, 150 } 151 allRunningQueries[qid] = runningState 152 return runningState, nil 153 } 154 155 // Removes reference to qid. If qid does not exist this is a noop 156 func DeleteQuery(qid uint64) { 157 arqMapLock.Lock() 158 delete(allRunningQueries, qid) 159 arqMapLock.Unlock() 160 } 161 162 func associateSearchInfoWithQid(qid uint64, result *segresults.SearchResults, aggs *structs.QueryAggregators, dqs *DistributedQueryService, 163 qType structs.QueryType) error { 164 arqMapLock.RLock() 165 rQuery, ok := allRunningQueries[qid] 166 arqMapLock.RUnlock() 167 if !ok { 168 log.Errorf("associateSearchResultWithQid: qid %+v does not exist!", qid) 169 return fmt.Errorf("qid does not exist") 170 } 171 172 rQuery.rqsLock.Lock() 173 rQuery.searchRes = result 174 rQuery.aggs = aggs 175 rQuery.dqs = dqs 176 rQuery.QType = qType 177 rQuery.rqsLock.Unlock() 178 179 return nil 180 } 181 182 // increments the finished segments. If incr is 0, then the current query is finished and a histogram will be flushed 183 func incrementNumFinishedSegments(incr int, qid uint64, recsSearched uint64, 184 skEnc uint16, doBuckPull bool, sstMap map[string]*structs.SegStats) { 185 arqMapLock.RLock() 186 rQuery, ok := allRunningQueries[qid] 187 arqMapLock.RUnlock() 188 if !ok { 189 log.Errorf("updateTotalSegmentsInQuery: qid %+v does not exist!", qid) 190 return 191 } 192 193 rQuery.rqsLock.Lock() 194 rQuery.finishedSegments += uint64(incr) 195 perComp := float64(0) 196 if rQuery.totalSegments != 0 { 197 val := float64(rQuery.finishedSegments) / float64(rQuery.totalSegments) * 100 198 perComp = toFixed(val, 3) 199 } 200 201 rQuery.totalRecsSearched += recsSearched 202 if rQuery.searchRes != nil { 203 rQuery.queryCount = rQuery.searchRes.GetQueryCount() 204 rQuery.rawRecords = rQuery.searchRes.GetResultsCopy() 205 if doBuckPull { 206 rQuery.searchHistogram = rQuery.searchRes.GetBucketResults() 207 } 208 if sstMap != nil && rQuery.isAsync { 209 rQuery.searchRes.AddSSTMap(sstMap, skEnc) 210 } 211 } 212 rQuery.rqsLock.Unlock() 213 if rQuery.isAsync { 214 rQuery.StateChan <- &QueryStateChanData{StateName: QUERY_UPDATE, 215 QueryUpdate: &QueryUpdate{ 216 QUpdate: QUERY_UPDATE_LOCAL, 217 SegKeyEnc: skEnc, 218 }, 219 PercentComplete: perComp} 220 } 221 } 222 223 func setTotalSegmentsToSearch(qid uint64, numSegments uint64) error { 224 arqMapLock.RLock() 225 rQuery, ok := allRunningQueries[qid] 226 arqMapLock.RUnlock() 227 if !ok { 228 log.Errorf("setTotalSegmentsToSearch: qid %+v does not exist!", qid) 229 return fmt.Errorf("qid does not exist") 230 } 231 232 rQuery.rqsLock.Lock() 233 rQuery.totalSegments = numSegments 234 rQuery.rqsLock.Unlock() 235 236 return nil 237 } 238 239 func GetTotalSegmentsToSearch(qid uint64) (uint64, error) { 240 arqMapLock.RLock() 241 rQuery, ok := allRunningQueries[qid] 242 arqMapLock.RUnlock() 243 if !ok { 244 return 0, fmt.Errorf("qid does not exist") 245 } 246 247 rQuery.rqsLock.Lock() 248 defer rQuery.rqsLock.Unlock() 249 return rQuery.totalSegments, nil 250 } 251 252 func setQidAsFinished(qid uint64) { 253 arqMapLock.RLock() 254 rQuery, ok := allRunningQueries[qid] 255 arqMapLock.RUnlock() 256 if !ok { 257 log.Errorf("setRRCsAsCompleted: qid %+v does not exist!", qid) 258 return 259 } 260 261 // Only async queries need to send COMPLETE, but if we need to do post 262 // aggregations, we'll send COMPLETE once we're done with those. 263 if rQuery.isAsync && (rQuery.aggs == nil || rQuery.aggs.Next == nil) { 264 rQuery.StateChan <- &QueryStateChanData{StateName: COMPLETE} 265 } 266 } 267 268 func CancelQuery(qid uint64) { 269 arqMapLock.RLock() 270 rQuery, ok := allRunningQueries[qid] 271 arqMapLock.RUnlock() 272 if !ok { 273 log.Errorf("CancelQuery: qid %+v does not exist!", qid) 274 return 275 } 276 rQuery.rqsLock.Lock() 277 rQuery.isCancelled = true 278 rQuery.rqsLock.Unlock() 279 } 280 281 func GetBucketsForQid(qid uint64) (map[string]*structs.AggregationResult, error) { 282 arqMapLock.RLock() 283 rQuery, ok := allRunningQueries[qid] 284 arqMapLock.RUnlock() 285 if !ok { 286 log.Errorf("GetBucketsForQid: qid %+v does not exist!", qid) 287 return nil, fmt.Errorf("qid does not exist") 288 } 289 290 if rQuery.searchHistogram == nil { 291 return nil, fmt.Errorf("GetBucketsForQid: searchHistogram does not exist for qid %+v", qid) 292 } 293 return rQuery.searchHistogram, nil 294 } 295 296 func SetFinalStatsForQid(qid uint64, nodeResult *structs.NodeResult) error { 297 arqMapLock.RLock() 298 defer arqMapLock.RUnlock() 299 300 rQuery, ok := allRunningQueries[qid] 301 if !ok { 302 log.Errorf("SetConvertedBucketsForQid: qid %+v does not exist!", qid) 303 return fmt.Errorf("qid does not exist") 304 } 305 306 return rQuery.searchRes.SetFinalStatsFromNodeResult(nodeResult) 307 } 308 309 // gets the measure results for the running query. 310 // if the query is segment stats, it will delete the input segkeyenc 311 func GetMeasureResultsForQid(qid uint64, pullGrpBucks bool, skenc uint16, limit int) ([]*structs.BucketHolder, []string, []string, int) { 312 313 arqMapLock.RLock() 314 rQuery, ok := allRunningQueries[qid] 315 if !ok { 316 log.Errorf("GetMeasureResultsForQid: qid %+v does not exist!", qid) 317 arqMapLock.RUnlock() 318 return nil, nil, nil, 0 319 } 320 defer arqMapLock.RUnlock() 321 322 if rQuery.searchRes == nil { 323 return nil, nil, nil, 0 324 } 325 switch rQuery.QType { 326 case structs.SegmentStatsCmd: 327 return rQuery.searchRes.GetSegmentStatsResults(skenc) 328 case structs.GroupByCmd: 329 if pullGrpBucks { 330 rowCnt := MAX_GRP_BUCKS 331 if limit != -1 { 332 rowCnt = limit 333 } 334 335 // If after stats block's group by there is a statistic block's group by, we should only keep the groupby cols of the statistic block 336 bucketHolderArr, retMFuns, aggGroupByCols, added := rQuery.searchRes.GetGroupyByBuckets(rowCnt) 337 338 statisticGroupByCols := rQuery.searchRes.GetStatisticGroupByCols() 339 // If there is only one group by in the agg, we do not need to change groupbycols 340 if len(statisticGroupByCols) > 0 && !rQuery.searchRes.IsOnlyStatisticGroupBy() { 341 aggGroupByCols = statisticGroupByCols 342 } 343 344 // Remove unused columns for Rename block 345 aggGroupByCols = rQuery.searchRes.RemoveUnusedGroupByCols(aggGroupByCols) 346 return bucketHolderArr, retMFuns, aggGroupByCols, added 347 } else { 348 return nil, nil, nil, 0 349 } 350 default: 351 return nil, nil, nil, 0 352 } 353 } 354 355 func GetQueryType(qid uint64) structs.QueryType { 356 arqMapLock.RLock() 357 rQuery, ok := allRunningQueries[qid] 358 if !ok { 359 log.Errorf("GetQueryType: qid %+v does not exist!", qid) 360 arqMapLock.RUnlock() 361 return structs.InvalidCmd 362 } 363 defer arqMapLock.RUnlock() 364 365 return rQuery.QType 366 } 367 368 // Get remote raw logs and columns based on the remoteID and all RRCs 369 func GetRemoteRawLogInfo(remoteID string, inrrcs []*utils.RecordResultContainer, qid uint64) ([]map[string]interface{}, []string, error) { 370 arqMapLock.RLock() 371 rQuery, ok := allRunningQueries[qid] 372 if !ok { 373 log.Errorf("GetQueryType: qid %+v does not exist!", qid) 374 arqMapLock.RUnlock() 375 return nil, nil, fmt.Errorf("qid does not exist") 376 } 377 defer arqMapLock.RUnlock() 378 379 return rQuery.searchRes.GetRemoteInfo(remoteID, inrrcs) 380 } 381 382 func round(num float64) int { 383 return int(num + math.Copysign(0.5, num)) 384 } 385 386 // Function to truncate float64 to a given precision 387 func toFixed(num float64, precision int) float64 { 388 output := math.Pow(10, float64(precision)) 389 return float64(round(num*output)) / output 390 } 391 392 func checkForCancelledQuery(qid uint64) (bool, error) { 393 arqMapLock.RLock() 394 rQuery, ok := allRunningQueries[qid] 395 arqMapLock.RUnlock() 396 if !ok { 397 log.Errorf("GetStateForQid: qid %+v does not exist!", qid) 398 return false, fmt.Errorf("qid does not exist") 399 } 400 401 rQuery.rqsLock.Lock() 402 defer rQuery.rqsLock.Unlock() 403 404 if rQuery.isCancelled { 405 return true, nil 406 } 407 return false, nil 408 } 409 410 // returns the rrcs, query counts, map of segkey encoding, and errors 411 func GetRawRecordInfoForQid(scroll int, qid uint64) ([]*utils.RecordResultContainer, uint64, map[uint16]string, error) { 412 arqMapLock.RLock() 413 rQuery, ok := allRunningQueries[qid] 414 arqMapLock.RUnlock() 415 if !ok { 416 log.Errorf("GetRawRecordInforForQid: qid %+v does not exist!", qid) 417 return nil, 0, nil, fmt.Errorf("qid does not exist") 418 } 419 420 rQuery.rqsLock.Lock() 421 defer rQuery.rqsLock.Unlock() 422 if rQuery.queryCount == nil || rQuery.rawRecords == nil { 423 eres := make([]*utils.RecordResultContainer, 0) 424 return eres, 0, nil, nil 425 } 426 427 if len(rQuery.rawRecords) <= scroll { 428 eres := make([]*utils.RecordResultContainer, 0) 429 return eres, 0, nil, nil 430 } 431 skCopy := make(map[uint16]string, len(rQuery.searchRes.SegEncToKey)) 432 for k, v := range rQuery.searchRes.SegEncToKey { 433 skCopy[k] = v 434 } 435 return rQuery.rawRecords[scroll:], rQuery.queryCount.TotalCount, skCopy, nil 436 } 437 438 // returns rrcs, raw time buckets, raw groupby buckets, querycounts, map of segkey encoding, and errors 439 func GetQueryResponseForRPC(scroll int, qid uint64) ([]*utils.RecordResultContainer, *blockresults.TimeBuckets, 440 *blockresults.GroupByBuckets, map[uint16]string, error) { 441 arqMapLock.RLock() 442 rQuery, ok := allRunningQueries[qid] 443 arqMapLock.RUnlock() 444 if !ok { 445 log.Errorf("GetQueryResponseForRPC: qid %+v does not exist!", qid) 446 return nil, nil, nil, nil, fmt.Errorf("qid does not exist") 447 } 448 449 if rQuery.queryCount == nil || rQuery.rawRecords == nil { 450 eres := make([]*utils.RecordResultContainer, 0) 451 return eres, nil, nil, nil, nil 452 } 453 var eres []*utils.RecordResultContainer 454 if rQuery.rawRecords == nil { 455 eres = make([]*utils.RecordResultContainer, 0) 456 } else if len(rQuery.rawRecords) <= scroll { 457 eres = make([]*utils.RecordResultContainer, 0) 458 } else { 459 eres = rQuery.rawRecords[scroll:] 460 } 461 skCopy := make(map[uint16]string, len(rQuery.searchRes.SegEncToKey)) 462 for k, v := range rQuery.searchRes.SegEncToKey { 463 skCopy[k] = v 464 } 465 switch rQuery.QType { 466 case structs.SegmentStatsCmd: 467 // SegStats will be streamed back on each query update. So, we don't need to return anything here 468 return eres, nil, nil, skCopy, nil 469 case structs.GroupByCmd: 470 timeBuckets, groupBuckets := rQuery.searchRes.GetRunningBuckets() 471 return eres, timeBuckets, groupBuckets, skCopy, nil 472 default: 473 return eres, nil, nil, skCopy, nil 474 } 475 } 476 477 // Gets the json encoding of segstats for RPC. 478 // Returns encoded segstats for the given segkeyEnc and qid, bool if the query is segstats or not, and error 479 func GetEncodedSegStatsForRPC(qid uint64, segKeyEnc uint16) ([]byte, bool, error) { 480 arqMapLock.RLock() 481 rQuery, ok := allRunningQueries[qid] 482 arqMapLock.RUnlock() 483 if !ok { 484 log.Errorf("GetEncodedSegStatsForRPC: qid %+v does not exist!", qid) 485 return nil, false, fmt.Errorf("qid does not exist") 486 } 487 488 if rQuery.QType != structs.SegmentStatsCmd { 489 return nil, false, nil 490 } 491 retVal, err := rQuery.searchRes.GetEncodedSegStats(segKeyEnc) 492 return retVal, true, err 493 } 494 495 // returns the query counts for the qid. If qid does not exist, this will return a QueryCount set to 0 496 func GetQueryCountInfoForQid(qid uint64) *structs.QueryCount { 497 arqMapLock.RLock() 498 rQuery, ok := allRunningQueries[qid] 499 arqMapLock.RUnlock() 500 if !ok { 501 log.Errorf("GetQueryCountInfoForQid: qid %+v does not exist!", qid) 502 return zeroHitsQueryCount() 503 } 504 505 if rQuery.queryCount == nil { 506 log.Infof("qid=%d, GetQueryCountInfoForQid: query count for qid %+v does not exist. Defaulting to 0", qid, qid) 507 return zeroHitsQueryCount() 508 } 509 510 return rQuery.queryCount 511 } 512 513 // returns the query counts and searched count for the qid. If qid does not exist, this will return a QueryCount set to 0 514 func GetQueryInfoForQid(qid uint64) (*structs.QueryCount, uint64, error) { 515 arqMapLock.RLock() 516 rQuery, ok := allRunningQueries[qid] 517 arqMapLock.RUnlock() 518 if !ok { 519 log.Errorf("GetQueryCountInfoForQid: qid %+v does not exist!", qid) 520 return nil, 0, fmt.Errorf("qid does not exist") 521 } 522 523 if rQuery.queryCount == nil { 524 log.Infof("qid=%d, GetQueryCountInfoForQid: query count for qid %+v does not exist. Defaulting to 0", qid, qid) 525 return nil, 0, fmt.Errorf("query count does not eixst") 526 } 527 528 return rQuery.queryCount, rQuery.totalRecsSearched, nil 529 } 530 531 func zeroHitsQueryCount() *structs.QueryCount { 532 return &structs.QueryCount{ 533 TotalCount: 0, 534 Op: utils.Equals, 535 EarlyExit: true, 536 } 537 } 538 539 func GetTotalsRecsSearchedForQid(qid uint64) (uint64, error) { 540 arqMapLock.RLock() 541 rQuery, ok := allRunningQueries[qid] 542 arqMapLock.RUnlock() 543 if !ok { 544 log.Errorf("GetTotalsRecsSreachedForQid: qid %+v does not exist!", qid) 545 return 0, fmt.Errorf("qid does not exist") 546 } 547 548 rQuery.rqsLock.Lock() 549 defer rQuery.rqsLock.Unlock() 550 551 return rQuery.totalRecsSearched, nil 552 } 553 554 // returns the length of rrcs that exist in *search.SearchResults 555 // this will be used to determine if more scrolling can be done 556 func GetNumMatchedRRCs(qid uint64) (uint64, error) { 557 arqMapLock.RLock() 558 rQuery, ok := allRunningQueries[qid] 559 arqMapLock.RUnlock() 560 if !ok { 561 log.Errorf("GetNumMatchedRRCs: qid %+v does not exist!", qid) 562 return 0, fmt.Errorf("qid does not exist") 563 } 564 565 rQuery.rqsLock.Lock() 566 defer rQuery.rqsLock.Unlock() 567 568 if rQuery.rawRecords == nil { 569 return 0, nil 570 } 571 return uint64(len(rQuery.rawRecords)), nil 572 573 } 574 575 func GetUniqueSearchErrors(qid uint64) (string, error) { 576 arqMapLock.RLock() 577 rQuery, ok := allRunningQueries[qid] 578 arqMapLock.RUnlock() 579 var result string 580 if !ok { 581 log.Errorf("GetQueryTotalErrors: qid %+v does not exist!", qid) 582 return result, fmt.Errorf("qid does not exist") 583 } 584 searchErrors := rQuery.searchRes.GetAllErrors() 585 occurred := map[string]bool{} 586 587 if len(searchErrors) == 0 { 588 return result, nil 589 } 590 591 for _, e := range searchErrors { 592 err := e.Error() 593 if !occurred[err] { 594 occurred[err] = true 595 result += err + ", " 596 } 597 } 598 return result, nil 599 }