github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/record/rrcreader.go

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package record

import (
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/siglens/siglens/pkg/config"
	agg "github.com/siglens/siglens/pkg/segment/aggregations"
	"github.com/siglens/siglens/pkg/segment/query"
	"github.com/siglens/siglens/pkg/segment/search"
	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	log "github.com/sirupsen/logrus"
)

var (
	nodeResMap = make(map[uint64]*structs.NodeResult)
	mapMutex   sync.Mutex
)

func GetOrCreateNodeRes(qid uint64) *structs.NodeResult {
	mapMutex.Lock()
	defer mapMutex.Unlock()

	// Check if a nodeRes instance already exists for the given qid.
	if nr, exists := nodeResMap[qid]; exists {
		return nr
	}

	// If it doesn't exist, create a new instance and add it to the map.
	nr := &structs.NodeResult{}
	nodeResMap[qid] = nr

	return nr
}

func buildSegMap(allrrc []*utils.RecordResultContainer, segEncToKey map[uint16]string) (map[string]*utils.BlkRecIdxContainer, map[string]int) {
	segmap := make(map[string]*utils.BlkRecIdxContainer)
	recordIndexInFinal := make(map[string]int)

	for idx, rrc := range allrrc {
		if rrc.SegKeyInfo.IsRemote {
			log.Debugf("buildSegMap: skipping remote segment:%v", rrc.SegKeyInfo.RecordId)
			continue
		}
		segkey, ok := segEncToKey[rrc.SegKeyInfo.SegKeyEnc]
		if !ok {
			log.Errorf("buildSegMap: could not find segenc:%v in map", rrc.SegKeyInfo.SegKeyEnc)
			continue
		}
		blkIdxsCtr, ok := segmap[segkey]
		if !ok {
			innermap := make(map[uint16]map[uint16]uint64)
			blkIdxsCtr = &utils.BlkRecIdxContainer{BlkRecIndexes: innermap, VirtualTableName: rrc.VirtualTableName}
			segmap[segkey] = blkIdxsCtr
		}
		_, ok = blkIdxsCtr.BlkRecIndexes[rrc.BlockNum]
		if !ok {
			blkIdxsCtr.BlkRecIndexes[rrc.BlockNum] = make(map[uint16]uint64)
		}
		blkIdxsCtr.BlkRecIndexes[rrc.BlockNum][rrc.RecordNum] = rrc.TimeStamp

		recordIndent := fmt.Sprintf("%s_%d_%d", segkey, rrc.BlockNum, rrc.RecordNum)
		recordIndexInFinal[recordIndent] = idx
	}

	return segmap, recordIndexInFinal
}
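// Illustrative sketch (values assumed, not taken from a real query): a single
// RRC from segment "seg1", block 3, record 7, with timestamp 1700000000 would
// yield
//
//	segmap["seg1"].BlkRecIndexes == map[uint16]map[uint16]uint64{3: {7: 1700000000}}
//	recordIndexInFinal["seg1_3_7"] == 0
//
// The "<segkey>_<blockNum>_<recordNum>" identifier is what later allows
// GetJsonFromAllRrc to place each decoded record back at its position in the
// original allrrc ordering.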
func prepareOutputTransforms(aggs *structs.QueryAggregators) (map[string]int, map[string]string, bool, bool, []string, map[string]string) {
	rawIncludeValuesIndicies := make(map[string]int)
	valuesToLabels := make(map[string]string)
	logfmtRequest := false
	tableColumnsExist := false
	if aggs != nil && aggs.OutputTransforms != nil && aggs.OutputTransforms.OutputColumns != nil {
		logfmtRequest = aggs.OutputTransforms.OutputColumns.Logfmt
		tableColumnsExist = true
		for _, rawIncludeValue := range aggs.OutputTransforms.OutputColumns.IncludeValues {
			if !logfmtRequest {
				rawIncludeValuesIndicies[rawIncludeValue.ColName] = rawIncludeValue.Index
			}
			valuesToLabels[rawIncludeValue.ColName] = rawIncludeValue.Label
		}
	}
	var hardcodedArray = []string{}
	var renameHardcodedColumns = make(map[string]string)
	if aggs != nil && aggs.OutputTransforms != nil && aggs.OutputTransforms.HarcodedCol != nil {
		hardcodedArray = append(hardcodedArray, aggs.OutputTransforms.HarcodedCol...)

		for key, value := range aggs.OutputTransforms.RenameHardcodedColumns {
			renameHardcodedColumns[value] = key
		}
	}

	return rawIncludeValuesIndicies, valuesToLabels, logfmtRequest, tableColumnsExist, hardcodedArray, renameHardcodedColumns
}

func applyHardcodedColumns(hardcodedArray []string, renameHardcodedColumns map[string]string, allRecords []map[string]interface{}, finalCols map[string]bool) ([]map[string]interface{}, map[string]bool) {
	if len(hardcodedArray) > 0 {
		for key := range renameHardcodedColumns {
			finalCols[key] = true
		}
		record := make(map[string]interface{})
		for key, val := range renameHardcodedColumns {
			record[key] = val
		}
		allRecords[0] = record
		allRecords = allRecords[:1]
	}

	return allRecords, finalCols
}

func finalizeRecords(allRecords []map[string]interface{}, finalCols map[string]bool, numProcessedRecords int, recsAggRecords []map[string]interface{}, transactionArgsExist bool) ([]map[string]interface{}, []string) {
	colsSlice := make([]string, len(finalCols))
	idx := 0
	for colName := range finalCols {
		colsSlice[idx] = colName
		idx++
	}

	// Some commands (like dedup) can remove records from the final result, so
	// remove the blank records from allRecords to get finalRecords.
	var finalRecords []map[string]interface{}
	if transactionArgsExist {
		finalRecords = recsAggRecords
	} else if numProcessedRecords == len(allRecords) {
		finalRecords = allRecords
	} else {
		finalRecords = make([]map[string]interface{}, numProcessedRecords)
		idx = 0
		for _, record := range allRecords {
			if idx >= numProcessedRecords {
				break
			}

			if record != nil {
				finalRecords[idx] = record
				idx++
			}
		}
	}

	sort.Strings(colsSlice)

	return finalRecords, colsSlice
}
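// Illustrative sketch (inputs assumed): commands like dedup leave nil holes in
// allRecords, and finalizeRecords compacts them while preserving order:
//
//	all := []map[string]interface{}{{"a": 1}, nil, {"b": 2}}
//	cols := map[string]bool{"b": true, "a": true}
//	recs, names := finalizeRecords(all, cols, 2, nil, false)
//	// recs == [{"a": 1}, {"b": 2}]; names == ["a", "b"] (always sorted)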
// GetJsonFromAllRrc gets all raw JSON records from the RRCs.
// If esResponse is false, _id and _type will not be added to any record.
func GetJsonFromAllRrc(allrrc []*utils.RecordResultContainer, esResponse bool, qid uint64,
	segEncToKey map[uint16]string, aggs *structs.QueryAggregators) ([]map[string]interface{}, []string, error) {

	sTime := time.Now()
	nodeRes := GetOrCreateNodeRes(qid)
	segmap, recordIndexInFinal := buildSegMap(allrrc, segEncToKey)
	rawIncludeValuesIndicies, valuesToLabels, logfmtRequest, tableColumnsExist, hardcodedArray, renameHardcodedColumns := prepareOutputTransforms(aggs)

	allRecords := make([]map[string]interface{}, len(allrrc))
	finalCols := make(map[string]bool)
	numProcessedRecords := 0

	var resultRecMap map[string]bool

	hasQueryAggergatorBlock := aggs.HasQueryAggergatorBlockInChain()
	transactionArgsExist := aggs.HasTransactionArgumentsInChain()
	recsAggRecords := make([]map[string]interface{}, 0)
	var numTotalSegments uint64
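	// processSingleSegment reads the raw records for the given blocks of one
	// segment, merges their column names into finalCols, runs any post-query
	// aggregations on them, and then places each record at its precomputed
	// position in allRecords (appending when no position is known).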
	processSingleSegment := func(currSeg string, virtualTableName string, blkRecIndexes map[uint16]map[uint16]uint64, isLastBlk bool) {
		recs, cols, err := GetRecordsFromSegment(currSeg, virtualTableName, blkRecIndexes,
			config.GetTimeStampKey(), esResponse, qid, aggs)
		if err != nil {
			log.Errorf("GetJsonFromAllRrc: failed to read recs from segfile=%v, err=%v", currSeg, err)
			return
		}
		for cName := range cols {
			finalCols[cName] = true
		}

		for key := range renameHardcodedColumns {
			finalCols[key] = true
		}

		if hasQueryAggergatorBlock || transactionArgsExist {
			numTotalSegments, err = query.GetTotalSegmentsToSearch(qid)
			if err != nil {
				// For synchronous queries, the query is deleted by this point,
				// but segmap has all the segments that the query searched.
				// For async queries, segmap has just one segment because we
				// process them as the search completes, but the query isn't
				// deleted until all segments get processed, so we shouldn't
				// get to this block for async queries.
				numTotalSegments = uint64(len(segmap))
			}

			/**
			 * Overview of aggregation processing:
			 * 1. Start by running PostQueryBucketCleaning to prepare records for aggregation.
			 * 2. After cleaning, evaluate the PerformAggsOnRecs flag:
			 *    - True: not all aggregations were processed. In this case:
			 *      a. Perform aggregations on the records via PerformAggsOnRecs. This function requires all segments to be processed before proceeding to the next step.
			 *      b. Evaluate the CHECK_NEXT_AGG flag in the result:
			 *         i.  If true, reset PerformAggsOnRecs to false, update aggs with NextQueryAgg, and loop for additional cleaning.
			 *         ii. If false, or if resultRecMap is empty, additional segments may still require processing; exit the loop for further segment evaluation.
			 *    - False: all aggregations for the current segment have been processed; exit the loop to either process the next segment or return the final results.
			 * 3. The loop processes the data sequentially, ensuring each segment is fully processed before moving to the next,
			 *    adapting dynamically based on the flags set by PostQueryBucketCleaning and PerformAggsOnRecs.
			 */
			for {
				finishesSegment := isLastBlk
				agg.PostQueryBucketCleaning(nodeRes, aggs, recs, recordIndexInFinal, finalCols, numTotalSegments, finishesSegment)

				// If TransactionEventRecords exist, process them first; this
				// implies there might be segments left for TransactionEvent
				// processing.
				if len(nodeRes.TransactionEventRecords) > 0 {
					_, exists := nodeRes.TransactionEventRecords["CHECK_NEXT_AGG"]
					if exists {
						// Reset TransactionEventRecords and update aggs with
						// NextQueryAgg to loop for the next aggs processing.
						delete(nodeRes.TransactionEventRecords, "CHECK_NEXT_AGG")
						aggs = &structs.QueryAggregators{Next: nodeRes.NextQueryAgg.Next}
					} else {
						break // Break out of the loop to process the next segment.
					}
				} else if nodeRes.PerformAggsOnRecs {
					resultRecMap = search.PerformAggsOnRecs(nodeRes, aggs, recs, finalCols, numTotalSegments, finishesSegment, qid)
					// Always reset the PerformAggsOnRecs flag; otherwise execution
					// would immediately return here from PostQueryBucketCleaning
					// without performing the aggs from the start for the next
					// segment or the next batch.
					nodeRes.PerformAggsOnRecs = false
					if len(resultRecMap) > 0 {
						boolVal, exists := resultRecMap["CHECK_NEXT_AGG"]
						if exists && boolVal {
							// Update aggs with NextQueryAgg to loop for additional cleaning.
							aggs = nodeRes.NextQueryAgg
						} else {
							break
						}
					} else {
						// Not checking or processing the next agg. This implies
						// there might be more segments to process; break out of
						// the loop and continue with the next segment.
						break
					}
				} else {
					// No need to perform aggs on recs; all the aggs are processed.
					break
				}
			}
			// For other commands, if we cannot map recInden to an index, we simply
			// append the record to allRecords. However, for the sort command, the
			// result set should have the same length as recordIndexInFinal so
			// that, when mapping results into allRecords, we preserve the order of
			// the results instead of just appending them to the end of allRecords.
			if len(recordIndexInFinal) > len(allRecords) {
				allRecords = make([]map[string]interface{}, len(recordIndexInFinal))
			}
		}

		numProcessedRecords += len(recs)
		for recInden, record := range recs {
			for key, val := range renameHardcodedColumns {
				record[key] = val
			}

			unknownIndex := false
			idx, ok := recordIndexInFinal[recInden]
			if !ok {
				// For async queries where we need all records before we can
				// return any (like dedup with a sortby), we can get to this
				// block because processing the dedup may return some records
				// from previous segments; since it's an async query, we run
				// this function with len(segmap)=1 because we process the data
				// as the searches complete.
				log.Infof("qid=%d, GetJsonFromAllRrc: Did not find index for record identifier %s.", qid, recInden)
				unknownIndex = true
			}
			if logfmtRequest {
				record = addKeyValuePairs(record)
			}
			includeValues := make(map[string]interface{})
			for cname, val := range record {
				if len(valuesToLabels[cname]) > 0 {
					actualIndex := rawIncludeValuesIndicies[cname]
					switch valType := val.(type) {
					case []interface{}:
						if actualIndex > len(valType)-1 || actualIndex < 0 {
							log.Errorf("GetJsonFromAllRrc: index=%v out of bounds for column=%v of length %v", actualIndex, cname, len(valType))
							continue
						}
						includeValues[valuesToLabels[cname]] = valType[actualIndex]
					case interface{}:
						log.Errorf("GetJsonFromAllRrc: accessing object in %v as array!", cname)
						continue
					default:
						log.Errorf("GetJsonFromAllRrc: unsupported value type")
						continue
					}
				}
			}
			for label, val := range includeValues {
				if record[label] != nil {
					// The label collides with an existing column, e.g., when
					// the label equals the original column name.
					log.Errorf("GetJsonFromAllRrc: accessing object in %v as array!", label)
					continue
				}
				record[label] = val
			}

			delete(recordIndexInFinal, recInden)

			if unknownIndex {
				allRecords = append(allRecords, record)
			} else {
				allRecords[idx] = record
			}

			if transactionArgsExist {
				recsAggRecords = append(recsAggRecords, record)
			}
		}
	}
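	// Dispatch: when only hardcoded columns are requested, skip reading the
	// segments entirely; otherwise read each segment one block at a time. Note
	// that blocks are visited in Go's map-iteration order, so isLastBlk marks
	// the last block visited for a segment, not the highest block number.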
	if !(tableColumnsExist || aggs.OutputTransforms == nil || hasQueryAggergatorBlock || transactionArgsExist) {
		allRecords, finalCols = applyHardcodedColumns(hardcodedArray, renameHardcodedColumns, allRecords, finalCols)
	} else {
		for currSeg, blkIds := range segmap {
			blkIdsIndex := 0
			for blkNum, recNums := range blkIds.BlkRecIndexes {
				blkIdsIndex++
				isLastBlk := blkIdsIndex == len(blkIds.BlkRecIndexes)

				blkRecIndexes := make(map[uint16]map[uint16]uint64)
				blkRecIndexes[blkNum] = recNums
				processSingleSegment(currSeg, blkIds.VirtualTableName, blkRecIndexes, isLastBlk)
			}
		}
	}

	if nodeRes.RecsAggsProcessedSegments >= numTotalSegments {
		// Guard the shared map with the same mutex used in GetOrCreateNodeRes,
		// since this function can run concurrently for different queries.
		mapMutex.Lock()
		delete(nodeResMap, qid)
		mapMutex.Unlock()
	}

	finalRecords, colsSlice := finalizeRecords(allRecords, finalCols, numProcessedRecords, recsAggRecords, transactionArgsExist)
	log.Infof("qid=%d, GetJsonFromAllRrc: Got %v raw records from files in %+v", qid, len(finalRecords), time.Since(sTime))

	return finalRecords, colsSlice, nil
}

func addKeyValuePairs(record map[string]interface{}) map[string]interface{} {
	for _, value := range record {
		if strValue, ok := value.(string); ok {
			// Check if the string value contains key-value pairs.
			keyValuePairs, err := extractKeyValuePairsFromString(strValue)
			if err == nil {
				// Add the key-value pairs to the record.
				for k, v := range keyValuePairs {
					record[k] = v
				}
			}
		}
	}
	return record
}

func extractKeyValuePairsFromString(str string) (map[string]interface{}, error) {
	keyValuePairs := make(map[string]interface{})
	pairs := strings.Split(str, ",")

	for _, pair := range pairs {
		parts := strings.Split(pair, "=")
		if len(parts) == 2 {
			key := strings.TrimSpace(parts[0])
			value := strings.TrimSpace(parts[1])
			keyValuePairs[key] = utils.GetLiteralFromString(value)
		} else {
			return nil, fmt.Errorf("invalid key-value pair: %s", pair)
		}
	}

	return keyValuePairs, nil
}
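// Illustrative usage (not called anywhere in this package): the parser splits
// on commas and then on '=', so values containing either character will not
// round-trip, and a single malformed pair aborts the whole string.
//
//	pairs, err := extractKeyValuePairsFromString("status=ok, latency=42")
//	// err == nil; pairs["status"] == "ok" and pairs["latency"] holds whatever
//	// literal utils.GetLiteralFromString("42") produces (e.g., a number).
//
//	_, err = extractKeyValuePairsFromString("no pairs here")
//	// err != nil, because "no pairs here" contains no '='.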