github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/record/rrcreader.go

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package record

import (
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/siglens/siglens/pkg/config"
	agg "github.com/siglens/siglens/pkg/segment/aggregations"
	"github.com/siglens/siglens/pkg/segment/query"
	"github.com/siglens/siglens/pkg/segment/search"
	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	log "github.com/sirupsen/logrus"
)

// nodeResMap caches one NodeResult per query ID (qid); access is guarded by mapMutex.
var (
	nodeResMap = make(map[uint64]*structs.NodeResult)
	mapMutex   sync.Mutex
)

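// GetOrCreateNodeRes returns the NodeResult for the given qid, creating and
// caching a new one if none exists yet. It is safe for concurrent use.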
func GetOrCreateNodeRes(qid uint64) *structs.NodeResult {
	mapMutex.Lock()
	defer mapMutex.Unlock()

	// Check whether a NodeResult instance already exists for the given qid.
	if nr, exists := nodeResMap[qid]; exists {
		return nr
	}

	// If it doesn't exist, create a new instance and add it to the map.
	nr := &structs.NodeResult{}
	nodeResMap[qid] = nr

	return nr
}

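// buildSegMap groups the local (non-remote) RRCs by segment key, recording
// which block/record numbers to read from each segment. It also returns a map
// from each record's "segkey_blockNum_recordNum" identifier to its index in
// the final result.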
func buildSegMap(allrrc []*utils.RecordResultContainer, segEncToKey map[uint16]string) (map[string]*utils.BlkRecIdxContainer, map[string]int) {
	segmap := make(map[string]*utils.BlkRecIdxContainer)
	recordIndexInFinal := make(map[string]int)

	for idx, rrc := range allrrc {
		if rrc.SegKeyInfo.IsRemote {
			log.Debugf("buildSegMap: skipping remote segment:%v", rrc.SegKeyInfo.RecordId)
			continue
		}
		segkey, ok := segEncToKey[rrc.SegKeyInfo.SegKeyEnc]
		if !ok {
			log.Errorf("buildSegMap: could not find segenc:%v in map", rrc.SegKeyInfo.SegKeyEnc)
			continue
		}
		blkIdxsCtr, ok := segmap[segkey]
		if !ok {
			innermap := make(map[uint16]map[uint16]uint64)
			blkIdxsCtr = &utils.BlkRecIdxContainer{BlkRecIndexes: innermap, VirtualTableName: rrc.VirtualTableName}
			segmap[segkey] = blkIdxsCtr
		}
		_, ok = blkIdxsCtr.BlkRecIndexes[rrc.BlockNum]
		if !ok {
			blkIdxsCtr.BlkRecIndexes[rrc.BlockNum] = make(map[uint16]uint64)
		}
		blkIdxsCtr.BlkRecIndexes[rrc.BlockNum][rrc.RecordNum] = rrc.TimeStamp

		recordIdent := fmt.Sprintf("%s_%d_%d", segkey, rrc.BlockNum, rrc.RecordNum)
		recordIndexInFinal[recordIdent] = idx
	}

	return segmap, recordIndexInFinal
}

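// prepareOutputTransforms extracts the output-column settings from the query
// aggregators: the indices and labels for include-values columns, whether this
// is a logfmt request, whether table columns exist, and the hardcoded columns
// along with their renames.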
func prepareOutputTransforms(aggs *structs.QueryAggregators) (map[string]int, map[string]string, bool, bool, []string, map[string]string) {
	rawIncludeValuesIndices := make(map[string]int)
	valuesToLabels := make(map[string]string)
	logfmtRequest := false
	tableColumnsExist := false
	if aggs != nil && aggs.OutputTransforms != nil && aggs.OutputTransforms.OutputColumns != nil {
		logfmtRequest = aggs.OutputTransforms.OutputColumns.Logfmt
		tableColumnsExist = true
		for _, rawIncludeValue := range aggs.OutputTransforms.OutputColumns.IncludeValues {
			if !logfmtRequest {
				rawIncludeValuesIndices[rawIncludeValue.ColName] = rawIncludeValue.Index
			}
			valuesToLabels[rawIncludeValue.ColName] = rawIncludeValue.Label
		}
	}
	var hardcodedArray = []string{}
	var renameHardcodedColumns = make(map[string]string)
	if aggs != nil && aggs.OutputTransforms != nil && aggs.OutputTransforms.HarcodedCol != nil {
		hardcodedArray = append(hardcodedArray, aggs.OutputTransforms.HarcodedCol...)

		for key, value := range aggs.OutputTransforms.RenameHardcodedColumns {
			renameHardcodedColumns[value] = key
		}
	}

	return rawIncludeValuesIndices, valuesToLabels, logfmtRequest, tableColumnsExist, hardcodedArray, renameHardcodedColumns
}

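// applyHardcodedColumns handles the case where only hardcoded columns are
// requested: it collapses allRecords to a single record containing the renamed
// hardcoded columns and marks those columns as final.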
func applyHardcodedColumns(hardcodedArray []string, renameHardcodedColumns map[string]string, allRecords []map[string]interface{}, finalCols map[string]bool) ([]map[string]interface{}, map[string]bool) {
	if len(hardcodedArray) > 0 {
		for key := range renameHardcodedColumns {
			finalCols[key] = true
		}
		record := make(map[string]interface{})
		for key, val := range renameHardcodedColumns {
			record[key] = val
		}
		allRecords[0] = record
		allRecords = allRecords[:1]
	}

	return allRecords, finalCols
}

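// finalizeRecords produces the final record slice and a sorted list of column
// names. If transaction arguments exist it returns recsAggRecords; otherwise
// it drops any nil entries left in allRecords by commands (like dedup) that
// remove records.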
func finalizeRecords(allRecords []map[string]interface{}, finalCols map[string]bool, numProcessedRecords int, recsAggRecords []map[string]interface{}, transactionArgsExist bool) ([]map[string]interface{}, []string) {
	colsSlice := make([]string, len(finalCols))
	idx := 0
	for colName := range finalCols {
		colsSlice[idx] = colName
		idx++
	}

	// Some commands (like dedup) can remove records from the final result, so
	// drop the nil entries from allRecords to get finalRecords.
	var finalRecords []map[string]interface{}
	if transactionArgsExist {
		finalRecords = recsAggRecords
	} else if numProcessedRecords == len(allRecords) {
		finalRecords = allRecords
	} else {
		finalRecords = make([]map[string]interface{}, numProcessedRecords)
		idx = 0
		for _, record := range allRecords {
			if idx >= numProcessedRecords {
				break
			}

			if record != nil {
				finalRecords[idx] = record
				idx++
			}
		}
	}

	sort.Strings(colsSlice)

	return finalRecords, colsSlice
}

// GetJsonFromAllRrc gets all raw JSON records from the given RRCs. If
// esResponse is false, _id and _type will not be added to any record.
func GetJsonFromAllRrc(allrrc []*utils.RecordResultContainer, esResponse bool, qid uint64,
	segEncToKey map[uint16]string, aggs *structs.QueryAggregators) ([]map[string]interface{}, []string, error) {

	sTime := time.Now()
	nodeRes := GetOrCreateNodeRes(qid)
	segmap, recordIndexInFinal := buildSegMap(allrrc, segEncToKey)
	rawIncludeValuesIndices, valuesToLabels, logfmtRequest, tableColumnsExist, hardcodedArray, renameHardcodedColumns := prepareOutputTransforms(aggs)

	allRecords := make([]map[string]interface{}, len(allrrc))
	finalCols := make(map[string]bool)
	numProcessedRecords := 0

	var resultRecMap map[string]bool

	hasQueryAggregatorBlock := aggs.HasQueryAggergatorBlockInChain()
	transactionArgsExist := aggs.HasTransactionArgumentsInChain()
	recsAggRecords := make([]map[string]interface{}, 0)
	var numTotalSegments uint64

	processSingleSegment := func(currSeg string, virtualTableName string, blkRecIndexes map[uint16]map[uint16]uint64, isLastBlk bool) {
		recs, cols, err := GetRecordsFromSegment(currSeg, virtualTableName, blkRecIndexes,
			config.GetTimeStampKey(), esResponse, qid, aggs)
		if err != nil {
			log.Errorf("GetJsonFromAllRrc: failed to read recs from segfile=%v, err=%v", currSeg, err)
			return
		}
		for cName := range cols {
			finalCols[cName] = true
		}

		for key := range renameHardcodedColumns {
			finalCols[key] = true
		}

		if hasQueryAggregatorBlock || transactionArgsExist {
			numTotalSegments, err = query.GetTotalSegmentsToSearch(qid)
			if err != nil {
				// For synchronous queries, the query is deleted by this
				// point, but segmap has all the segments that the query
				// searched.
				// For async queries, segmap has just one segment because
				// we process them as the search completes, but the query
				// isn't deleted until all segments are processed, so we
				// shouldn't reach this block for async queries.
				numTotalSegments = uint64(len(segmap))
			}

			/**
			* Overview of aggregation processing:
			* 1. Start by executing PostQueryBucketCleaning to prepare the records for aggregation.
			* 2. Evaluate the PerformAggsOnRecs flag after cleaning:
			*    - True: not all aggregations were processed. In this case:
			*       a. Perform aggregations on the records via PerformAggsOnRecs. This function requires all segments to be processed before proceeding to the next step.
			*       b. Evaluate the CHECK_NEXT_AGG flag from the result:
			*          i. If true, reset PerformAggsOnRecs to false, update aggs with NextQueryAgg, and loop for additional cleaning.
			*          ii. If false, or if resultRecMap is empty, additional segments may require processing; exit the loop for further segment evaluation.
			*    - False: all aggregations for the current segment have been processed; exit the loop to either process the next segment or return the final results.
			* 3. The loop processes the data sequentially, ensuring each segment (or all segments together) is fully processed before moving on,
			*    adapting dynamically based on the flags set by PostQueryBucketCleaning and PerformAggsOnRecs.
			 */
			for {
				finishesSegment := isLastBlk
				agg.PostQueryBucketCleaning(nodeRes, aggs, recs, recordIndexInFinal, finalCols, numTotalSegments, finishesSegment)

				// If TransactionEventRecords exist, process them first. This implies there might be segments left for TransactionEvent processing.
				if len(nodeRes.TransactionEventRecords) > 0 {
					_, exists := nodeRes.TransactionEventRecords["CHECK_NEXT_AGG"]
					if exists {
						// Reset the TransactionEventRecords and update aggs with NextQueryAgg to loop for the next aggs processing.
						delete(nodeRes.TransactionEventRecords, "CHECK_NEXT_AGG")
						aggs = &structs.QueryAggregators{Next: nodeRes.NextQueryAgg.Next}
					} else {
						break // Break out of the loop to process the next segment.
					}
				} else if nodeRes.PerformAggsOnRecs {
					resultRecMap = search.PerformAggsOnRecs(nodeRes, aggs, recs, finalCols, numTotalSegments, finishesSegment, qid)
					// Reset the PerformAggsOnRecs flag by default; otherwise execution would immediately
					// return here from PostQueryBucketCleaning without performing the aggs from the start
					// for the next segment or the next batch.
					nodeRes.PerformAggsOnRecs = false
					if len(resultRecMap) > 0 {
						boolVal, exists := resultRecMap["CHECK_NEXT_AGG"]
						if exists && boolVal {
							// Update aggs with NextQueryAgg to loop for additional cleaning.
							aggs = nodeRes.NextQueryAgg
						} else {
							break
						}
					} else {
						// Not checking or processing the next agg. This implies that there might be more segments to process.
						// Break out of the loop and continue processing the next segment.
						break
					}
				} else {
					// No need to perform aggs on recs; all the aggs are processed.
					break
				}
			}
			// For other commands, if we cannot map a record identifier to an index, we simply append the record to allRecords.
			// However, for the sort command, the result set should be the same length as recordIndexInFinal
			// so that, when mapping the results into allRecords, we preserve their order rather than just appending them to the end.
			if len(recordIndexInFinal) > len(allRecords) {
				allRecords = make([]map[string]interface{}, len(recordIndexInFinal))
			}
		}

		numProcessedRecords += len(recs)
		for recIdent, record := range recs {
			for key, val := range renameHardcodedColumns {
				record[key] = val
			}

			unknownIndex := false
			idx, ok := recordIndexInFinal[recIdent]
			if !ok {
				// For async queries where we need all records before we can
				// return any (like dedup with a sortby), we can get to this
				// block because processing the dedup may return some records
				// from previous segments; since it's an async query, we run
				// this function with len(segmap)=1 because we try to process
				// the data as the searches complete.
				log.Infof("qid=%d, GetJsonFromAllRrc: Did not find index for record identifier %s.", qid, recIdent)
				unknownIndex = true
			}
			if logfmtRequest {
				record = addKeyValuePairs(record)
			}
			includeValues := make(map[string]interface{})
			for cname, val := range record {
				if len(valuesToLabels[cname]) > 0 {
					actualIndex := rawIncludeValuesIndices[cname]
					switch valType := val.(type) {
					case []interface{}:
						if actualIndex > len(valType)-1 || actualIndex < 0 {
							log.Errorf("GetJsonFromAllRrc: index=%v out of bounds for column=%v of length %v", actualIndex, cname, len(valType))
							continue
						}
						includeValues[valuesToLabels[cname]] = valType[actualIndex]
					case interface{}:
						log.Errorf("GetJsonFromAllRrc: accessing object in %v as array!", cname)
						continue
					default:
						log.Errorf("GetJsonFromAllRrc: unsupported value type for column=%v", cname)
						continue
					}
				}
			}
			for label, val := range includeValues {
				if record[label] != nil {
					// The label collides with an existing column, e.g., when label == original column name.
					log.Errorf("GetJsonFromAllRrc: label=%v collides with an existing column; skipping", label)
					continue
				}
				record[label] = val
			}

			delete(recordIndexInFinal, recIdent)

			if unknownIndex {
				allRecords = append(allRecords, record)
			} else {
				allRecords[idx] = record
			}

			if transactionArgsExist {
				recsAggRecords = append(recsAggRecords, record)
			}
		}
	}

	if !(tableColumnsExist || aggs.OutputTransforms == nil || hasQueryAggregatorBlock || transactionArgsExist) {
		allRecords, finalCols = applyHardcodedColumns(hardcodedArray, renameHardcodedColumns, allRecords, finalCols)
	} else {
		for currSeg, blkIds := range segmap {
			blkIdsIndex := 0
			for blkNum, recNums := range blkIds.BlkRecIndexes {
				blkIdsIndex++
				isLastBlk := blkIdsIndex == len(blkIds.BlkRecIndexes)

				blkRecIndexes := make(map[uint16]map[uint16]uint64)
				blkRecIndexes[blkNum] = recNums
				processSingleSegment(currSeg, blkIds.VirtualTableName, blkRecIndexes, isLastBlk)
			}
		}
	}

	if nodeRes.RecsAggsProcessedSegments >= numTotalSegments {
		// Hold mapMutex while deleting: nodeResMap is shared, and every other
		// access to it is guarded by this mutex.
		mapMutex.Lock()
		delete(nodeResMap, qid)
		mapMutex.Unlock()
	}

	finalRecords, colsSlice := finalizeRecords(allRecords, finalCols, numProcessedRecords, recsAggRecords, transactionArgsExist)
	log.Infof("qid=%d, GetJsonFromAllRrc: Got %v raw records from files in %+v", qid, len(finalRecords), time.Since(sTime))

	return finalRecords, colsSlice, nil
}

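// addKeyValuePairs scans every string value in the record for embedded
// key=value pairs and, when parsing succeeds, merges the extracted pairs back
// into the record.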
func addKeyValuePairs(record map[string]interface{}) map[string]interface{} {
	for _, value := range record {
		if strValue, ok := value.(string); ok {
			// Check if the string value contains key-value pairs.
			keyValuePairs, err := extractKeyValuePairsFromString(strValue)
			if err == nil {
				// Add the extracted key-value pairs to the record.
				for k, v := range keyValuePairs {
					record[k] = v
				}
			}
		}
	}
	return record
}

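// extractKeyValuePairsFromString parses a comma-separated list of key=value
// pairs into a map, converting each value with utils.GetLiteralFromString
// (e.g., "method=GET, status=200" yields the keys "method" and "status").
// It returns an error if any comma-separated piece is not exactly one
// key=value pair.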
func extractKeyValuePairsFromString(str string) (map[string]interface{}, error) {
	keyValuePairs := make(map[string]interface{})
	pairs := strings.Split(str, ",")

	for _, pair := range pairs {
		parts := strings.Split(pair, "=")
		if len(parts) == 2 {
			key := strings.TrimSpace(parts[0])
			value := strings.TrimSpace(parts[1])
			keyValuePairs[key] = utils.GetLiteralFromString(value)
		} else {
			return nil, fmt.Errorf("invalid key-value pair: %s", pair)
		}
	}

	return keyValuePairs, nil
}