github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/record/recordreader.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package record
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"os"
    23  	"sort"
    24  
    25  	"github.com/cespare/xxhash"
    26  	"github.com/siglens/siglens/pkg/blob"
    27  	"github.com/siglens/siglens/pkg/common/fileutils"
    28  	"github.com/siglens/siglens/pkg/config"
    29  	"github.com/siglens/siglens/pkg/segment/query/metadata"
    30  	"github.com/siglens/siglens/pkg/segment/reader/segread"
    31  	"github.com/siglens/siglens/pkg/segment/structs"
    32  	"github.com/siglens/siglens/pkg/segment/utils"
    33  	"github.com/siglens/siglens/pkg/segment/writer"
    34  	toputils "github.com/siglens/siglens/pkg/utils"
    35  	log "github.com/sirupsen/logrus"
    36  )
    37  
    38  // returns a map of record identifiers to record maps, and all columns seen
    39  // record identifiers is segfilename + blockNum + recordNum
    40  // If esResponse is false, _id and _type will not be added to any record
    41  func GetRecordsFromSegment(segKey string, vTable string, blkRecIndexes map[uint16]map[uint16]uint64,
    42  	tsKey string, esQuery bool, qid uint64,
    43  	aggs *structs.QueryAggregators) (map[string]map[string]interface{}, map[string]bool, error) {
    44  
    45  	var err error
    46  	segKey, err = checkRecentlyRotatedKey(segKey)
    47  	if err != nil {
    48  		log.Errorf("qid=%d GetRecordsFromSegment failed to get recently rotated information for key %s table %s. err %+v", qid, segKey, vTable, err)
    49  	}
    50  	var allCols map[string]bool
    51  	var exists bool
    52  	allCols, exists = writer.CheckAndGetColsForUnrotatedSegKey(segKey)
    53  	if !exists {
    54  		allCols, exists = metadata.CheckAndGetColsForSegKey(segKey, vTable)
    55  		if !exists {
    56  			log.Errorf("GetRecordsFromSegment: failed to get column for key: %s, table %s", segKey, vTable)
    57  			return nil, allCols, errors.New("failed to get column names for segkey in rotated and unrotated files")
    58  		}
    59  	}
    60  	allCols = applyColNameTransform(allCols, aggs, qid)
    61  	numOpenFds := int64(len(allCols))
    62  	err = fileutils.GLOBAL_FD_LIMITER.TryAcquireWithBackoff(numOpenFds, 10, fmt.Sprintf("GetRecordsFromSegment.qid=%d", qid))
    63  	if err != nil {
    64  		log.Errorf("qid=%d GetRecordsFromSegment failed to acquire lock for opening %+v file descriptors. err %+v", qid, numOpenFds, err)
    65  		return nil, map[string]bool{}, err
    66  	}
    67  	defer fileutils.GLOBAL_FD_LIMITER.Release(numOpenFds)
    68  
    69  	bulkDownloadFiles := make(map[string]string)
    70  	allFiles := make([]string, 0)
    71  	for col := range allCols {
    72  		ssFile := fmt.Sprintf("%v_%v.csg", segKey, xxhash.Sum64String(col))
    73  		bulkDownloadFiles[ssFile] = col
    74  		allFiles = append(allFiles, ssFile)
    75  	}
    76  	err = blob.BulkDownloadSegmentBlob(bulkDownloadFiles, true)
    77  	if err != nil {
    78  		log.Errorf("qid=%d, GetRecordsFromSegment failed to download col file. err=%v", qid, err)
    79  		return nil, map[string]bool{}, err
    80  	}
    81  
    82  	defer func() {
    83  		err = blob.SetSegSetFilesAsNotInUse(allFiles)
    84  		if err != nil {
    85  			log.Errorf("qid=%d, GetRecordsFromSegment failed to set segset files as not in use. err=%v", qid, err)
    86  		}
    87  	}()
    88  
    89  	for ssFile := range bulkDownloadFiles {
    90  		fd, err := os.Open(ssFile)
    91  		if err != nil {
    92  			log.Errorf("qid=%d, GetRecordsFromSegment failed to open col file. Tried to open file=%v, err=%v", qid, ssFile, err)
    93  			return nil, map[string]bool{}, err
    94  		}
    95  		defer fd.Close()
    96  	}
    97  
    98  	var blockMetadata map[uint16]*structs.BlockMetadataHolder
    99  	if writer.IsSegKeyUnrotated(segKey) {
   100  		blockMetadata, err = writer.GetBlockSearchInfoForKey(segKey)
   101  		if err != nil {
   102  			log.Errorf("qid=%d GetRecordsFromSegment failed to get block search info for unrotated key %s table %s", qid, segKey, vTable)
   103  			return nil, map[string]bool{}, err
   104  		}
   105  	} else {
   106  		blockMetadata, err = metadata.GetBlockSearchInfoForKey(segKey)
   107  		if err != nil {
   108  			log.Errorf("GetRecordsFromSegment: failed to get blocksearchinfo for segkey=%v, err=%v", segKey, err)
   109  			return nil, map[string]bool{}, err
   110  		}
   111  	}
   112  
   113  	var blockSum []*structs.BlockSummary
   114  	if writer.IsSegKeyUnrotated(segKey) {
   115  		blockSum, err = writer.GetBlockSummaryForKey(segKey)
   116  		if err != nil {
   117  			log.Errorf("qid=%d GetRecordsFromSegment failed to get block search info for unrotated key %s table %s", qid, segKey, vTable)
   118  			return nil, map[string]bool{}, err
   119  		}
   120  	} else {
   121  		blockSum, err = metadata.GetBlockSummariesForKey(segKey)
   122  		if err != nil {
   123  			log.Errorf("GetRecordsFromSegment: failed to get blocksearchinfo for segkey=%v, err=%v", segKey, err)
   124  			return nil, map[string]bool{}, err
   125  		}
   126  	}
   127  
   128  	result := make(map[string]map[string]interface{})
   129  
   130  	sharedReader, err := segread.InitSharedMultiColumnReaders(segKey, allCols, blockMetadata, blockSum, 1, qid)
   131  	if err != nil {
   132  		log.Errorf("GetRecordsFromSegment: failed to initialize shared readers for segkey=%v, err=%v", segKey, err)
   133  		return nil, map[string]bool{}, err
   134  	}
   135  	defer sharedReader.Close()
   136  	multiReader := sharedReader.MultiColReaders[0]
   137  
   138  	allMatchedColumns := make(map[string]bool)
   139  	allMatchedColumns[config.GetTimeStampKey()] = true
   140  
   141  	// get the keys (which is blocknums, and sort them
   142  	sortedBlkNums := make([]uint16, len(blkRecIndexes))
   143  	idx := 0
   144  	for bnum := range blkRecIndexes {
   145  		sortedBlkNums[idx] = bnum
   146  		idx++
   147  	}
   148  	sort.Slice(sortedBlkNums, func(i, j int) bool { return sortedBlkNums[i] < sortedBlkNums[j] })
   149  
   150  	var addedExtraFields bool
   151  	for _, blockIdx := range sortedBlkNums {
   152  		// traverse the sorted blocknums and use it to extract the recordIdxTSMap
   153  		// and then do the search, this way we read the segfiles in sequence
   154  
   155  		recordIdxTSMap := blkRecIndexes[blockIdx]
   156  
   157  		allRecNums := make([]uint16, len(recordIdxTSMap))
   158  		idx := 0
   159  		for recNum := range recordIdxTSMap {
   160  			allRecNums[idx] = recNum
   161  			idx++
   162  		}
   163  		sort.Slice(allRecNums, func(i, j int) bool { return allRecNums[i] < allRecNums[j] })
   164  		resultAllRawRecs := readAllRawRecords(allRecNums, blockIdx, multiReader, allMatchedColumns, esQuery, qid, aggs)
   165  
   166  		for r := range resultAllRawRecs {
   167  			resultAllRawRecs[r][config.GetTimeStampKey()] = recordIdxTSMap[r]
   168  			resultAllRawRecs[r]["_index"] = vTable
   169  
   170  			resId := fmt.Sprintf("%s_%d_%d", segKey, blockIdx, r)
   171  			if esQuery {
   172  				if _, ok := resultAllRawRecs[r]["_id"]; !ok {
   173  					resultAllRawRecs[r]["_id"] = fmt.Sprintf("%d", xxhash.Sum64String(resId))
   174  				}
   175  			}
   176  			result[resId] = resultAllRawRecs[r]
   177  			addedExtraFields = true
   178  		}
   179  	}
   180  	if addedExtraFields {
   181  		allMatchedColumns["_index"] = true
   182  	}
   183  
   184  	return result, allMatchedColumns, nil
   185  }
   186  
   187  func checkRecentlyRotatedKey(segkey string) (string, error) {
   188  	if writer.IsRecentlyRotatedSegKey(segkey) {
   189  		return writer.GetFileNameForRotatedSegment(segkey)
   190  	}
   191  	return segkey, nil
   192  }
   193  
   194  func getMathOpsColMap(MathOps []*structs.MathEvaluator) map[string]int {
   195  	colMap := make(map[string]int)
   196  	for index, mathOp := range MathOps {
   197  		colMap[mathOp.MathCol] = index
   198  	}
   199  	return colMap
   200  }
   201  
   202  func readAllRawRecords(orderedRecNums []uint16, blockIdx uint16, segReader *segread.MultiColSegmentReader,
   203  	allMatchedColumns map[string]bool, esQuery bool, qid uint64, aggs *structs.QueryAggregators) map[uint16]map[string]interface{} {
   204  
   205  	results := make(map[uint16]map[string]interface{})
   206  
   207  	dictEncCols := make(map[string]bool)
   208  	for _, colInfo := range segReader.AllColums {
   209  		col := colInfo.ColumnName
   210  		if !esQuery && (col == "_type" || col == "_id") {
   211  			dictEncCols[col] = true
   212  			continue
   213  		}
   214  		if col == config.GetTimeStampKey() {
   215  			dictEncCols[col] = true
   216  			continue
   217  		}
   218  		ok := segReader.GetDictEncCvalsFromColFile(results, col, blockIdx, orderedRecNums, qid)
   219  		if ok {
   220  			dictEncCols[col] = true
   221  			allMatchedColumns[col] = true
   222  		}
   223  	}
   224  
   225  	var mathColMap map[string]int
   226  	var mathColOpsPresent bool
   227  
   228  	if aggs != nil && aggs.MathOperations != nil && len(aggs.MathOperations) > 0 {
   229  		mathColMap = getMathOpsColMap(aggs.MathOperations)
   230  		mathColOpsPresent = true
   231  	} else {
   232  		mathColOpsPresent = false
   233  		mathColMap = make(map[string]int)
   234  	}
   235  
   236  	for _, recNum := range orderedRecNums {
   237  		_, ok := results[recNum]
   238  		if !ok {
   239  			results[recNum] = make(map[string]interface{})
   240  		}
   241  
   242  		for _, colInfo := range segReader.AllColums {
   243  			col := colInfo.ColumnName
   244  
   245  			_, ok := dictEncCols[col]
   246  			if ok {
   247  				continue
   248  			}
   249  
   250  			cValEnc, err := segReader.ExtractValueFromColumnFile(col, blockIdx, recNum, qid)
   251  			if err != nil {
   252  				// if the column was absent for an entire block and came for other blocks, this will error, hence no error logging here
   253  			} else {
   254  
   255  				if mathColOpsPresent {
   256  					colIndex, exists := mathColMap[col]
   257  					if exists {
   258  						mathOp := aggs.MathOperations[colIndex]
   259  						fieldToValue := make(map[string]utils.CValueEnclosure)
   260  						fieldToValue[mathOp.MathCol] = *cValEnc
   261  						valueFloat, err := mathOp.ValueColRequest.EvaluateToFloat(fieldToValue)
   262  						if err != nil {
   263  							log.Errorf("qid=%d, failed to evaluate math operation for col %s, err=%v", qid, col, err)
   264  						} else {
   265  							cValEnc.CVal = valueFloat
   266  						}
   267  					}
   268  				}
   269  
   270  				results[recNum][col] = cValEnc.CVal
   271  				allMatchedColumns[col] = true
   272  			}
   273  		}
   274  
   275  		if aggs != nil && aggs.OutputTransforms != nil {
   276  			if aggs.OutputTransforms.OutputColumns != nil && aggs.OutputTransforms.OutputColumns.RenameColumns != nil {
   277  				for oldCname, newCname := range aggs.OutputTransforms.OutputColumns.RenameColumns {
   278  					for _, logLine := range results {
   279  						if logLine[oldCname] != nil && oldCname != newCname {
   280  							logLine[newCname] = logLine[oldCname]
   281  							delete(logLine, oldCname)
   282  							allMatchedColumns[newCname] = true
   283  							delete(allMatchedColumns, oldCname)
   284  						}
   285  					}
   286  				}
   287  			}
   288  		}
   289  
   290  	}
   291  	return results
   292  }
   293  
   294  func applyColNameTransform(allCols map[string]bool, aggs *structs.QueryAggregators, qid uint64) map[string]bool {
   295  	retCols := make(map[string]bool)
   296  	if aggs == nil || aggs.OutputTransforms == nil {
   297  		return allCols
   298  	}
   299  
   300  	if aggs.OutputTransforms.OutputColumns == nil {
   301  		return allCols
   302  	}
   303  
   304  	allColNames := make([]string, len(allCols))
   305  	i := 0
   306  	for cName := range allCols {
   307  		allColNames[i] = cName
   308  		i++
   309  	}
   310  
   311  	if aggs.OutputTransforms.OutputColumns.IncludeColumns == nil {
   312  		retCols = allCols
   313  	} else {
   314  		for _, cName := range aggs.OutputTransforms.OutputColumns.IncludeColumns {
   315  			for _, matchingColumn := range toputils.SelectMatchingStringsWithWildcard(cName, allColNames) {
   316  				retCols[matchingColumn] = true
   317  			}
   318  		}
   319  	}
   320  	if len(aggs.OutputTransforms.OutputColumns.ExcludeColumns) != 0 {
   321  		for _, cName := range aggs.OutputTransforms.OutputColumns.ExcludeColumns {
   322  			for _, matchingColumn := range toputils.SelectMatchingStringsWithWildcard(cName, allColNames) {
   323  				delete(retCols, matchingColumn)
   324  			}
   325  		}
   326  	}
   327  	if aggs.OutputTransforms.OutputColumns.RenameColumns != nil {
   328  		log.Info("handle aggs.OutputTransforms.OutputColumns.RenameColumn")
   329  		//todo handle rename
   330  	}
   331  	return retCols
   332  }