github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/record/recordreader.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package record 18 19 import ( 20 "errors" 21 "fmt" 22 "os" 23 "sort" 24 25 "github.com/cespare/xxhash" 26 "github.com/siglens/siglens/pkg/blob" 27 "github.com/siglens/siglens/pkg/common/fileutils" 28 "github.com/siglens/siglens/pkg/config" 29 "github.com/siglens/siglens/pkg/segment/query/metadata" 30 "github.com/siglens/siglens/pkg/segment/reader/segread" 31 "github.com/siglens/siglens/pkg/segment/structs" 32 "github.com/siglens/siglens/pkg/segment/utils" 33 "github.com/siglens/siglens/pkg/segment/writer" 34 toputils "github.com/siglens/siglens/pkg/utils" 35 log "github.com/sirupsen/logrus" 36 ) 37 38 // returns a map of record identifiers to record maps, and all columns seen 39 // record identifiers is segfilename + blockNum + recordNum 40 // If esResponse is false, _id and _type will not be added to any record 41 func GetRecordsFromSegment(segKey string, vTable string, blkRecIndexes map[uint16]map[uint16]uint64, 42 tsKey string, esQuery bool, qid uint64, 43 aggs *structs.QueryAggregators) (map[string]map[string]interface{}, map[string]bool, error) { 44 45 var err error 46 segKey, err = checkRecentlyRotatedKey(segKey) 47 if err != nil { 48 log.Errorf("qid=%d GetRecordsFromSegment failed to get recently rotated information for key %s table %s. err %+v", qid, segKey, vTable, err) 49 } 50 var allCols map[string]bool 51 var exists bool 52 allCols, exists = writer.CheckAndGetColsForUnrotatedSegKey(segKey) 53 if !exists { 54 allCols, exists = metadata.CheckAndGetColsForSegKey(segKey, vTable) 55 if !exists { 56 log.Errorf("GetRecordsFromSegment: failed to get column for key: %s, table %s", segKey, vTable) 57 return nil, allCols, errors.New("failed to get column names for segkey in rotated and unrotated files") 58 } 59 } 60 allCols = applyColNameTransform(allCols, aggs, qid) 61 numOpenFds := int64(len(allCols)) 62 err = fileutils.GLOBAL_FD_LIMITER.TryAcquireWithBackoff(numOpenFds, 10, fmt.Sprintf("GetRecordsFromSegment.qid=%d", qid)) 63 if err != nil { 64 log.Errorf("qid=%d GetRecordsFromSegment failed to acquire lock for opening %+v file descriptors. err %+v", qid, numOpenFds, err) 65 return nil, map[string]bool{}, err 66 } 67 defer fileutils.GLOBAL_FD_LIMITER.Release(numOpenFds) 68 69 bulkDownloadFiles := make(map[string]string) 70 allFiles := make([]string, 0) 71 for col := range allCols { 72 ssFile := fmt.Sprintf("%v_%v.csg", segKey, xxhash.Sum64String(col)) 73 bulkDownloadFiles[ssFile] = col 74 allFiles = append(allFiles, ssFile) 75 } 76 err = blob.BulkDownloadSegmentBlob(bulkDownloadFiles, true) 77 if err != nil { 78 log.Errorf("qid=%d, GetRecordsFromSegment failed to download col file. err=%v", qid, err) 79 return nil, map[string]bool{}, err 80 } 81 82 defer func() { 83 err = blob.SetSegSetFilesAsNotInUse(allFiles) 84 if err != nil { 85 log.Errorf("qid=%d, GetRecordsFromSegment failed to set segset files as not in use. err=%v", qid, err) 86 } 87 }() 88 89 for ssFile := range bulkDownloadFiles { 90 fd, err := os.Open(ssFile) 91 if err != nil { 92 log.Errorf("qid=%d, GetRecordsFromSegment failed to open col file. Tried to open file=%v, err=%v", qid, ssFile, err) 93 return nil, map[string]bool{}, err 94 } 95 defer fd.Close() 96 } 97 98 var blockMetadata map[uint16]*structs.BlockMetadataHolder 99 if writer.IsSegKeyUnrotated(segKey) { 100 blockMetadata, err = writer.GetBlockSearchInfoForKey(segKey) 101 if err != nil { 102 log.Errorf("qid=%d GetRecordsFromSegment failed to get block search info for unrotated key %s table %s", qid, segKey, vTable) 103 return nil, map[string]bool{}, err 104 } 105 } else { 106 blockMetadata, err = metadata.GetBlockSearchInfoForKey(segKey) 107 if err != nil { 108 log.Errorf("GetRecordsFromSegment: failed to get blocksearchinfo for segkey=%v, err=%v", segKey, err) 109 return nil, map[string]bool{}, err 110 } 111 } 112 113 var blockSum []*structs.BlockSummary 114 if writer.IsSegKeyUnrotated(segKey) { 115 blockSum, err = writer.GetBlockSummaryForKey(segKey) 116 if err != nil { 117 log.Errorf("qid=%d GetRecordsFromSegment failed to get block search info for unrotated key %s table %s", qid, segKey, vTable) 118 return nil, map[string]bool{}, err 119 } 120 } else { 121 blockSum, err = metadata.GetBlockSummariesForKey(segKey) 122 if err != nil { 123 log.Errorf("GetRecordsFromSegment: failed to get blocksearchinfo for segkey=%v, err=%v", segKey, err) 124 return nil, map[string]bool{}, err 125 } 126 } 127 128 result := make(map[string]map[string]interface{}) 129 130 sharedReader, err := segread.InitSharedMultiColumnReaders(segKey, allCols, blockMetadata, blockSum, 1, qid) 131 if err != nil { 132 log.Errorf("GetRecordsFromSegment: failed to initialize shared readers for segkey=%v, err=%v", segKey, err) 133 return nil, map[string]bool{}, err 134 } 135 defer sharedReader.Close() 136 multiReader := sharedReader.MultiColReaders[0] 137 138 allMatchedColumns := make(map[string]bool) 139 allMatchedColumns[config.GetTimeStampKey()] = true 140 141 // get the keys (which is blocknums, and sort them 142 sortedBlkNums := make([]uint16, len(blkRecIndexes)) 143 idx := 0 144 for bnum := range blkRecIndexes { 145 sortedBlkNums[idx] = bnum 146 idx++ 147 } 148 sort.Slice(sortedBlkNums, func(i, j int) bool { return sortedBlkNums[i] < sortedBlkNums[j] }) 149 150 var addedExtraFields bool 151 for _, blockIdx := range sortedBlkNums { 152 // traverse the sorted blocknums and use it to extract the recordIdxTSMap 153 // and then do the search, this way we read the segfiles in sequence 154 155 recordIdxTSMap := blkRecIndexes[blockIdx] 156 157 allRecNums := make([]uint16, len(recordIdxTSMap)) 158 idx := 0 159 for recNum := range recordIdxTSMap { 160 allRecNums[idx] = recNum 161 idx++ 162 } 163 sort.Slice(allRecNums, func(i, j int) bool { return allRecNums[i] < allRecNums[j] }) 164 resultAllRawRecs := readAllRawRecords(allRecNums, blockIdx, multiReader, allMatchedColumns, esQuery, qid, aggs) 165 166 for r := range resultAllRawRecs { 167 resultAllRawRecs[r][config.GetTimeStampKey()] = recordIdxTSMap[r] 168 resultAllRawRecs[r]["_index"] = vTable 169 170 resId := fmt.Sprintf("%s_%d_%d", segKey, blockIdx, r) 171 if esQuery { 172 if _, ok := resultAllRawRecs[r]["_id"]; !ok { 173 resultAllRawRecs[r]["_id"] = fmt.Sprintf("%d", xxhash.Sum64String(resId)) 174 } 175 } 176 result[resId] = resultAllRawRecs[r] 177 addedExtraFields = true 178 } 179 } 180 if addedExtraFields { 181 allMatchedColumns["_index"] = true 182 } 183 184 return result, allMatchedColumns, nil 185 } 186 187 func checkRecentlyRotatedKey(segkey string) (string, error) { 188 if writer.IsRecentlyRotatedSegKey(segkey) { 189 return writer.GetFileNameForRotatedSegment(segkey) 190 } 191 return segkey, nil 192 } 193 194 func getMathOpsColMap(MathOps []*structs.MathEvaluator) map[string]int { 195 colMap := make(map[string]int) 196 for index, mathOp := range MathOps { 197 colMap[mathOp.MathCol] = index 198 } 199 return colMap 200 } 201 202 func readAllRawRecords(orderedRecNums []uint16, blockIdx uint16, segReader *segread.MultiColSegmentReader, 203 allMatchedColumns map[string]bool, esQuery bool, qid uint64, aggs *structs.QueryAggregators) map[uint16]map[string]interface{} { 204 205 results := make(map[uint16]map[string]interface{}) 206 207 dictEncCols := make(map[string]bool) 208 for _, colInfo := range segReader.AllColums { 209 col := colInfo.ColumnName 210 if !esQuery && (col == "_type" || col == "_id") { 211 dictEncCols[col] = true 212 continue 213 } 214 if col == config.GetTimeStampKey() { 215 dictEncCols[col] = true 216 continue 217 } 218 ok := segReader.GetDictEncCvalsFromColFile(results, col, blockIdx, orderedRecNums, qid) 219 if ok { 220 dictEncCols[col] = true 221 allMatchedColumns[col] = true 222 } 223 } 224 225 var mathColMap map[string]int 226 var mathColOpsPresent bool 227 228 if aggs != nil && aggs.MathOperations != nil && len(aggs.MathOperations) > 0 { 229 mathColMap = getMathOpsColMap(aggs.MathOperations) 230 mathColOpsPresent = true 231 } else { 232 mathColOpsPresent = false 233 mathColMap = make(map[string]int) 234 } 235 236 for _, recNum := range orderedRecNums { 237 _, ok := results[recNum] 238 if !ok { 239 results[recNum] = make(map[string]interface{}) 240 } 241 242 for _, colInfo := range segReader.AllColums { 243 col := colInfo.ColumnName 244 245 _, ok := dictEncCols[col] 246 if ok { 247 continue 248 } 249 250 cValEnc, err := segReader.ExtractValueFromColumnFile(col, blockIdx, recNum, qid) 251 if err != nil { 252 // if the column was absent for an entire block and came for other blocks, this will error, hence no error logging here 253 } else { 254 255 if mathColOpsPresent { 256 colIndex, exists := mathColMap[col] 257 if exists { 258 mathOp := aggs.MathOperations[colIndex] 259 fieldToValue := make(map[string]utils.CValueEnclosure) 260 fieldToValue[mathOp.MathCol] = *cValEnc 261 valueFloat, err := mathOp.ValueColRequest.EvaluateToFloat(fieldToValue) 262 if err != nil { 263 log.Errorf("qid=%d, failed to evaluate math operation for col %s, err=%v", qid, col, err) 264 } else { 265 cValEnc.CVal = valueFloat 266 } 267 } 268 } 269 270 results[recNum][col] = cValEnc.CVal 271 allMatchedColumns[col] = true 272 } 273 } 274 275 if aggs != nil && aggs.OutputTransforms != nil { 276 if aggs.OutputTransforms.OutputColumns != nil && aggs.OutputTransforms.OutputColumns.RenameColumns != nil { 277 for oldCname, newCname := range aggs.OutputTransforms.OutputColumns.RenameColumns { 278 for _, logLine := range results { 279 if logLine[oldCname] != nil && oldCname != newCname { 280 logLine[newCname] = logLine[oldCname] 281 delete(logLine, oldCname) 282 allMatchedColumns[newCname] = true 283 delete(allMatchedColumns, oldCname) 284 } 285 } 286 } 287 } 288 } 289 290 } 291 return results 292 } 293 294 func applyColNameTransform(allCols map[string]bool, aggs *structs.QueryAggregators, qid uint64) map[string]bool { 295 retCols := make(map[string]bool) 296 if aggs == nil || aggs.OutputTransforms == nil { 297 return allCols 298 } 299 300 if aggs.OutputTransforms.OutputColumns == nil { 301 return allCols 302 } 303 304 allColNames := make([]string, len(allCols)) 305 i := 0 306 for cName := range allCols { 307 allColNames[i] = cName 308 i++ 309 } 310 311 if aggs.OutputTransforms.OutputColumns.IncludeColumns == nil { 312 retCols = allCols 313 } else { 314 for _, cName := range aggs.OutputTransforms.OutputColumns.IncludeColumns { 315 for _, matchingColumn := range toputils.SelectMatchingStringsWithWildcard(cName, allColNames) { 316 retCols[matchingColumn] = true 317 } 318 } 319 } 320 if len(aggs.OutputTransforms.OutputColumns.ExcludeColumns) != 0 { 321 for _, cName := range aggs.OutputTransforms.OutputColumns.ExcludeColumns { 322 for _, matchingColumn := range toputils.SelectMatchingStringsWithWildcard(cName, allColNames) { 323 delete(retCols, matchingColumn) 324 } 325 } 326 } 327 if aggs.OutputTransforms.OutputColumns.RenameColumns != nil { 328 log.Info("handle aggs.OutputTransforms.OutputColumns.RenameColumn") 329 //todo handle rename 330 } 331 return retCols 332 }