github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/queryrefresh.go

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package query

import (
	"bufio"
	"errors"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/siglens/siglens/pkg/blob"
	"github.com/siglens/siglens/pkg/config"
	"github.com/siglens/siglens/pkg/querytracker"
	"github.com/siglens/siglens/pkg/segment/query/metadata"
	"github.com/siglens/siglens/pkg/segment/query/pqs"
	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/writer"
	mmeta "github.com/siglens/siglens/pkg/segment/writer/metrics/meta"

	"github.com/siglens/siglens/pkg/usersavedqueries"
	"github.com/siglens/siglens/pkg/virtualtable"
	log "github.com/sirupsen/logrus"
)
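
// Intervals (in seconds) used by the metadata refresh loops, and the segmeta
// file name that gets appended to each ingest node's directory.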
const SECONDS_REREAD_META = 5
const SECONDS_REREAD_META_SSR = 60
const SECONDS_REFRESH_GLOBAL_METADATA = 30
const SEGMETA_FILENAME = "/segmeta.json"

var metaFileLastModifiedLock sync.RWMutex
var metaFileLastModified = make(map[string]uint64) // maps meta file name to the epoch time (in ms) of its last modification
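
// initSegmentMetaRefresh loads the local segmeta file once at startup and then
// starts a background goroutine that keeps re-reading it.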
func initSegmentMetaRefresh() {
	smFile := writer.GetLocalSegmetaFName()
	err := populateMicroIndices(smFile)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			log.Errorf("initSegmentMetaRefresh: Error loading initial metadata from file %v: %v", smFile, err)
		}
	}
	go refreshLocalMetadataLoop()
}
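
// initMetricsMetaRefresh does the same for metrics metadata: it loads the local
// metrics meta file once and then starts its periodic refresh loop.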
func initMetricsMetaRefresh() {
	mFile := mmeta.GetLocalMetricsMetaFName()
	err := populateMetricsMetadata(mFile)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			log.Errorf("initMetricsMetaRefresh: Error loading initial metadata from file %v: %v", mFile, err)
		}
	}
	go refreshMetricsMetadataLoop()
}
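
// initMetadataRefresh kicks off both the segment and metrics metadata
// refreshers (presumably invoked once during server startup).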
func initMetadataRefresh() {
	initSegmentMetaRefresh()
	initMetricsMetaRefresh()
}
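
// updateVTable reads a virtual table names file (one table name per line) and
// registers every non-empty name for the given org. A missing file is treated
// as nothing to do rather than as an error.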
func updateVTable(vfname string, orgid uint64) error {
	vtableFd, err := os.OpenFile(vfname, os.O_RDONLY, 0644)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		log.Errorf("updateVTable: Failed to open file=%v, err=%v", vfname, err)
		return err
	}
	defer func() {
		if closeErr := vtableFd.Close(); closeErr != nil {
			log.Errorf("updateVTable: Failed to close file name=%v, err:%v", vfname, closeErr)
		}
	}()
	scanner := bufio.NewScanner(vtableFd)

	for scanner.Scan() {
		rawbytes := scanner.Bytes()
		vtableName := string(rawbytes)
		if vtableName != "" {
			// todo: confirm if this is correct
			err = virtualtable.AddVirtualTable(&vtableName, orgid)
			if err != nil {
				log.Errorf("updateVTable: Error in adding virtual table:%v, err:%v", vtableName, err)
				return err
			}
		}
	}
	return scanner.Err()
}
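
// initGlobalMetadataRefresh refreshes metadata produced by other nodes. It is a
// no-op unless this node is a query node with S3 (blob storage) enabled.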
func initGlobalMetadataRefresh(getMyIds func() []uint64) {
	if !config.IsQueryNode() || !config.IsS3Enabled() {
		return
	}

	err := refreshGlobalMetadata(getMyIds)
	if err != nil {
		log.Errorf("initGlobalMetadataRefresh: Error in refreshing global metadata, err:%v", err)
	}
	go refreshGlobalMetadataLoop(getMyIds)
}
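
// refreshGlobalMetadata downloads the ingest node directories from blob storage
// and, for every ingest node other than this host, loads its virtual table
// names and its segmeta.json in parallel.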
func refreshGlobalMetadata(fnMyids func() []uint64) error {
	err := blob.DownloadAllIngestNodesDir()
	if err != nil {
		log.Errorf("refreshGlobalMetadata: Error in downloading ingest nodes dir, err:%v", err)
		return err
	}

	ingestNodes := make([]string, 0)
	ingestNodePath := config.GetDataPath() + "ingestnodes"

	files, err := os.ReadDir(ingestNodePath)
	if err != nil {
		log.Errorf("refreshGlobalMetadata: Error in reading directory, ingestNodePath:%v, err:%v", ingestNodePath, err)
		return err
	}
	for _, file := range files {
		if file.IsDir() {
			if strings.Contains(file.Name(), config.GetHostID()) {
				continue
			}
			ingestNodes = append(ingestNodes, file.Name())
		}
	}
	myids := fnMyids()

	// For each non-current ingest node, we need to process its
	// segmeta.json and virtualtablenames.txt
	var wg sync.WaitGroup
	for _, n := range ingestNodes {
		wg.Add(1)
		go func(node string) {
			defer wg.Done()
			vfname := virtualtable.GetFilePathForRemoteNode(node, 0)
			err := updateVTable(vfname, 0)
			if err != nil {
				log.Errorf("refreshGlobalMetadata: Error updating default org vtable, err:%v", err)
			}
			for _, myid := range myids {
				vfname := virtualtable.GetFilePathForRemoteNode(node, myid)
				err := updateVTable(vfname, myid)
				if err != nil {
					log.Errorf("refreshGlobalMetadata: Error in refreshing vtable for myid=%d, err:%v", myid, err)
				}
			}
			// Call populateMicroIndices on the segmeta.json read from this node
			smfname := config.GetDataPath() + "ingestnodes/" + node + SEGMETA_FILENAME
			err = populateMicroIndices(smfname)
			if err != nil {
				if !errors.Is(err, os.ErrNotExist) {
					log.Errorf("refreshGlobalMetadata: Error loading metadata from file %v: %v", smfname, err)
				}
			}
		}(n)
	}
	wg.Wait()
	return nil
}
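
// refreshGlobalMetadataLoop re-runs refreshGlobalMetadata every
// SECONDS_REFRESH_GLOBAL_METADATA seconds.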
func refreshGlobalMetadataLoop(getMyIds func() []uint64) {
	for {
		err := refreshGlobalMetadata(getMyIds)
		if err != nil {
			log.Errorf("refreshGlobalMetadataLoop: Error in refreshing global metadata, err:%v", err)
		}
		time.Sleep(SECONDS_REFRESH_GLOBAL_METADATA * time.Second)
	}
}
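
// populateMicroIndices reads the given segmeta file and registers a segment
// micro index for every entry in it. The file's modification time is tracked so
// an unchanged file is not re-parsed on subsequent calls.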
func populateMicroIndices(smFile string) error {
	var metaModificationTimeMs uint64

	log.Debugf("populateMicroIndices: reading smFile=%v", smFile)
	fileInfo, err := os.Stat(smFile)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		log.Warnf("populateMicroIndices: error when trying to stat meta file=%+v. Error=%+v", smFile, err)
		return err
	}
	metaModificationTimeMs = uint64(fileInfo.ModTime().UTC().Unix() * 1000)
	lastTimeMetafileRefreshed := getLastModifiedTimeForMetaFile(smFile)

	if lastTimeMetafileRefreshed >= metaModificationTimeMs {
		log.Debugf("populateMicroIndices: not updating meta file %+v, as the file was not updated after the last refresh", smFile)
		return nil
	}

	allSegMetas, err := writer.ReadSegmeta(smFile)
	if err != nil {
		log.Errorf("populateMicroIndices: error when trying to read meta file=%+v. Error=%+v", smFile, err)
		return err
	}

	allSmi := make([]*metadata.SegmentMicroIndex, len(allSegMetas))
	for idx, segMetaInfo := range allSegMetas {
		allSmi[idx] = processSegmetaInfo(segMetaInfo)
	}

	// Segmeta entries inside segmeta.json are appended in increasing time order,
	// so reversing the slice puts the latest segmeta entry first.
	for i, j := 0, len(allSmi)-1; i < j; i, j = i+1, j-1 {
		allSmi[i], allSmi[j] = allSmi[j], allSmi[i]
	}

	metadata.BulkAddSegmentMicroIndex(allSmi)
	updateLastModifiedTimeForMetaFile(smFile, metaModificationTimeMs)
	return nil
}
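
// populateMetricsMetadata is the metrics counterpart of populateMicroIndices:
// it reloads the local metrics meta entries when the metrics meta file has
// changed since the last refresh.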
func populateMetricsMetadata(mName string) error {
	var metaModificationTimeMs uint64

	log.Infof("populateMetricsMetadata: reading mName=%v", mName)
	fileInfo, err := os.Stat(mName)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		log.Warnf("populateMetricsMetadata: error when trying to stat meta file=%+v. Error=%+v", mName, err)
		return err
	}
	metaModificationTimeMs = uint64(fileInfo.ModTime().UTC().Unix() * 1000)
	lastTimeMetafileRefreshed := getLastModifiedTimeForMetaFile(mName)

	if lastTimeMetafileRefreshed >= metaModificationTimeMs {
		log.Debugf("populateMetricsMetadata: not updating meta file %+v, as the file was not updated after the last refresh", mName)
		return nil
	}

	allMetricsMetas, err := mmeta.GetLocalMetricsMetaEntries()
	if err != nil {
		log.Errorf("populateMetricsMetadata: unable to get all the metrics meta entries. Error: %v", err)
		return err
	}

	allMetricsSegmentMeta := make([]*metadata.MetricsSegmentMetadata, 0)
	for _, mMetaInfo := range allMetricsMetas {
		currMSegMetadata := metadata.InitMetricsMicroIndex(mMetaInfo)
		allMetricsSegmentMeta = append(allMetricsSegmentMeta, currMSegMetadata)
	}

	metadata.BulkAddMetricsSegment(allMetricsSegmentMeta)
	updateLastModifiedTimeForMetaFile(mName, metaModificationTimeMs)
	return nil
}
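
// getLastModifiedTimeForMetaFile returns the recorded modification time (ms
// since epoch) for a meta file, or 0 if the file has not been processed yet.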
func getLastModifiedTimeForMetaFile(metaFilename string) uint64 {
	metaFileLastModifiedLock.RLock()
	defer metaFileLastModifiedLock.RUnlock()
	mModTime, present := metaFileLastModified[metaFilename]

	if !present {
		return 0
	}
	return mModTime
}
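
// refreshMetricsMetadataLoop polls the local metrics meta file every
// SECONDS_REREAD_META seconds and reloads it when its modification time has
// advanced.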
func refreshMetricsMetadataLoop() {
	for {
		time.Sleep(SECONDS_REREAD_META * time.Second)
		mmFile := mmeta.GetLocalMetricsMetaFName()
		fileInfo, err := os.Stat(mmFile)
		if errors.Is(err, os.ErrNotExist) {
			continue
		} else if err != nil {
			log.Errorf("refreshMetricsMetadataLoop: Cannot stat meta file while re-reading, err= %v", err)
			continue
		}
		modifiedTime := fileInfo.ModTime()
		modifiedTimeMillisec := uint64(modifiedTime.UTC().Unix() * 1000)
		lastModified := getLastModifiedTimeForMetaFile(mmFile)
		if modifiedTimeMillisec > lastModified {
			log.Debugf("refreshMetricsMetadataLoop: Meta file has been modified %+v %+v. filePath = %+v", modifiedTimeMillisec, lastModified, mmFile)
			err := populateMetricsMetadata(mmFile)
			if err != nil {
				log.Errorf("refreshMetricsMetadataLoop: failed to populate metrics metadata from %+v: %+v", mmFile, err)
			}
			updateLastModifiedTimeForMetaFile(mmFile, modifiedTimeMillisec)
		}
	}
}
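
// refreshLocalMetadataLoop polls the local segmeta file every
// SECONDS_REREAD_META seconds and reloads the segment micro indices when the
// file has been modified.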
func refreshLocalMetadataLoop() {
	err := blob.DownloadAllIngestNodesDir()
	if err != nil {
		log.Errorf("refreshLocalMetadataLoop: Error in downloading ingest nodes dir, err:%v", err)
		return
	}

	for {
		time.Sleep(SECONDS_REREAD_META * time.Second)
		smFile := writer.GetLocalSegmetaFName()
		fileInfo, err := os.Stat(smFile)
		if errors.Is(err, os.ErrNotExist) {
			continue
		} else if err != nil {
			log.Errorf("refreshLocalMetadataLoop: Cannot stat meta file while re-reading, err= %v", err)
			continue
		}
		modifiedTime := fileInfo.ModTime()
		modifiedTimeMillisec := uint64(modifiedTime.UTC().Unix() * 1000)
		lastModified := getLastModifiedTimeForMetaFile(smFile)
		if modifiedTimeMillisec > lastModified {
			log.Debugf("refreshLocalMetadataLoop: Meta file has been modified %+v %+v. filePath = %+v", modifiedTimeMillisec, lastModified, smFile)
			err := populateMicroIndices(smFile)
			if err != nil {
				log.Errorf("refreshLocalMetadataLoop: failed to populate micro indices from %+v: %+v", smFile, err)
			}
			updateLastModifiedTimeForMetaFile(smFile, modifiedTimeMillisec)
		}
	}
}
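
// updateLastModifiedTimeForMetaFile records the modification time (ms since
// epoch) at which a meta file was last processed.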
func updateLastModifiedTimeForMetaFile(metaFilename string, newTime uint64) {
	metaFileLastModifiedLock.Lock()
	defer metaFileLastModifiedLock.Unlock()
	metaFileLastModified[metaFilename] = newTime
}
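
// processSegmetaInfo registers any persistent query results referenced by the
// segmeta entry and returns its segment micro index.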
func processSegmetaInfo(segMetaInfo *structs.SegMeta) *metadata.SegmentMicroIndex {
	for pqid := range segMetaInfo.AllPQIDs {
		pqs.AddPersistentQueryResult(segMetaInfo.SegmentKey, segMetaInfo.VirtualTableName, pqid)
	}

	return metadata.InitSegmentMicroIndex(segMetaInfo)
}
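
// getExternalPqinfoFiles lists the pqinfo.bin files published by the other
// query nodes, i.e. <dataPath>/querynodes/<node>/pqueries/pqinfo.bin.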
func getExternalPqinfoFiles() ([]string, error) {
	fNames := make([]string, 0)
	queryNodes := make([]string, 0)
	queryNodePath := config.GetDataPath() + "querynodes"

	files, err := os.ReadDir(queryNodePath)
	if err != nil {
		log.Errorf("getExternalPqinfoFiles: Error in reading query nodes dir, err:%v", err)
		return nil, err
	}
	for _, file := range files {
		if file.IsDir() {
			if strings.Contains(file.Name(), config.GetHostID()) {
				continue
			}
			queryNodes = append(queryNodes, file.Name())
		}
	}

	for _, node := range queryNodes {
		baseDir := config.GetDataPath() + "querynodes/" + node + "/pqueries"
		pqInfoFilename := baseDir + "/pqinfo.bin"
		fNames = append(fNames, pqInfoFilename)
	}
	return fNames, nil
}
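
// getExternalUSQueriesInfo lists the user-saved-query files published by the
// other query nodes for the given org: usqinfo.bin for org 0, and
// usqinfo-<orgid>.bin otherwise.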
func getExternalUSQueriesInfo(orgid uint64) ([]string, error) {
	fNames := make([]string, 0)
	queryNodes := make([]string, 0)
	queryNodePath := config.GetDataPath() + "querynodes"

	files, err := os.ReadDir(queryNodePath)
	if err != nil {
		log.Errorf("getExternalUSQueriesInfo: Error in reading query nodes dir, err:%v", err)
		return nil, err
	}
	for _, file := range files {
		if file.IsDir() {
			if strings.Contains(file.Name(), config.GetHostID()) {
				continue
			}
			queryNodes = append(queryNodes, file.Name())
		}
	}

	var usqFileExtensionName string
	if orgid == 0 {
		usqFileExtensionName = "/usqinfo.bin"
	} else {
		usqFileExtensionName = "/usqinfo-" + strconv.FormatUint(orgid, 10) + ".bin"
	}

	for _, node := range queryNodes {
		baseDir := config.GetDataPath() + "querynodes/" + node + "/usersavedqueries"
		usqInfoFilename := baseDir + usqFileExtensionName
		fNames = append(fNames, usqInfoFilename)
	}
	return fNames, nil
}
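
// internalQueryInfoRefresh downloads the query node directories from blob
// storage and refreshes persistent query info, user saved queries, and aggs
// info collected from the other query nodes.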
func internalQueryInfoRefresh(getMyIds func() []uint64) {
	err := blob.DownloadAllQueryNodesDir()
	if err != nil {
		log.Errorf("internalQueryInfoRefresh: Error in downloading query nodes dir, err:%v", err)
		return
	}
	pqInfoFiles, err := getExternalPqinfoFiles()
	if err != nil {
		log.Errorf("internalQueryInfoRefresh: Error in getting external pqinfo files, err:%v", err)
		return
	}
	if len(pqInfoFiles) > 0 {
		err = querytracker.RefreshExternalPQInfo(pqInfoFiles)
		if err != nil {
			log.Errorf("internalQueryInfoRefresh: Error in refreshing external pqinfo files, err:%v", err)
			return
		}
	}

	allMyids := getMyIds()

	for _, myid := range allMyids {
		usqInfoFiles, err := getExternalUSQueriesInfo(myid)
		if err != nil {
			log.Errorf("internalQueryInfoRefresh: Error in getting external usqinfo files, err:%v", err)
			return
		}
		for _, file := range usqInfoFiles {
			err := usersavedqueries.ReadExternalUSQInfo(file, myid)
			if err != nil {
				log.Errorf("internalQueryInfoRefresh: Error in reading external usqinfo file:%v, err:%v", file, err)
				continue
			}
		}
	}

	aggsInfoFiles, err := GetExternalAggsInfoFiles()
	if err != nil {
		log.Errorf("internalQueryInfoRefresh: Error in getting external aggs files, err:%v", err)
		return
	}
	if len(aggsInfoFiles) > 0 {
		err = querytracker.RefreshExternalAggsInfo(aggsInfoFiles)
		if err != nil {
			log.Errorf("internalQueryInfoRefresh: Error in refreshing external aggs files, err:%v", err)
			return
		}
	}
}
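
// runQueryInfoRefreshLoop runs internalQueryInfoRefresh forever, sleeping for
// whatever remains of the QUERY_INFO_REFRESH_LOOP_SECS interval after each
// pass.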
func runQueryInfoRefreshLoop(getMyIds func() []uint64) {
	for {
		startTime := time.Now()
		internalQueryInfoRefresh(getMyIds)
		// Sleep for whatever is left of the refresh interval; if the refresh
		// took longer than the interval, back off for a full minute.
		sleep := QUERY_INFO_REFRESH_LOOP_SECS*time.Second - time.Since(startTime)
		if sleep < 0 {
			sleep = 60 * time.Second
		}
		time.Sleep(sleep)
	}
}
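
// GetExternalAggsInfoFiles lists the aggsinfo.bin files published by the other
// query nodes under <dataPath>/querynodes/<node>/pqueries.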
func GetExternalAggsInfoFiles() ([]string, error) {
	fNames := make([]string, 0)
	queryNodes := make([]string, 0)
	queryNodePath := config.GetDataPath() + "querynodes"

	files, err := os.ReadDir(queryNodePath)
	if err != nil {
		log.Errorf("GetExternalAggsInfoFiles: Error in reading query nodes dir, err:%v", err)
		return nil, err
	}
	for _, file := range files {
		if file.IsDir() {
			if strings.Contains(file.Name(), config.GetHostID()) {
				continue
			}
			queryNodes = append(queryNodes, file.Name())
		}
	}

	for _, node := range queryNodes {
		baseDir := config.GetDataPath() + "querynodes/" + node + "/pqueries"
		aggsInfoFilename := baseDir + "/aggsinfo.bin"
		fNames = append(fNames, aggsInfoFilename)
	}
	return fNames, nil
}