code.gitea.io/gitea@v1.19.3/modules/indexer/code/indexer.go (about)

     1  // Copyright 2016 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package code
     5  
     6  import (
     7  	"context"
     8  	"os"
     9  	"runtime/pprof"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"code.gitea.io/gitea/models/db"
    15  	repo_model "code.gitea.io/gitea/models/repo"
    16  	"code.gitea.io/gitea/modules/graceful"
    17  	"code.gitea.io/gitea/modules/log"
    18  	"code.gitea.io/gitea/modules/process"
    19  	"code.gitea.io/gitea/modules/queue"
    20  	"code.gitea.io/gitea/modules/setting"
    21  	"code.gitea.io/gitea/modules/timeutil"
    22  )
    23  
    24  // SearchResult result of performing a search in a repo
    25  type SearchResult struct {
    26  	RepoID      int64
    27  	StartIndex  int
    28  	EndIndex    int
    29  	Filename    string
    30  	Content     string
    31  	CommitID    string
    32  	UpdatedUnix timeutil.TimeStamp
    33  	Language    string
    34  	Color       string
    35  }
    36  
    37  // SearchResultLanguages result of top languages count in search results
    38  type SearchResultLanguages struct {
    39  	Language string
    40  	Color    string
    41  	Count    int
    42  }
    43  
    44  // Indexer defines an interface to index and search code contents
    45  type Indexer interface {
    46  	Ping() bool
    47  	SetAvailabilityChangeCallback(callback func(bool))
    48  	Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error
    49  	Delete(repoID int64) error
    50  	Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
    51  	Close()
    52  }
    53  
    54  func filenameIndexerID(repoID int64, filename string) string {
    55  	return indexerID(repoID) + "_" + filename
    56  }
    57  
    58  func indexerID(id int64) string {
    59  	return strconv.FormatInt(id, 36)
    60  }
    61  
    62  func parseIndexerID(indexerID string) (int64, string) {
    63  	index := strings.IndexByte(indexerID, '_')
    64  	if index == -1 {
    65  		log.Error("Unexpected ID in repo indexer: %s", indexerID)
    66  	}
    67  	repoID, _ := strconv.ParseInt(indexerID[:index], 36, 64)
    68  	return repoID, indexerID[index+1:]
    69  }
    70  
    71  func filenameOfIndexerID(indexerID string) string {
    72  	index := strings.IndexByte(indexerID, '_')
    73  	if index == -1 {
    74  		log.Error("Unexpected ID in repo indexer: %s", indexerID)
    75  	}
    76  	return indexerID[index+1:]
    77  }
    78  
    79  // IndexerData is the item placed on the code indexer queue; it names the
        // repository whose index entries should be brought up to date.
    80  type IndexerData struct {
    81  	RepoID int64 // ID of the repository to index; the queue handler passes it to index()
    82  }
    83  
    84  var indexerQueue queue.UniqueQueue // queue of *IndexerData awaiting indexing; assigned in Init, left nil when the repo indexer is disabled
    85  
    86  func index(ctx context.Context, indexer Indexer, repoID int64) error {
    87  	repo, err := repo_model.GetRepositoryByID(ctx, repoID)
    88  	if repo_model.IsErrRepoNotExist(err) {
    89  		return indexer.Delete(repoID)
    90  	}
    91  	if err != nil {
    92  		return err
    93  	}
    94  
    95  	sha, err := getDefaultBranchSha(ctx, repo)
    96  	if err != nil {
    97  		return err
    98  	}
    99  	changes, err := getRepoChanges(ctx, repo, sha)
   100  	if err != nil {
   101  		return err
   102  	} else if changes == nil {
   103  		return nil
   104  	}
   105  
   106  	if err := indexer.Index(ctx, repo, sha, changes); err != nil {
   107  		return err
   108  	}
   109  
   110  	return repo_model.UpdateIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode, sha)
   111  }
   112  
   113  // Init initializes the repo (code) indexer.
        //
        // When setting.Indexer.RepoIndexerEnabled is false it closes the package-level
        // indexer wrapper and returns. Otherwise it registers a "Service: CodeIndexer"
        // system process context, creates the unique "code_indexer" queue, and starts a
        // goroutine that builds the configured backend ("bleve" or "elasticsearch"),
        // installs it via indexer.set and starts queue processing. If
        // setting.Indexer.StartupTimeout is positive, a second goroutine watches that
        // initialization and calls log.Fatal if the timeout elapses first.
   114  func Init() {
   115  	if !setting.Indexer.RepoIndexerEnabled {
   116  		indexer.Close()
   117  		return
   118  	}
   119  
   120  	ctx, cancel, finished := process.GetManager().AddTypedContext(context.Background(), "Service: CodeIndexer", process.SystemProcessType, false)
   121  
        	// Terminate hook: if ctx is already cancelled nothing is done here (every
        	// other cancel() call in this function is paired with indexer.Close());
        	// otherwise cancel, close the indexer and call finished.
   122  	graceful.GetManager().RunAtTerminate(func() {
   123  		select {
   124  		case <-ctx.Done():
   125  			return
   126  		default:
   127  		}
   128  		cancel()
   129  		log.Debug("Closing repository indexer")
   130  		indexer.Close()
   131  		log.Info("PID: %d Repository Indexer closed", os.Getpid())
   132  		finished()
   133  	})
   134  
        	// waitChannel carries the initialization duration from the setup goroutine
        	// to the timeout watcher below. It is closed without a value when
        	// initialization fails; the watcher treats a closed, empty channel as failure.
   135  	waitChannel := make(chan time.Duration, 1)
   136  
   137  	// Create the Queue
   138  	switch setting.Indexer.RepoType {
   139  	case "bleve", "elasticsearch":
        		// handler indexes a batch of queued items and returns the ones that
        		// were not handled.
   140  		handler := func(data ...queue.Data) []queue.Data {
   141  			idx, err := indexer.get()
   142  			if idx == nil || err != nil {
        				// No usable indexer: hand the whole batch back untouched.
   143  				log.Error("Codes indexer handler: unable to get indexer!")
   144  				return data
   145  			}
   146  
   147  			unhandled := make([]queue.Data, 0, len(data))
   148  			for _, datum := range data {
   149  				indexerData, ok := datum.(*IndexerData)
   150  				if !ok {
        					// Items of an unexpected type are logged and dropped.
   151  					log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum)
   152  					continue
   153  				}
   154  				log.Trace("IndexerData Process Repo: %d", indexerData.RepoID)
   155  
        				// NOTE(review): this passes the package-level indexer wrapper, not
        				// idx; idx is only used for the availability check above.
   156  				if err := index(ctx, indexer, indexerData.RepoID); err != nil {
   157  					log.Error("index: %v", err)
        					// If the indexer still answers Ping the item is dropped rather
        					// than retried; only when Ping fails is it handed back.
   158  					if indexer.Ping() {
   159  						continue
   160  					}
   161  					// Add back to queue
   162  					unhandled = append(unhandled, datum)
   163  				}
   164  			}
   165  			return unhandled
   166  		}
   167  
   168  		indexerQueue = queue.CreateUniqueQueue("code_indexer", handler, &IndexerData{})
   169  		if indexerQueue == nil {
   170  			log.Fatal("Unable to create codes indexer queue")
   171  		}
   172  	default:
   173  		log.Fatal("Unknown codes indexer type; %s", setting.Indexer.RepoType)
   174  	}
   175  
        	// Build the backend in its own goroutine so Init returns without waiting
        	// for the index to be opened.
   176  	go func() {
   177  		pprof.SetGoroutineLabels(ctx) // label this goroutine with the process context for profiling
   178  		start := time.Now()
   179  		var (
   180  			rIndexer Indexer
   181  			populate bool // returned by the constructors; presumably true for a freshly created index - confirm
   182  			err      error
   183  		)
   184  		switch setting.Indexer.RepoType {
   185  		case "bleve":
   186  			log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
        			// A panic while constructing the backend is logged with recovery hints.
        			// NOTE(review): after a recovered panic waitChannel is neither sent to
        			// nor closed, so the watcher only ends via shutdown or timeout.
   187  			defer func() {
   188  				if err := recover(); err != nil {
   189  					log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
   190  					log.Error("The indexer files are likely corrupted and may need to be deleted")
   191  					log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
   192  				}
   193  			}()
   194  
   195  			rIndexer, populate, err = NewBleveIndexer(setting.Indexer.RepoPath)
   196  			if err != nil {
        				// Clean up and signal failure (closed channel) before the fatal log.
   197  				cancel()
   198  				indexer.Close()
   199  				close(waitChannel)
   200  				log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
   201  			}
   202  		case "elasticsearch":
   203  			log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoConnStr)
        			// Same panic handling as the bleve case, with the connection string
        			// in the hint instead of the path.
   204  			defer func() {
   205  				if err := recover(); err != nil {
   206  					log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
   207  					log.Error("The indexer files are likely corrupted and may need to be deleted")
   208  					log.Error("You can completely remove the \"%s\" index to make Gitea recreate the indexes", setting.Indexer.RepoConnStr)
   209  				}
   210  			}()
   211  
   212  			rIndexer, populate, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName)
   213  			if err != nil {
   214  				cancel()
   215  				indexer.Close()
   216  				close(waitChannel)
   217  				log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
   218  			}
   219  		default:
   220  			log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)
   221  		}
   222  
        		// Install the freshly built backend in the package-level wrapper.
   223  		indexer.set(rIndexer)
   224  
        		// Pause/resume the queue as the backend reports availability changes.
        		// Note: the local name queue shadows the imported queue package inside
        		// this if statement.
   225  		if queue, ok := indexerQueue.(queue.Pausable); ok {
   226  			rIndexer.SetAvailabilityChangeCallback(func(available bool) {
   227  				if !available {
   228  					log.Info("Code index queue paused")
   229  					queue.Pause()
   230  				} else {
   231  					log.Info("Code index queue resumed")
   232  					queue.Resume()
   233  				}
   234  			})
   235  		}
   236  
   237  		// Start processing the queue
   238  		go graceful.GetManager().RunWithShutdownFns(indexerQueue.Run)
   239  
   240  		if populate {
   241  			go graceful.GetManager().RunWithShutdownContext(populateRepoIndexer)
   242  		}
        		// Report how long initialization took, or give up if shutting down.
   243  		select {
   244  		case waitChannel <- time.Since(start):
   245  		case <-graceful.GetManager().IsShutdown():
   246  		}
   247  
   248  		close(waitChannel)
   249  	}()
   250  
        	// Startup watchdog; only started when a positive timeout is configured.
   251  	if setting.Indexer.StartupTimeout > 0 {
   252  		go func() {
   253  			pprof.SetGoroutineLabels(ctx)
   254  			timeout := setting.Indexer.StartupTimeout
        			// A child process gets GracefulHammerTime on top of the timeout
        			// (presumably to allow for the parent's shutdown - confirm).
   255  			if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
   256  				timeout += setting.GracefulHammerTime
   257  			}
   258  			select {
   259  			case <-graceful.GetManager().IsShutdown():
   260  				log.Warn("Shutdown before Repository Indexer completed initialization")
   261  				cancel()
   262  				indexer.Close()
   263  			case duration, ok := <-waitChannel:
   264  				if !ok {
        					// Channel closed without a value: initialization failed.
   265  					log.Warn("Repository Indexer Initialization failed")
   266  					cancel()
   267  					indexer.Close()
   268  					return
   269  				}
   270  				log.Info("Repository Indexer Initialization took %v", duration)
   271  			case <-time.After(timeout):
   272  				cancel()
   273  				indexer.Close()
   274  				log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout)
   275  			}
   276  		}()
   277  	}
   278  }
   279  
   280  // UpdateRepoIndexer update a repository's entries in the indexer
   281  func UpdateRepoIndexer(repo *repo_model.Repository) {
   282  	indexData := &IndexerData{RepoID: repo.ID}
   283  	if err := indexerQueue.Push(indexData); err != nil {
   284  		log.Error("Update repo index data %v failed: %v", indexData, err)
   285  	}
   286  }
   287  
   288  // IsAvailable checks if issue indexer is available
   289  func IsAvailable() bool {
   290  	idx, err := indexer.get()
   291  	if err != nil {
   292  		log.Error("IsAvailable(): unable to get indexer: %v", err)
   293  		return false
   294  	}
   295  
   296  	return idx.Ping()
   297  }
   298  
   299  // populateRepoIndexer populate the repo indexer with pre-existing data. This
   300  // should only be run when the indexer is created for the first time.
   301  func populateRepoIndexer(ctx context.Context) {
   302  	log.Info("Populating the repo indexer with existing repositories")
   303  
   304  	exist, err := db.IsTableNotEmpty("repository")
   305  	if err != nil {
   306  		log.Fatal("System error: %v", err)
   307  	} else if !exist {
   308  		return
   309  	}
   310  
   311  	// if there is any existing repo indexer metadata in the DB, delete it
   312  	// since we are starting afresh. Also, xorm requires deletes to have a
   313  	// condition, and we want to delete everything, thus 1=1.
   314  	if err := db.DeleteAllRecords("repo_indexer_status"); err != nil {
   315  		log.Fatal("System error: %v", err)
   316  	}
   317  
   318  	var maxRepoID int64
   319  	if maxRepoID, err = db.GetMaxID("repository"); err != nil {
   320  		log.Fatal("System error: %v", err)
   321  	}
   322  
   323  	// start with the maximum existing repo ID and work backwards, so that we
   324  	// don't include repos that are created after gitea starts; such repos will
   325  	// already be added to the indexer, and we don't need to add them again.
   326  	for maxRepoID > 0 {
   327  		select {
   328  		case <-ctx.Done():
   329  			log.Info("Repository Indexer population shutdown before completion")
   330  			return
   331  		default:
   332  		}
   333  		ids, err := repo_model.GetUnindexedRepos(repo_model.RepoIndexerTypeCode, maxRepoID, 0, 50)
   334  		if err != nil {
   335  			log.Error("populateRepoIndexer: %v", err)
   336  			return
   337  		} else if len(ids) == 0 {
   338  			break
   339  		}
   340  		for _, id := range ids {
   341  			select {
   342  			case <-ctx.Done():
   343  				log.Info("Repository Indexer population shutdown before completion")
   344  				return
   345  			default:
   346  			}
   347  			if err := indexerQueue.Push(&IndexerData{RepoID: id}); err != nil {
   348  				log.Error("indexerQueue.Push: %v", err)
   349  				return
   350  			}
   351  			maxRepoID = id - 1
   352  		}
   353  	}
   354  	log.Info("Done (re)populating the repo indexer with existing repositories")
   355  }