code.gitea.io/gitea@v1.22.3/modules/indexer/issues/indexer.go (about)

     1  // Copyright 2018 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package issues
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"runtime/pprof"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	db_model "code.gitea.io/gitea/models/db"
    15  	repo_model "code.gitea.io/gitea/models/repo"
    16  	"code.gitea.io/gitea/modules/graceful"
    17  	"code.gitea.io/gitea/modules/indexer/issues/bleve"
    18  	"code.gitea.io/gitea/modules/indexer/issues/db"
    19  	"code.gitea.io/gitea/modules/indexer/issues/elasticsearch"
    20  	"code.gitea.io/gitea/modules/indexer/issues/internal"
    21  	"code.gitea.io/gitea/modules/indexer/issues/meilisearch"
    22  	"code.gitea.io/gitea/modules/log"
    23  	"code.gitea.io/gitea/modules/optional"
    24  	"code.gitea.io/gitea/modules/process"
    25  	"code.gitea.io/gitea/modules/queue"
    26  	"code.gitea.io/gitea/modules/setting"
    27  )
    28  
// IndexerMetadata is used to send data to the queue, so it contains only the ids.
// It may look weird, because it has to be compatible with the old queue data format.
// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
// If the IsDelete flag is false, the ID specifies the issue to index, so Indexer will query the database to get the issue data.
// It should be noted that if the id does not exist in the database, its index will be deleted too even if IsDelete is false.
// Valid values:
//   - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
//   - IsDelete = false, ID = 1, and IDs will be ignored
type IndexerMetadata struct {
	// ID is the single issue to index; it is only used when IsDelete is false.
	ID int64 `json:"id"`

	// IsDelete switches the item to delete mode, in which IDs is used instead of ID.
	IsDelete bool    `json:"is_delete"`
	// IDs lists the issues to remove from the index; it is only used when IsDelete is true.
	IDs      []int64 `json:"ids"`
}
    43  
var (
	// issueIndexerQueue is the queue of issue ids to be updated.
	// It is created by InitIssueIndexer.
	issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]
	// globalIndexer is the global indexer, it cannot be nil.
	// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
	// So it's always safe to use it as *globalIndexer.Load() and call its methods.
	globalIndexer atomic.Pointer[internal.Indexer]
	// dummyIndexer is the placeholder stored into globalIndexer by init, and stored
	// again by InitIssueIndexer when initializing the bleve indexer panics.
	dummyIndexer  *internal.Indexer
)
    53  
    54  func init() {
    55  	i := internal.NewDummyIndexer()
    56  	dummyIndexer = &i
    57  	globalIndexer.Store(dummyIndexer)
    58  }
    59  
// InitIssueIndexer initializes the issue indexer.
//
// It creates the issue indexer queue, then builds the indexer selected by
// setting.Indexer.IssueType in a background goroutine, stores it in
// globalIndexer, starts the queue and, if the index did not already exist,
// populates it.
//
// If syncReindex is true, the population runs synchronously inside that
// goroutine and this function blocks until the goroutine reports completion
// or shutdown is signalled. Otherwise the population runs in its own
// goroutine, and when setting.Indexer.StartupTimeout is positive a watcher
// goroutine calls log.Fatal if initialization does not finish in time.
func InitIssueIndexer(syncReindex bool) {
	ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)

	// Buffered with capacity 1 so the init goroutine can always deliver its
	// duration, even on the path where nothing ever receives from the channel.
	indexerInitWaitChannel := make(chan time.Duration, 1)

	// Create the Queue
	issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))

	graceful.GetManager().RunAtTerminate(finished)

	// Create the Indexer
	go func() {
		pprof.SetGoroutineLabels(ctx)
		start := time.Now()
		log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
		var (
			issueIndexer internal.Indexer
			existed      bool
			err          error
		)
		switch setting.Indexer.IssueType {
		case "bleve":
			// The recover handler is registered for the bleve case only; it is
			// deferred on the goroutine, so it covers a panic anywhere later in
			// this goroutine. On panic the dummy indexer is restored before
			// log.Fatal is called.
			defer func() {
				if err := recover(); err != nil {
					log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
					log.Error("The indexer files are likely corrupted and may need to be deleted")
					log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath)
					globalIndexer.Store(dummyIndexer)
					log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
				}
			}()
			issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
			}
		case "elasticsearch":
			issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
			}
		case "db":
			// Init is not called for the db indexer, so existed keeps its zero
			// value (false) and the population step below still runs.
			issueIndexer = db.NewIndexer()
		case "meilisearch":
			issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
			existed, err = issueIndexer.Init(ctx)
			if err != nil {
				log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
			}
		default:
			log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
		}
		// Publish the real indexer, replacing the dummy one installed by init.
		globalIndexer.Store(&issueIndexer)

		// Close whichever indexer is current at termination time.
		graceful.GetManager().RunAtTerminate(func() {
			log.Debug("Closing issue indexer")
			(*globalIndexer.Load()).Close()
			log.Info("PID: %d Issue Indexer closed", os.Getpid())
		})

		// Start processing the queue
		go graceful.GetManager().RunWithCancel(issueIndexerQueue)

		// Populate the index
		if !existed {
			if syncReindex {
				// Synchronous: the completion signal below is only sent after
				// the population has returned.
				graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
			} else {
				go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
			}
		}

		// Report how long initialization took, then close the channel.
		indexerInitWaitChannel <- time.Since(start)
		close(indexerInitWaitChannel)
	}()

	if syncReindex {
		// Block until initialization is done or shutdown is signalled.
		select {
		case <-indexerInitWaitChannel:
		case <-graceful.GetManager().IsShutdown():
		}
	} else if setting.Indexer.StartupTimeout > 0 {
		// Watch the initialization in the background and enforce the startup timeout.
		go func() {
			pprof.SetGoroutineLabels(ctx)
			timeout := setting.Indexer.StartupTimeout
			// A child process gets the graceful hammer time added to its timeout.
			if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
				timeout += setting.GracefulHammerTime
			}
			select {
			case duration := <-indexerInitWaitChannel:
				log.Info("Issue Indexer Initialization took %v", duration)
			case <-graceful.GetManager().IsShutdown():
				log.Warn("Shutdown occurred before issue index initialisation was complete")
			case <-time.After(timeout):
				// Give the queue up to 5 seconds to shut down before calling log.Fatal.
				issueIndexerQueue.ShutdownWait(5 * time.Second)
				log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
			}
		}()
	}
}
   163  
   164  func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
   165  	return func(items ...*IndexerMetadata) []*IndexerMetadata {
   166  		var unhandled []*IndexerMetadata
   167  
   168  		indexer := *globalIndexer.Load()
   169  		for _, item := range items {
   170  			log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
   171  			if item.IsDelete {
   172  				if err := indexer.Delete(ctx, item.IDs...); err != nil {
   173  					log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err)
   174  					unhandled = append(unhandled, item)
   175  				}
   176  				continue
   177  			}
   178  			data, existed, err := getIssueIndexerData(ctx, item.ID)
   179  			if err != nil {
   180  				log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
   181  				unhandled = append(unhandled, item)
   182  				continue
   183  			}
   184  			if !existed {
   185  				if err := indexer.Delete(ctx, item.ID); err != nil {
   186  					log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
   187  					unhandled = append(unhandled, item)
   188  				}
   189  				continue
   190  			}
   191  			if err := indexer.Index(ctx, data); err != nil {
   192  				log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
   193  				unhandled = append(unhandled, item)
   194  				continue
   195  			}
   196  		}
   197  
   198  		return unhandled
   199  	}
   200  }
   201  
   202  // populateIssueIndexer populate the issue indexer with issue data
   203  func populateIssueIndexer(ctx context.Context) {
   204  	ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
   205  	defer finished()
   206  	ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task
   207  	if err := PopulateIssueIndexer(ctx); err != nil {
   208  		log.Error("Issue indexer population failed: %v", err)
   209  	}
   210  }
   211  
   212  func PopulateIssueIndexer(ctx context.Context) error {
   213  	for page := 1; ; page++ {
   214  		select {
   215  		case <-ctx.Done():
   216  			return fmt.Errorf("shutdown before completion: %w", ctx.Err())
   217  		default:
   218  		}
   219  		repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{
   220  			ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
   221  			OrderBy:     db_model.SearchOrderByID,
   222  			Private:     true,
   223  			Collaborate: optional.Some(false),
   224  		})
   225  		if err != nil {
   226  			log.Error("SearchRepositoryByName: %v", err)
   227  			continue
   228  		}
   229  		if len(repos) == 0 {
   230  			log.Debug("Issue Indexer population complete")
   231  			return nil
   232  		}
   233  
   234  		for _, repo := range repos {
   235  			if err := updateRepoIndexer(ctx, repo.ID); err != nil {
   236  				return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err)
   237  			}
   238  		}
   239  	}
   240  }
   241  
   242  // UpdateRepoIndexer add/update all issues of the repositories
   243  func UpdateRepoIndexer(ctx context.Context, repoID int64) {
   244  	if err := updateRepoIndexer(ctx, repoID); err != nil {
   245  		log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
   246  	}
   247  }
   248  
   249  // UpdateIssueIndexer add/update an issue to the issue indexer
   250  func UpdateIssueIndexer(ctx context.Context, issueID int64) {
   251  	if err := updateIssueIndexer(ctx, issueID); err != nil {
   252  		log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
   253  	}
   254  }
   255  
   256  // DeleteRepoIssueIndexer deletes repo's all issues indexes
   257  func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
   258  	if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
   259  		log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
   260  	}
   261  }
   262  
   263  // IsAvailable checks if issue indexer is available
   264  func IsAvailable(ctx context.Context) bool {
   265  	return (*globalIndexer.Load()).Ping(ctx) == nil
   266  }
   267  
// SearchOptions indicates the options for searching issues.
// It is an alias of internal.SearchOptions, so both names denote the same type.
type SearchOptions = internal.SearchOptions
   270  
// Aliases of the internal package's SortBy* constants, re-exported under this
// package's name. Each one has an ascending and a descending variant.
const (
	SortByCreatedDesc  = internal.SortByCreatedDesc
	SortByUpdatedDesc  = internal.SortByUpdatedDesc
	SortByCommentsDesc = internal.SortByCommentsDesc
	SortByDeadlineDesc = internal.SortByDeadlineDesc
	SortByCreatedAsc   = internal.SortByCreatedAsc
	SortByUpdatedAsc   = internal.SortByUpdatedAsc
	SortByCommentsAsc  = internal.SortByCommentsAsc
	SortByDeadlineAsc  = internal.SortByDeadlineAsc
)
   281  
   282  // SearchIssues search issues by options.
   283  func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
   284  	indexer := *globalIndexer.Load()
   285  
   286  	if opts.Keyword == "" {
   287  		// This is a conservative shortcut.
   288  		// If the keyword is empty, db has better (at least not worse) performance to filter issues.
   289  		// When the keyword is empty, it tends to listing rather than searching issues.
   290  		// So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
   291  		// Even worse, the external indexer like elastic search may not be available for a while,
   292  		// and the user may not be able to list issues completely until it is available again.
   293  		indexer = db.NewIndexer()
   294  	}
   295  
   296  	result, err := indexer.Search(ctx, opts)
   297  	if err != nil {
   298  		return nil, 0, err
   299  	}
   300  
   301  	ret := make([]int64, 0, len(result.Hits))
   302  	for _, hit := range result.Hits {
   303  		ret = append(ret, hit.ID)
   304  	}
   305  
   306  	return ret, result.Total, nil
   307  }
   308  
   309  // CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count.
   310  func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
   311  	opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} })
   312  
   313  	_, total, err := SearchIssues(ctx, opts)
   314  	return total, err
   315  }