code.gitea.io/gitea@v1.19.3/modules/indexer/code/indexer.go (about) 1 // Copyright 2016 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package code 5 6 import ( 7 "context" 8 "os" 9 "runtime/pprof" 10 "strconv" 11 "strings" 12 "time" 13 14 "code.gitea.io/gitea/models/db" 15 repo_model "code.gitea.io/gitea/models/repo" 16 "code.gitea.io/gitea/modules/graceful" 17 "code.gitea.io/gitea/modules/log" 18 "code.gitea.io/gitea/modules/process" 19 "code.gitea.io/gitea/modules/queue" 20 "code.gitea.io/gitea/modules/setting" 21 "code.gitea.io/gitea/modules/timeutil" 22 ) 23 24 // SearchResult result of performing a search in a repo 25 type SearchResult struct { 26 RepoID int64 27 StartIndex int 28 EndIndex int 29 Filename string 30 Content string 31 CommitID string 32 UpdatedUnix timeutil.TimeStamp 33 Language string 34 Color string 35 } 36 37 // SearchResultLanguages result of top languages count in search results 38 type SearchResultLanguages struct { 39 Language string 40 Color string 41 Count int 42 } 43 44 // Indexer defines an interface to index and search code contents 45 type Indexer interface { 46 Ping() bool 47 SetAvailabilityChangeCallback(callback func(bool)) 48 Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error 49 Delete(repoID int64) error 50 Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) 51 Close() 52 } 53 54 func filenameIndexerID(repoID int64, filename string) string { 55 return indexerID(repoID) + "_" + filename 56 } 57 58 func indexerID(id int64) string { 59 return strconv.FormatInt(id, 36) 60 } 61 62 func parseIndexerID(indexerID string) (int64, string) { 63 index := strings.IndexByte(indexerID, '_') 64 if index == -1 { 65 log.Error("Unexpected ID in repo indexer: %s", indexerID) 66 } 67 repoID, _ := strconv.ParseInt(indexerID[:index], 36, 64) 68 return repoID, indexerID[index+1:] 69 } 70 71 func filenameOfIndexerID(indexerID string) string { 72 index := strings.IndexByte(indexerID, '_') 73 if index == -1 { 74 log.Error("Unexpected ID in repo indexer: %s", indexerID) 75 } 76 return indexerID[index+1:] 77 } 78 79 // IndexerData represents data stored in the code indexer 80 type IndexerData struct { 81 RepoID int64 82 } 83 84 var indexerQueue queue.UniqueQueue 85 86 func index(ctx context.Context, indexer Indexer, repoID int64) error { 87 repo, err := repo_model.GetRepositoryByID(ctx, repoID) 88 if repo_model.IsErrRepoNotExist(err) { 89 return indexer.Delete(repoID) 90 } 91 if err != nil { 92 return err 93 } 94 95 sha, err := getDefaultBranchSha(ctx, repo) 96 if err != nil { 97 return err 98 } 99 changes, err := getRepoChanges(ctx, repo, sha) 100 if err != nil { 101 return err 102 } else if changes == nil { 103 return nil 104 } 105 106 if err := indexer.Index(ctx, repo, sha, changes); err != nil { 107 return err 108 } 109 110 return repo_model.UpdateIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode, sha) 111 } 112 113 // Init initialize the repo indexer 114 func Init() { 115 if !setting.Indexer.RepoIndexerEnabled { 116 indexer.Close() 117 return 118 } 119 120 ctx, cancel, finished := process.GetManager().AddTypedContext(context.Background(), "Service: CodeIndexer", process.SystemProcessType, false) 121 122 graceful.GetManager().RunAtTerminate(func() { 123 select { 124 case <-ctx.Done(): 125 return 126 default: 127 } 128 cancel() 129 log.Debug("Closing repository indexer") 130 indexer.Close() 131 log.Info("PID: %d Repository Indexer closed", os.Getpid()) 132 finished() 133 }) 134 135 waitChannel := make(chan time.Duration, 1) 136 137 // Create the Queue 138 switch setting.Indexer.RepoType { 139 case "bleve", "elasticsearch": 140 handler := func(data ...queue.Data) []queue.Data { 141 idx, err := indexer.get() 142 if idx == nil || err != nil { 143 log.Error("Codes indexer handler: unable to get indexer!") 144 return data 145 } 146 147 unhandled := make([]queue.Data, 0, len(data)) 148 for _, datum := range data { 149 indexerData, ok := datum.(*IndexerData) 150 if !ok { 151 log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum) 152 continue 153 } 154 log.Trace("IndexerData Process Repo: %d", indexerData.RepoID) 155 156 if err := index(ctx, indexer, indexerData.RepoID); err != nil { 157 log.Error("index: %v", err) 158 if indexer.Ping() { 159 continue 160 } 161 // Add back to queue 162 unhandled = append(unhandled, datum) 163 } 164 } 165 return unhandled 166 } 167 168 indexerQueue = queue.CreateUniqueQueue("code_indexer", handler, &IndexerData{}) 169 if indexerQueue == nil { 170 log.Fatal("Unable to create codes indexer queue") 171 } 172 default: 173 log.Fatal("Unknown codes indexer type; %s", setting.Indexer.RepoType) 174 } 175 176 go func() { 177 pprof.SetGoroutineLabels(ctx) 178 start := time.Now() 179 var ( 180 rIndexer Indexer 181 populate bool 182 err error 183 ) 184 switch setting.Indexer.RepoType { 185 case "bleve": 186 log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath) 187 defer func() { 188 if err := recover(); err != nil { 189 log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2)) 190 log.Error("The indexer files are likely corrupted and may need to be deleted") 191 log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath) 192 } 193 }() 194 195 rIndexer, populate, err = NewBleveIndexer(setting.Indexer.RepoPath) 196 if err != nil { 197 cancel() 198 indexer.Close() 199 close(waitChannel) 200 log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) 201 } 202 case "elasticsearch": 203 log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoConnStr) 204 defer func() { 205 if err := recover(); err != nil { 206 log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2)) 207 log.Error("The indexer files are likely corrupted and may need to be deleted") 208 log.Error("You can completely remove the \"%s\" index to make Gitea recreate the indexes", setting.Indexer.RepoConnStr) 209 } 210 }() 211 212 rIndexer, populate, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) 213 if err != nil { 214 cancel() 215 indexer.Close() 216 close(waitChannel) 217 log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) 218 } 219 default: 220 log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) 221 } 222 223 indexer.set(rIndexer) 224 225 if queue, ok := indexerQueue.(queue.Pausable); ok { 226 rIndexer.SetAvailabilityChangeCallback(func(available bool) { 227 if !available { 228 log.Info("Code index queue paused") 229 queue.Pause() 230 } else { 231 log.Info("Code index queue resumed") 232 queue.Resume() 233 } 234 }) 235 } 236 237 // Start processing the queue 238 go graceful.GetManager().RunWithShutdownFns(indexerQueue.Run) 239 240 if populate { 241 go graceful.GetManager().RunWithShutdownContext(populateRepoIndexer) 242 } 243 select { 244 case waitChannel <- time.Since(start): 245 case <-graceful.GetManager().IsShutdown(): 246 } 247 248 close(waitChannel) 249 }() 250 251 if setting.Indexer.StartupTimeout > 0 { 252 go func() { 253 pprof.SetGoroutineLabels(ctx) 254 timeout := setting.Indexer.StartupTimeout 255 if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 { 256 timeout += setting.GracefulHammerTime 257 } 258 select { 259 case <-graceful.GetManager().IsShutdown(): 260 log.Warn("Shutdown before Repository Indexer completed initialization") 261 cancel() 262 indexer.Close() 263 case duration, ok := <-waitChannel: 264 if !ok { 265 log.Warn("Repository Indexer Initialization failed") 266 cancel() 267 indexer.Close() 268 return 269 } 270 log.Info("Repository Indexer Initialization took %v", duration) 271 case <-time.After(timeout): 272 cancel() 273 indexer.Close() 274 log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout) 275 } 276 }() 277 } 278 } 279 280 // UpdateRepoIndexer update a repository's entries in the indexer 281 func UpdateRepoIndexer(repo *repo_model.Repository) { 282 indexData := &IndexerData{RepoID: repo.ID} 283 if err := indexerQueue.Push(indexData); err != nil { 284 log.Error("Update repo index data %v failed: %v", indexData, err) 285 } 286 } 287 288 // IsAvailable checks if issue indexer is available 289 func IsAvailable() bool { 290 idx, err := indexer.get() 291 if err != nil { 292 log.Error("IsAvailable(): unable to get indexer: %v", err) 293 return false 294 } 295 296 return idx.Ping() 297 } 298 299 // populateRepoIndexer populate the repo indexer with pre-existing data. This 300 // should only be run when the indexer is created for the first time. 301 func populateRepoIndexer(ctx context.Context) { 302 log.Info("Populating the repo indexer with existing repositories") 303 304 exist, err := db.IsTableNotEmpty("repository") 305 if err != nil { 306 log.Fatal("System error: %v", err) 307 } else if !exist { 308 return 309 } 310 311 // if there is any existing repo indexer metadata in the DB, delete it 312 // since we are starting afresh. Also, xorm requires deletes to have a 313 // condition, and we want to delete everything, thus 1=1. 314 if err := db.DeleteAllRecords("repo_indexer_status"); err != nil { 315 log.Fatal("System error: %v", err) 316 } 317 318 var maxRepoID int64 319 if maxRepoID, err = db.GetMaxID("repository"); err != nil { 320 log.Fatal("System error: %v", err) 321 } 322 323 // start with the maximum existing repo ID and work backwards, so that we 324 // don't include repos that are created after gitea starts; such repos will 325 // already be added to the indexer, and we don't need to add them again. 326 for maxRepoID > 0 { 327 select { 328 case <-ctx.Done(): 329 log.Info("Repository Indexer population shutdown before completion") 330 return 331 default: 332 } 333 ids, err := repo_model.GetUnindexedRepos(repo_model.RepoIndexerTypeCode, maxRepoID, 0, 50) 334 if err != nil { 335 log.Error("populateRepoIndexer: %v", err) 336 return 337 } else if len(ids) == 0 { 338 break 339 } 340 for _, id := range ids { 341 select { 342 case <-ctx.Done(): 343 log.Info("Repository Indexer population shutdown before completion") 344 return 345 default: 346 } 347 if err := indexerQueue.Push(&IndexerData{RepoID: id}); err != nil { 348 log.Error("indexerQueue.Push: %v", err) 349 return 350 } 351 maxRepoID = id - 1 352 } 353 } 354 log.Info("Done (re)populating the repo indexer with existing repositories") 355 }