github.com/drellem2/pogo@v0.0.0-20240503070746-2c2b76da329a/internal/plugins/search/search_index.go (about) 1 package search 2 3 import ( 4 "context" 5 "encoding/json" 6 "errors" 7 "io/ioutil" 8 "os" 9 "path/filepath" 10 "strconv" 11 "strings" 12 "time" 13 14 "github.com/sabhiram/go-gitignore" 15 "github.com/sourcegraph/zoekt" 16 "github.com/sourcegraph/zoekt/query" 17 18 pogoPlugin "github.com/drellem2/pogo/pkg/plugin" 19 ) 20 21 const saveFileName = "search_index.json" 22 const codeSearchIndexFileName = "code_search_index" 23 const indexStartCapacity = 50 24 const indexCacheMinutes = 24 * 60 25 26 type PogoChunkMatch struct { 27 Line uint32 `json:"line"` 28 Content string `json:"content"` 29 } 30 31 type PogoFileMatch struct { 32 Path string `json:"path"` 33 Matches []PogoChunkMatch `json:"matches"` 34 } 35 36 type SearchResults struct { 37 Files []PogoFileMatch `json:"files"` 38 } 39 40 type IndexedProject struct { 41 Root string `json:"root"` 42 Paths []string `json:"paths"` 43 } 44 45 /* 46 * 47 48 Contains channels that can be written to in order to update the project. 49 */ 50 type ProjectUpdater struct { 51 c chan *IndexedProject 52 addFw chan string 53 removeFw chan string 54 quit chan bool 55 closed bool 56 } 57 58 func absolute(path string) (string, error) { 59 str, err := filepath.Abs(path) 60 if err != nil { 61 return "", err 62 } 63 info, err2 := os.Lstat(path) 64 if err2 != nil { 65 return "", err2 66 } 67 if info.IsDir() { 68 return str + "/", nil 69 } 70 return str, nil 71 } 72 73 /* 74 * 75 76 Returns some channels that can be written to in order to update the project. 77 Starts a goroutine that will read these channels. 78 */ 79 func (g *BasicSearch) newProjectUpdater() *ProjectUpdater { 80 u := &ProjectUpdater{ 81 c: make(chan *IndexedProject), 82 addFw: make(chan string), 83 removeFw: make(chan string), 84 quit: make(chan bool), 85 closed: false, 86 } 87 go g.write(u) 88 return u 89 } 90 91 func (g *BasicSearch) write(u *ProjectUpdater) { 92 for !u.closed { 93 func() { 94 select { 95 case proj := <-u.c: 96 g.projects[proj.Root] = *proj 97 g.serializeProjectIndex(proj) 98 case p := <-u.addFw: 99 if g.watcher == nil { 100 g.logger.Warn("watcher is nil") 101 } 102 w := g.watcher.Add(p) 103 if w != nil { 104 g.logger.Error("Error adding file watcher: %v", w) 105 } 106 case p := <-u.removeFw: 107 if g.watcher == nil { 108 g.logger.Warn("watcher is nil") 109 } 110 g.watcher.Remove(p) 111 case <-u.quit: 112 u.closed = true 113 } 114 }() 115 } 116 } 117 118 // Should only be called by index 119 func (g *BasicSearch) indexRec(proj *IndexedProject, path string, 120 gitIgnore *ignore.GitIgnore, u *ProjectUpdater) error { 121 // First index all files in the project 122 file, err := os.Open(path) 123 if err != nil { 124 return err 125 } 126 defer file.Close() 127 dirnames, err := file.Readdirnames(0) 128 g.logger.Debug("Found dirs: ", dirnames) 129 if err != nil { 130 return err 131 } 132 if len(dirnames) == 0 { 133 return nil 134 } 135 files := make([]string, 0, len(dirnames)/2) 136 for _, subFile := range dirnames { 137 newPath := filepath.Join(path, subFile) 138 fileInfo, err := os.Lstat(newPath) 139 if err != nil { 140 g.logger.Warn(err.Error()) 141 continue 142 } 143 // Remove projectRoot prefix from newPath 144 relativePath := strings.TrimPrefix(newPath, proj.Root) 145 146 if !gitIgnore.MatchesPath(relativePath) && subFile != ".git" && subFile != ".pogo" { 147 if fileInfo.IsDir() { 148 u.addFw <- newPath 149 err = g.indexRec(proj, newPath, gitIgnore, u) 150 if err != nil { 151 g.logger.Warn(err.Error()) 152 } 153 } else { 154 files = append(files, relativePath) 155 } 156 } 157 } 158 proj.Paths = append(proj.Paths, files...) 159 return nil 160 } 161 162 // Try to index all files in the project, then create a code search index. 163 // The first is table stakes - so we error on failure. If the second fails, we log it and return. 164 func (g *BasicSearch) index(proj *IndexedProject, path string, 165 gitIgnore *ignore.GitIgnore) { 166 167 u := g.updater 168 169 err := g.indexRec(proj, path, gitIgnore, u) 170 if err != nil { 171 g.logger.Warn("Error indexing project: ", err.Error()) 172 return 173 } 174 u.c <- proj 175 } 176 177 func (g *BasicSearch) ReIndex(path string) { 178 fileInfo, e := os.Lstat(path) 179 if e != nil { 180 g.logger.Error("Error getting path info: ", e) 181 return 182 } 183 if !fileInfo.IsDir() { 184 path = filepath.Dir(path) 185 } 186 g.logger.Info("Reindexing ", path) 187 go func() { 188 fullPath, err2 := absolute(path) 189 if err2 != nil { 190 g.logger.Error("Error getting absolute path", path) 191 return 192 } 193 for projectRoot, indexed := range g.projects { 194 if strings.HasPrefix(fullPath, projectRoot) { 195 /* Below is a golang idiom for removing 196 elements with prefix from the slice. We 197 want to remove all file watchers before 198 reindexing, so we only add back the files 199 that still exist. */ 200 relativePath := strings.TrimPrefix(fullPath, projectRoot) 201 paths := indexed.Paths 202 paths2 := paths 203 paths = paths[:0] 204 u := g.updater 205 for _, p := range paths2 { 206 if !strings.HasPrefix(p, relativePath) { 207 paths = append(paths, p) 208 } else { 209 u.removeFw <- p 210 } 211 } 212 indexed.Paths = paths 213 214 gitIgnore, err := ParseGitIgnore(projectRoot) 215 if err != nil { 216 g.logger.Error("Error parsing gitignore %v", err) 217 } 218 g.index(&indexed, fullPath, gitIgnore) 219 break 220 } 221 } 222 }() 223 } 224 225 /* 226 Even if this function encounters an error, it will always at least return a 227 GitIgnore that matches nothing. 228 */ 229 func ParseGitIgnore(path string) (*ignore.GitIgnore, error) { 230 // Read .gitignore if exists 231 ignorePath := filepath.Join(path, ".gitignore") 232 var err error 233 _, err = os.Lstat(ignorePath) 234 var gitIgnore *ignore.GitIgnore 235 if err != nil { 236 if errors.Is(err, os.ErrNotExist) { 237 err = nil 238 } 239 gitIgnore = ignore.CompileIgnoreLines("") 240 } else { 241 gitIgnore, err = ignore.CompileIgnoreFile(ignorePath) 242 if err != nil { 243 gitIgnore = ignore.CompileIgnoreLines("") 244 } 245 } 246 return gitIgnore, err 247 } 248 249 func (g *BasicSearch) deleteIndexFile(p *IndexedProject) error { 250 searchDir, err := p.makeSearchDir() 251 if err != nil { 252 g.logger.Error("Error making search dir: ", err) 253 return err 254 } 255 indexPath := filepath.Join(searchDir, codeSearchIndexFileName) 256 // First check if indexPath exists 257 _, err = os.Lstat(indexPath) 258 if err != nil { 259 if errors.Is(err, os.ErrNotExist) { 260 return nil 261 } else { 262 return err 263 } 264 } 265 return os.Remove(indexPath) 266 } 267 268 func (g *BasicSearch) getSearchFile(p *IndexedProject, filename string) (*os.File, error) { 269 path := p.Root 270 searchDir, err := p.makeSearchDir() 271 if err != nil { 272 g.logger.Error("Error making search dir: ", err) 273 return nil, err 274 } 275 indexPath := filepath.Join(searchDir, filename) 276 indexFile, err := os.OpenFile(indexPath, os.O_CREATE|os.O_WRONLY, 0600) 277 if err != nil { 278 g.logger.Error("Error opening index file ", path) 279 return nil, err 280 } 281 return indexFile, nil 282 } 283 284 func (g *BasicSearch) getIndexFile(p *IndexedProject) (*os.File, error) { 285 return g.getSearchFile(p, codeSearchIndexFileName) 286 } 287 288 func (g *BasicSearch) Index(req *pogoPlugin.IProcessProjectReq) { 289 path := (*req).Path() 290 p, ok := g.projects[path] 291 if ok && p.Paths != nil && len(p.Paths) > 0 { 292 g.logger.Info("Already indexed ", path) 293 return 294 } 295 proj := IndexedProject{ 296 Root: path, 297 Paths: make([]string, 0, indexStartCapacity), 298 } 299 gitIgnore, err := ParseGitIgnore(path) 300 if err != nil { 301 // Non-fatal error 302 g.logger.Error("Error parsing gitignore", err) 303 } 304 g.index(&proj, path, gitIgnore) 305 } 306 307 // Here is the method where we extract the code above 308 func (g *BasicSearch) serializeProjectIndex(proj *IndexedProject) { 309 searchDir, err := proj.makeSearchDir() 310 if err != nil { 311 g.logger.Error("Error making search dir: ", err) 312 return 313 } 314 saveFilePath := filepath.Join(searchDir, saveFileName) 315 outBytes, err2 := json.Marshal(proj) 316 if err2 != nil { 317 g.logger.Error("Error serializing index to json", "index", *proj) 318 } 319 err3 := os.WriteFile(saveFilePath, outBytes, 0644) 320 if err3 != nil { 321 g.logger.Error("Error saving index", "save_path", saveFilePath) 322 } 323 g.logger.Info("Indexed " + strconv.Itoa(len(proj.Paths)) + " files for " + proj.Root) 324 325 // Now serialize zoekt index 326 327 // First delete the old index 328 g.deleteIndexFile(proj) 329 330 indexer, err := zoekt.NewIndexBuilder(nil) 331 if err != nil { 332 g.logger.Error("Error creating search index") 333 return 334 } 335 336 // Next create the code search index 337 // TODO - add some useful repository metadata 338 for _, path := range proj.Paths { 339 // Prepend Root to path 340 fullPath := filepath.Join(proj.Root, path) 341 absPath, err := absolute(fullPath) 342 if err != nil { 343 g.logger.Error("Error getting absolute path - file may not exist", path) 344 } else { 345 bytes, err := ioutil.ReadFile(absPath) 346 if err != nil { 347 g.logger.Error("Error reading file ", absPath) 348 } else { 349 indexer.AddFile(absPath, bytes) 350 } 351 } 352 } 353 indexFile, err := g.getIndexFile(proj) 354 if err != nil { 355 g.logger.Error("Error getting index file ", proj.Root) 356 return 357 } 358 defer indexFile.Close() 359 err = indexer.Write(indexFile) 360 if err != nil { 361 g.logger.Error("Error writing index file ", proj.Root) 362 g.logger.Error("Error: ", err.Error()) 363 return 364 } 365 } 366 367 func (g *BasicSearch) Load(projectRoot string) (*IndexedProject, error) { 368 project := &IndexedProject{ 369 Root: projectRoot, 370 Paths: make([]string, 0, indexStartCapacity), 371 } 372 searchDir, err := project.makeSearchDir() 373 if err != nil { 374 g.logger.Error("Error making search dir: ", err) 375 return nil, err 376 } 377 saveFilePath := filepath.Join(searchDir, saveFileName) 378 stat, err := os.Lstat(saveFilePath) 379 if err != nil { 380 if errors.Is(err, os.ErrNotExist) { 381 g.projects[projectRoot] = *project 382 // Return empty struct 383 return project, nil 384 } 385 return nil, err 386 } 387 // Check if index is stale 388 if time.Since(stat.ModTime()).Minutes() > indexCacheMinutes { 389 g.logger.Info("Index is stale for " + projectRoot) 390 return project, nil 391 } 392 393 file, err := os.Open(saveFilePath) 394 if err != nil { 395 g.logger.Error("Error opening index file.") 396 return nil, err 397 } 398 defer file.Close() 399 byteValue, _ := ioutil.ReadAll(file) 400 err = json.Unmarshal(byteValue, project) 401 if err != nil { 402 g.logger.Error("Error deserializing index file: %v", err) 403 return nil, err 404 } 405 g.logger.Info("Loaded " + strconv.Itoa(len(project.Paths)) + " files for " + projectRoot) 406 g.updater.c <- project 407 return project, nil 408 } 409 410 func (g *BasicSearch) GetFiles(projectRoot string) (*IndexedProject, error) { 411 project, ok := g.projects[projectRoot] 412 if !ok { 413 return nil, errors.New("Project not indexed " + projectRoot) 414 } 415 return &project, nil 416 } 417 418 func (g *BasicSearch) Search(projectRoot string, data string, duration string) (*SearchResults, error) { 419 project, ok := g.projects[projectRoot] 420 var knownProjects string 421 for k := range g.projects { 422 knownProjects += k 423 } 424 if !ok { 425 return nil, errors.New("Unknown project " + projectRoot + ". Known projects: " + knownProjects) 426 } 427 // Open index file 428 searchDir, err := project.makeSearchDir() 429 if err != nil { 430 g.logger.Error("Error making search dir: ", err) 431 return nil, err 432 } 433 indexPath := filepath.Join(searchDir, codeSearchIndexFileName) 434 indexFile, err := os.Open(indexPath) 435 if err != nil { 436 g.logger.Error("Error opening index file ", indexPath) 437 return nil, err 438 } 439 defer indexFile.Close() 440 index, err2 := zoekt.NewIndexFile(indexFile) 441 if err2 != nil { 442 g.logger.Error("Error reading index file ", indexPath) 443 return nil, err2 444 } 445 // Search 446 searcher, err := zoekt.NewSearcher(index) 447 if err != nil { 448 g.logger.Error("Error creating searcher", err) 449 return nil, err 450 } 451 defer searcher.Close() 452 453 var ( 454 ctx context.Context 455 cancel context.CancelFunc 456 ) 457 458 timeout, err := time.ParseDuration(duration) 459 if err == nil { 460 // The request has a timeout, so create a context that is 461 // canceled automatically when the timeout expires. 462 ctx, cancel = context.WithTimeout(context.Background(), timeout) 463 } else { 464 ctx, cancel = context.WithCancel(context.Background()) 465 } 466 defer cancel() 467 468 query, err := query.Parse(data) 469 if err != nil { 470 g.logger.Error("Error parsing query") 471 return nil, err 472 } 473 474 queryOptions := &zoekt.SearchOptions{ 475 ChunkMatches: true, 476 } 477 478 result, err := searcher.Search(ctx, query, queryOptions) 479 if err != nil { 480 g.logger.Error("Error searching index") 481 return nil, err 482 } 483 484 // Create PogoFileMatch array of same size as result.Files 485 fileMatches := make([]PogoFileMatch, len(result.Files)) 486 487 for i, file := range result.Files { 488 chunkMatches := make([]PogoChunkMatch, len(file.ChunkMatches)) 489 for j, match := range file.ChunkMatches { 490 chunkMatches[j] = PogoChunkMatch{ 491 Line: match.ContentStart.LineNumber, 492 Content: "", 493 } 494 if len(match.Content) > 0 { 495 chunkMatches[j].Content = strings.TrimSpace(string(match.Content)) 496 } 497 } 498 fileMatches[i] = PogoFileMatch{ 499 Path: strings.Replace(file.FileName, projectRoot, "", 1), 500 Matches: chunkMatches, 501 } 502 } 503 return &SearchResults{ 504 Files: fileMatches, 505 }, nil 506 }