github.com/boyter/gocodewalker@v1.3.2/file.go (about) 1 // Package file provides file operations specific to code repositories 2 // such as walking the file tree obeying .ignore and .gitignore files 3 // or looking for the root directory assuming already in a git project 4 5 // SPDX-License-Identifier: MIT OR Unlicense 6 7 package gocodewalker 8 9 import ( 10 "bytes" 11 "errors" 12 "github.com/boyter/gocodewalker/go-gitignore" 13 "golang.org/x/sync/errgroup" 14 "os" 15 "path" 16 "path/filepath" 17 "regexp" 18 "strings" 19 "sync" 20 ) 21 22 const ( 23 GitIgnore = ".gitignore" 24 Ignore = ".ignore" 25 ) 26 27 // ErrTerminateWalk error which indicates that the walker was terminated 28 var ErrTerminateWalk = errors.New("gocodewalker terminated") 29 30 // File is a struct returned which contains the location and the filename of the file that passed all exclusion rules 31 type File struct { 32 Location string 33 Filename string 34 } 35 36 type FileWalker struct { 37 fileListQueue chan *File 38 errorsHandler func(error) bool // If returns true will continue to process where possible, otherwise returns if possible 39 directory string 40 directories []string 41 LocationExcludePattern []string // Case-sensitive patterns which exclude directory/file matches 42 IncludeDirectory []string 43 ExcludeDirectory []string // Paths to always ignore such as .git,.svn and .hg 44 IncludeFilename []string 45 ExcludeFilename []string 46 IncludeDirectoryRegex []*regexp.Regexp // Must match regex as logical OR IE can match any of them 47 ExcludeDirectoryRegex []*regexp.Regexp 48 IncludeFilenameRegex []*regexp.Regexp 49 ExcludeFilenameRegex []*regexp.Regexp 50 AllowListExtensions []string // Which extensions should be allowed case sensitive 51 ExcludeListExtensions []string // Which extensions should be excluded case sensitive 52 walkMutex sync.Mutex 53 terminateWalking bool 54 isWalking bool 55 IgnoreIgnoreFile bool // Should .ignore files be respected? 56 IgnoreGitIgnore bool // Should .gitignore files be respected? 57 IncludeHidden bool // Should hidden files and directories be included/walked 58 osOpen func(name string) (*os.File, error) 59 osReadFile func(name string) ([]byte, error) 60 } 61 62 // NewFileWalker constructs a filewalker, which will walk the supplied directory 63 // and output File results to the supplied queue as it finds them 64 func NewFileWalker(directory string, fileListQueue chan *File) *FileWalker { 65 return &FileWalker{ 66 fileListQueue: fileListQueue, 67 errorsHandler: func(e error) bool { return true }, // a generic one that just swallows everything 68 directory: directory, 69 LocationExcludePattern: nil, 70 IncludeDirectory: nil, 71 ExcludeDirectory: nil, 72 IncludeFilename: nil, 73 ExcludeFilename: nil, 74 IncludeDirectoryRegex: nil, 75 ExcludeDirectoryRegex: nil, 76 IncludeFilenameRegex: nil, 77 ExcludeFilenameRegex: nil, 78 AllowListExtensions: nil, 79 ExcludeListExtensions: nil, 80 walkMutex: sync.Mutex{}, 81 terminateWalking: false, 82 isWalking: false, 83 IgnoreIgnoreFile: false, 84 IgnoreGitIgnore: false, 85 IncludeHidden: false, 86 osOpen: os.Open, 87 osReadFile: os.ReadFile, 88 } 89 } 90 91 // NewParallelFileWalker constructs a filewalker, which will walk the supplied directories in parallel 92 // and output File results to the supplied queue as it finds them 93 func NewParallelFileWalker(directories []string, fileListQueue chan *File) *FileWalker { 94 return &FileWalker{ 95 fileListQueue: fileListQueue, 96 errorsHandler: func(e error) bool { return true }, // a generic one that just swallows everything 97 directories: directories, 98 LocationExcludePattern: nil, 99 IncludeDirectory: nil, 100 ExcludeDirectory: nil, 101 IncludeFilename: nil, 102 ExcludeFilename: nil, 103 IncludeDirectoryRegex: nil, 104 ExcludeDirectoryRegex: nil, 105 IncludeFilenameRegex: nil, 106 ExcludeFilenameRegex: nil, 107 AllowListExtensions: nil, 108 ExcludeListExtensions: nil, 109 walkMutex: sync.Mutex{}, 110 terminateWalking: false, 111 isWalking: false, 112 IgnoreIgnoreFile: false, 113 IgnoreGitIgnore: false, 114 IncludeHidden: false, 115 osOpen: os.Open, 116 osReadFile: os.ReadFile, 117 } 118 } 119 120 // Walking gets the state of the file walker and determine 121 // if we are walking or not 122 func (f *FileWalker) Walking() bool { 123 f.walkMutex.Lock() 124 defer f.walkMutex.Unlock() 125 return f.isWalking 126 } 127 128 // Terminate have the walker break out of walking and return as 129 // soon as it possibly can. This is needed because 130 // this walker needs to work in a TUI interactive mode and 131 // as such we need to be able to end old processes 132 func (f *FileWalker) Terminate() { 133 f.walkMutex.Lock() 134 defer f.walkMutex.Unlock() 135 f.terminateWalking = true 136 } 137 138 // SetErrorHandler sets the function that is called on processing any error 139 // where if you return true it will attempt to continue processing, and if false 140 // will return the error instantly 141 func (f *FileWalker) SetErrorHandler(errors func(error) bool) { 142 if errors != nil { 143 f.errorsHandler = errors 144 } 145 } 146 147 // Start will start walking the supplied directory with the supplied settings 148 // and putting files that mach into the supplied channel. 149 // Returns usual ioutil errors if there is a file issue 150 // and a ErrTerminateWalk if terminate is called while walking 151 func (f *FileWalker) Start() error { 152 f.walkMutex.Lock() 153 f.isWalking = true 154 f.walkMutex.Unlock() 155 156 var err error 157 if len(f.directories) != 0 { 158 eg := errgroup.Group{} 159 for _, directory := range f.directories { 160 d := directory // capture var 161 eg.Go(func() error { 162 return f.walkDirectoryRecursive(d, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}) 163 }) 164 } 165 166 err = eg.Wait() 167 } else { 168 if f.directory != "" { 169 err = f.walkDirectoryRecursive(f.directory, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}) 170 } 171 } 172 173 close(f.fileListQueue) 174 175 f.walkMutex.Lock() 176 f.isWalking = false 177 f.walkMutex.Unlock() 178 179 return err 180 } 181 182 func (f *FileWalker) walkDirectoryRecursive(directory string, gitignores []gitignore.GitIgnore, ignores []gitignore.GitIgnore) error { 183 // NB have to call unlock not using defer because method is recursive 184 // and will deadlock if not done manually 185 f.walkMutex.Lock() 186 if f.terminateWalking { 187 f.walkMutex.Unlock() 188 return ErrTerminateWalk 189 } 190 f.walkMutex.Unlock() 191 192 d, err := f.osOpen(directory) 193 if err != nil { 194 // nothing we can do with this so return nil and process as best we can 195 if f.errorsHandler(err) { 196 return nil 197 } 198 return err 199 } 200 defer d.Close() 201 202 foundFiles, err := d.Readdir(-1) 203 if err != nil { 204 // nothing we can do with this so return nil and process as best we can 205 if f.errorsHandler(err) { 206 return nil 207 } 208 return err 209 } 210 211 files := []os.FileInfo{} 212 dirs := []os.FileInfo{} 213 214 // We want to break apart the files and directories from the 215 // return as we loop over them differently and this avoids some 216 // nested if logic at the expense of a "redundant" loop 217 for _, file := range foundFiles { 218 if file.IsDir() { 219 dirs = append(dirs, file) 220 } else { 221 files = append(files, file) 222 } 223 } 224 225 // Pull out all ignore and gitignore files and add them 226 // to out collection of gitignores to be applied for this pass 227 // and any subdirectories 228 // Since they can apply to the current list of files we need to ensure 229 // we do this before processing files themselves 230 for _, file := range files { 231 if !f.IgnoreGitIgnore { 232 if file.Name() == GitIgnore { 233 c, err := f.osReadFile(filepath.Join(directory, file.Name())) 234 if err != nil { 235 if f.errorsHandler(err) { 236 continue // if asked to ignore it lets continue 237 } 238 return err 239 } 240 241 abs, err := filepath.Abs(directory) 242 if err != nil { 243 if f.errorsHandler(err) { 244 continue // if asked to ignore it lets continue 245 } 246 return err 247 } 248 249 gitIgnore := gitignore.New(bytes.NewReader(c), abs, nil) 250 gitignores = append(gitignores, gitIgnore) 251 } 252 } 253 254 if !f.IgnoreIgnoreFile { 255 if file.Name() == Ignore { 256 c, err := f.osReadFile(filepath.Join(directory, file.Name())) 257 if err != nil { 258 if f.errorsHandler(err) { 259 continue // if asked to ignore it lets continue 260 } 261 return err 262 } 263 264 abs, err := filepath.Abs(directory) 265 if err != nil { 266 if f.errorsHandler(err) { 267 continue // if asked to ignore it lets continue 268 } 269 return err 270 } 271 272 gitIgnore := gitignore.New(bytes.NewReader(c), abs, nil) 273 ignores = append(ignores, gitIgnore) 274 } 275 } 276 } 277 278 // Process files first to start feeding whatever process is consuming 279 // the output before traversing into directories for more files 280 for _, file := range files { 281 shouldIgnore := false 282 joined := filepath.Join(directory, file.Name()) 283 284 for _, ignore := range gitignores { 285 // we have the following situations 286 // 1. none of the gitignores match 287 // 2. one or more match 288 // for #1 this means we should include the file 289 // for #2 this means the last one wins since it should be the most correct 290 if ignore.MatchIsDir(joined, false) != nil { 291 shouldIgnore = ignore.Ignore(joined) 292 } 293 } 294 295 for _, ignore := range ignores { 296 // same rules as above 297 if ignore.MatchIsDir(joined, false) != nil { 298 shouldIgnore = ignore.Ignore(joined) 299 } 300 } 301 302 if len(f.IncludeFilename) != 0 { 303 // include files 304 found := false 305 for _, allow := range f.IncludeFilename { 306 if file.Name() == allow { 307 found = true 308 } 309 } 310 if !found { 311 shouldIgnore = true 312 } 313 } 314 // Exclude comes after include as it takes precedence 315 for _, deny := range f.ExcludeFilename { 316 if file.Name() == deny { 317 shouldIgnore = true 318 } 319 } 320 321 if len(f.IncludeFilenameRegex) != 0 { 322 found := false 323 for _, allow := range f.IncludeFilenameRegex { 324 if allow.Match([]byte(file.Name())) { 325 found = true 326 } 327 } 328 if !found { 329 shouldIgnore = true 330 } 331 } 332 // Exclude comes after include as it takes precedence 333 for _, deny := range f.ExcludeFilenameRegex { 334 if deny.Match([]byte(file.Name())) { 335 shouldIgnore = true 336 } 337 } 338 339 // Ignore hidden files 340 if !f.IncludeHidden { 341 s, err := IsHidden(file, directory) 342 if err != nil { 343 if !f.errorsHandler(err) { 344 return err 345 } 346 } 347 348 if s { 349 shouldIgnore = true 350 } 351 } 352 353 // Check against extensions 354 if len(f.AllowListExtensions) != 0 { 355 ext := GetExtension(file.Name()) 356 357 a := false 358 for _, v := range f.AllowListExtensions { 359 if v == ext { 360 a = true 361 } 362 } 363 364 // try again because we could have one of those pesky ones such as something.spec.tsx 365 // but only if we didn't already find something to save on a bit of processing 366 if !a { 367 ext = GetExtension(ext) 368 for _, v := range f.AllowListExtensions { 369 if v == ext { 370 a = true 371 } 372 } 373 } 374 375 if !a { 376 shouldIgnore = true 377 } 378 } 379 380 for _, deny := range f.ExcludeListExtensions { 381 ext := GetExtension(file.Name()) 382 if ext == deny { 383 shouldIgnore = true 384 } 385 386 if !shouldIgnore { 387 ext = GetExtension(ext) 388 if ext == deny { 389 shouldIgnore = true 390 } 391 } 392 } 393 394 for _, p := range f.LocationExcludePattern { 395 if strings.Contains(joined, p) { 396 shouldIgnore = true 397 } 398 } 399 400 if !shouldIgnore { 401 f.fileListQueue <- &File{ 402 Location: joined, 403 Filename: file.Name(), 404 } 405 } 406 } 407 408 // Now we process the directories after hopefully giving the 409 // channel some files to process 410 for _, dir := range dirs { 411 var shouldIgnore bool 412 joined := filepath.Join(directory, dir.Name()) 413 414 // Check against the ignore files we have if the file we are looking at 415 // should be ignored 416 // It is safe to always call this because the gitignores will not be added 417 // in previous steps 418 for _, ignore := range gitignores { 419 // we have the following situations 420 // 1. none of the gitignores match 421 // 2. one or more match 422 // for #1 this means we should include the file 423 // for #2 this means the last one wins since it should be the most correct 424 if ignore.MatchIsDir(joined, true) != nil { 425 shouldIgnore = ignore.Ignore(joined) 426 } 427 } 428 for _, ignore := range ignores { 429 // same rules as above 430 if ignore.MatchIsDir(joined, true) != nil { 431 shouldIgnore = ignore.Ignore(joined) 432 } 433 } 434 435 // start by saying we didn't find it then check each possible 436 // choice to see if we did find it 437 // if we didn't find it then we should ignore 438 if len(f.IncludeDirectory) != 0 { 439 found := false 440 for _, allow := range f.IncludeDirectory { 441 if dir.Name() == allow { 442 found = true 443 } 444 } 445 if !found { 446 shouldIgnore = true 447 } 448 } 449 // Confirm if there are any files in the path deny list which usually includes 450 // things like .git .hg and .svn 451 // Comes after include as it takes precedence 452 for _, deny := range f.ExcludeDirectory { 453 if dir.Name() == deny { 454 shouldIgnore = true 455 } 456 } 457 458 if len(f.IncludeDirectoryRegex) != 0 { 459 found := false 460 for _, allow := range f.IncludeDirectoryRegex { 461 if allow.Match([]byte(dir.Name())) { 462 found = true 463 } 464 } 465 if !found { 466 shouldIgnore = true 467 } 468 } 469 // Exclude comes after include as it takes precedence 470 for _, deny := range f.ExcludeDirectoryRegex { 471 if deny.Match([]byte(dir.Name())) { 472 shouldIgnore = true 473 } 474 } 475 476 // Ignore hidden directories 477 if !f.IncludeHidden { 478 s, err := IsHidden(dir, directory) 479 if err != nil { 480 if !f.errorsHandler(err) { 481 return err 482 } 483 } 484 485 if s { 486 shouldIgnore = true 487 } 488 } 489 490 if !shouldIgnore { 491 for _, p := range f.LocationExcludePattern { 492 if strings.Contains(joined, p) { 493 shouldIgnore = true 494 } 495 } 496 497 err = f.walkDirectoryRecursive(joined, gitignores, ignores) 498 if err != nil { 499 return err 500 } 501 } 502 } 503 504 return nil 505 } 506 507 // FindRepositoryRoot given the supplied directory backwards looking for .git or .hg 508 // directories indicating we should start our search from that 509 // location as it's the root. 510 // Returns the first directory below supplied with .git or .hg in it 511 // otherwise the supplied directory 512 func FindRepositoryRoot(startDirectory string) string { 513 // Firstly try to determine our real location 514 curdir, err := os.Getwd() 515 if err != nil { 516 return startDirectory 517 } 518 519 // Check if we have .git or .hg where we are and if 520 // so just return because we are already there 521 if checkForGitOrMercurial(curdir) { 522 return startDirectory 523 } 524 525 // We did not find something, so now we need to walk the file tree 526 // backwards in a cross platform way and if we find 527 // a match we return that 528 lastIndex := strings.LastIndex(curdir, string(os.PathSeparator)) 529 for lastIndex != -1 { 530 curdir = curdir[:lastIndex] 531 532 if checkForGitOrMercurial(curdir) { 533 return curdir 534 } 535 536 lastIndex = strings.LastIndex(curdir, string(os.PathSeparator)) 537 } 538 539 // If we didn't find a good match return the supplied directory 540 // so that we start the search from where we started at least 541 // rather than the root 542 return startDirectory 543 } 544 545 // Check if there is a .git or .hg folder in the supplied directory 546 func checkForGitOrMercurial(curdir string) bool { 547 if stat, err := os.Stat(filepath.Join(curdir, ".git")); err == nil && stat.IsDir() { 548 return true 549 } 550 551 if stat, err := os.Stat(filepath.Join(curdir, ".hg")); err == nil && stat.IsDir() { 552 return true 553 } 554 555 return false 556 } 557 558 // GetExtension is a custom version of extracting extensions for a file 559 // which deals with extensions specific to code such as 560 // .travis.yml and the like 561 func GetExtension(name string) string { 562 name = strings.ToLower(name) 563 if !strings.Contains(name, ".") { 564 return name 565 } 566 567 if strings.LastIndex(name, ".") == 0 { 568 return name 569 } 570 571 return path.Ext(name)[1:] 572 }