github.com/elmarschill/hugo_sample@v0.47.1/hugolib/page_bundler_capture.go (about) 1 // Copyright 2017-present The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package hugolib 15 16 import ( 17 "errors" 18 "fmt" 19 "os" 20 "path" 21 "path/filepath" 22 "runtime" 23 "sort" 24 "strings" 25 "sync" 26 27 "github.com/spf13/afero" 28 29 "github.com/gohugoio/hugo/hugofs" 30 31 "github.com/gohugoio/hugo/helpers" 32 33 "golang.org/x/sync/errgroup" 34 35 "github.com/gohugoio/hugo/source" 36 jww "github.com/spf13/jwalterweatherman" 37 ) 38 39 var errSkipCyclicDir = errors.New("skip potential cyclic dir") 40 41 type capturer struct { 42 // To prevent symbolic link cycles: Visit same folder only once. 43 seen map[string]bool 44 seenMu sync.Mutex 45 46 handler captureResultHandler 47 48 sourceSpec *source.SourceSpec 49 fs afero.Fs 50 logger *jww.Notepad 51 52 // Filenames limits the content to process to a list of filenames/directories. 53 // This is used for partial building in server mode. 54 filenames []string 55 56 // Used to determine how to handle content changes in server mode. 57 contentChanges *contentChangeMap 58 59 // Semaphore used to throttle the concurrent sub directory handling. 60 sem chan bool 61 } 62 63 func newCapturer( 64 logger *jww.Notepad, 65 sourceSpec *source.SourceSpec, 66 handler captureResultHandler, 67 contentChanges *contentChangeMap, 68 filenames ...string) *capturer { 69 70 numWorkers := 4 71 if n := runtime.NumCPU(); n > numWorkers { 72 numWorkers = n 73 } 74 75 // TODO(bep) the "index" vs "_index" check/strings should be moved in one place. 76 isBundleHeader := func(filename string) bool { 77 base := filepath.Base(filename) 78 name := helpers.Filename(base) 79 return isContentFile(base) && (name == "index" || name == "_index") 80 } 81 82 // Make sure that any bundle header files are processed before the others. This makes 83 // sure that any bundle head is processed before its resources. 84 sort.Slice(filenames, func(i, j int) bool { 85 a, b := filenames[i], filenames[j] 86 ac, bc := isBundleHeader(a), isBundleHeader(b) 87 88 if ac { 89 return true 90 } 91 92 if bc { 93 return false 94 } 95 96 return a < b 97 }) 98 99 c := &capturer{ 100 sem: make(chan bool, numWorkers), 101 handler: handler, 102 sourceSpec: sourceSpec, 103 fs: sourceSpec.SourceFs, 104 logger: logger, 105 contentChanges: contentChanges, 106 seen: make(map[string]bool), 107 filenames: filenames} 108 109 return c 110 } 111 112 // Captured files and bundles ready to be processed will be passed on to 113 // these channels. 114 type captureResultHandler interface { 115 handleSingles(fis ...*fileInfo) 116 handleCopyFiles(fis ...pathLangFile) 117 captureBundlesHandler 118 } 119 120 type captureBundlesHandler interface { 121 handleBundles(b *bundleDirs) 122 } 123 124 type captureResultHandlerChain struct { 125 handlers []captureBundlesHandler 126 } 127 128 func (c *captureResultHandlerChain) handleSingles(fis ...*fileInfo) { 129 for _, h := range c.handlers { 130 if hh, ok := h.(captureResultHandler); ok { 131 hh.handleSingles(fis...) 132 } 133 } 134 } 135 func (c *captureResultHandlerChain) handleBundles(b *bundleDirs) { 136 for _, h := range c.handlers { 137 h.handleBundles(b) 138 } 139 } 140 141 func (c *captureResultHandlerChain) handleCopyFiles(files ...pathLangFile) { 142 for _, h := range c.handlers { 143 if hh, ok := h.(captureResultHandler); ok { 144 hh.handleCopyFiles(files...) 145 } 146 } 147 } 148 149 func (c *capturer) capturePartial(filenames ...string) error { 150 handled := make(map[string]bool) 151 152 for _, filename := range filenames { 153 dir, resolvedFilename, tp := c.contentChanges.resolveAndRemove(filename) 154 if handled[resolvedFilename] { 155 continue 156 } 157 158 handled[resolvedFilename] = true 159 160 switch tp { 161 case bundleLeaf: 162 if err := c.handleDir(resolvedFilename); err != nil { 163 // Directory may have been deleted. 164 if !os.IsNotExist(err) { 165 return err 166 } 167 } 168 case bundleBranch: 169 if err := c.handleBranchDir(resolvedFilename); err != nil { 170 // Directory may have been deleted. 171 if !os.IsNotExist(err) { 172 return err 173 } 174 } 175 default: 176 fi, err := c.resolveRealPath(resolvedFilename) 177 if os.IsNotExist(err) { 178 // File has been deleted. 179 continue 180 } 181 182 // Just in case the owning dir is a new symlink -- this will 183 // create the proper mapping for it. 184 c.resolveRealPath(dir) 185 186 f, active := c.newFileInfo(fi, tp) 187 if active { 188 c.copyOrHandleSingle(f) 189 } 190 } 191 } 192 193 return nil 194 } 195 196 func (c *capturer) capture() error { 197 if len(c.filenames) > 0 { 198 return c.capturePartial(c.filenames...) 199 } 200 201 err := c.handleDir(helpers.FilePathSeparator) 202 if err != nil { 203 return err 204 } 205 206 return nil 207 } 208 209 func (c *capturer) handleNestedDir(dirname string) error { 210 select { 211 case c.sem <- true: 212 var g errgroup.Group 213 214 g.Go(func() error { 215 defer func() { 216 <-c.sem 217 }() 218 return c.handleDir(dirname) 219 }) 220 return g.Wait() 221 default: 222 // For deeply nested file trees, waiting for a semaphore wil deadlock. 223 return c.handleDir(dirname) 224 } 225 } 226 227 // This handles a bundle branch and its resources only. This is used 228 // in server mode on changes. If this dir does not (anymore) represent a bundle 229 // branch, the handling is upgraded to the full handleDir method. 230 func (c *capturer) handleBranchDir(dirname string) error { 231 files, err := c.readDir(dirname) 232 if err != nil { 233 234 return err 235 } 236 237 var ( 238 dirType bundleDirType 239 ) 240 241 for _, fi := range files { 242 if !fi.IsDir() { 243 tp, _ := classifyBundledFile(fi.RealName()) 244 if dirType == bundleNot { 245 dirType = tp 246 } 247 248 if dirType == bundleLeaf { 249 return c.handleDir(dirname) 250 } 251 } 252 } 253 254 if dirType != bundleBranch { 255 return c.handleDir(dirname) 256 } 257 258 dirs := newBundleDirs(bundleBranch, c) 259 260 var secondPass []*fileInfo 261 262 // Handle potential bundle headers first. 263 for _, fi := range files { 264 if fi.IsDir() { 265 continue 266 } 267 268 tp, isContent := classifyBundledFile(fi.RealName()) 269 270 f, active := c.newFileInfo(fi, tp) 271 272 if !active { 273 continue 274 } 275 276 if !f.isOwner() { 277 if !isContent { 278 // This is a partial update -- we only care about the files that 279 // is in this bundle. 280 secondPass = append(secondPass, f) 281 } 282 continue 283 } 284 dirs.addBundleHeader(f) 285 } 286 287 for _, f := range secondPass { 288 dirs.addBundleFiles(f) 289 } 290 291 c.handler.handleBundles(dirs) 292 293 return nil 294 295 } 296 297 func (c *capturer) handleDir(dirname string) error { 298 299 files, err := c.readDir(dirname) 300 if err != nil { 301 return err 302 } 303 304 type dirState int 305 306 const ( 307 dirStateDefault dirState = iota 308 309 dirStateAssetsOnly 310 dirStateSinglesOnly 311 ) 312 313 var ( 314 fileBundleTypes = make([]bundleDirType, len(files)) 315 316 // Start with the assumption that this dir contains only non-content assets (images etc.) 317 // If that is still true after we had a first look at the list of files, we 318 // can just copy the files to destination. We will still have to look at the 319 // sub-folders for potential bundles. 320 state = dirStateAssetsOnly 321 322 // Start with the assumption that this dir is not a bundle. 323 // A directory is a bundle if it contains a index content file, 324 // e.g. index.md (a leaf bundle) or a _index.md (a branch bundle). 325 bundleType = bundleNot 326 ) 327 328 /* First check for any content files. 329 - If there are none, then this is a assets folder only (images etc.) 330 and we can just plainly copy them to 331 destination. 332 - If this is a section with no image etc. or similar, we can just handle it 333 as it was a single content file. 334 */ 335 var hasNonContent, isBranch bool 336 337 for i, fi := range files { 338 if !fi.IsDir() { 339 tp, isContent := classifyBundledFile(fi.RealName()) 340 341 fileBundleTypes[i] = tp 342 if !isBranch { 343 isBranch = tp == bundleBranch 344 } 345 346 if isContent { 347 // This is not a assets-only folder. 348 state = dirStateDefault 349 } else { 350 hasNonContent = true 351 } 352 } 353 } 354 355 if isBranch && !hasNonContent { 356 // This is a section or similar with no need for any bundle handling. 357 state = dirStateSinglesOnly 358 } 359 360 if state > dirStateDefault { 361 return c.handleNonBundle(dirname, files, state == dirStateSinglesOnly) 362 } 363 364 var fileInfos = make([]*fileInfo, 0, len(files)) 365 366 for i, fi := range files { 367 368 currentType := bundleNot 369 370 if !fi.IsDir() { 371 currentType = fileBundleTypes[i] 372 if bundleType == bundleNot && currentType != bundleNot { 373 bundleType = currentType 374 } 375 } 376 377 if bundleType == bundleNot && currentType != bundleNot { 378 bundleType = currentType 379 } 380 381 f, active := c.newFileInfo(fi, currentType) 382 383 if !active { 384 continue 385 } 386 387 fileInfos = append(fileInfos, f) 388 } 389 390 var todo []*fileInfo 391 392 if bundleType != bundleLeaf { 393 for _, fi := range fileInfos { 394 if fi.FileInfo().IsDir() { 395 // Handle potential nested bundles. 396 if err := c.handleNestedDir(fi.Path()); err != nil { 397 return err 398 } 399 } else if bundleType == bundleNot || (!fi.isOwner() && fi.isContentFile()) { 400 // Not in a bundle. 401 c.copyOrHandleSingle(fi) 402 } else { 403 // This is a section folder or similar with non-content files in it. 404 todo = append(todo, fi) 405 } 406 } 407 } else { 408 todo = fileInfos 409 } 410 411 if len(todo) == 0 { 412 return nil 413 } 414 415 dirs, err := c.createBundleDirs(todo, bundleType) 416 if err != nil { 417 return err 418 } 419 420 // Send the bundle to the next step in the processor chain. 421 c.handler.handleBundles(dirs) 422 423 return nil 424 } 425 426 func (c *capturer) handleNonBundle( 427 dirname string, 428 fileInfos pathLangFileFis, 429 singlesOnly bool) error { 430 431 for _, fi := range fileInfos { 432 if fi.IsDir() { 433 if err := c.handleNestedDir(fi.Filename()); err != nil { 434 return err 435 } 436 } else { 437 if singlesOnly { 438 f, active := c.newFileInfo(fi, bundleNot) 439 if !active { 440 continue 441 } 442 c.handler.handleSingles(f) 443 } else { 444 c.handler.handleCopyFiles(fi) 445 } 446 } 447 } 448 449 return nil 450 } 451 452 func (c *capturer) copyOrHandleSingle(fi *fileInfo) { 453 if fi.isContentFile() { 454 c.handler.handleSingles(fi) 455 } else { 456 // These do not currently need any further processing. 457 c.handler.handleCopyFiles(fi) 458 } 459 } 460 461 func (c *capturer) createBundleDirs(fileInfos []*fileInfo, bundleType bundleDirType) (*bundleDirs, error) { 462 dirs := newBundleDirs(bundleType, c) 463 464 for _, fi := range fileInfos { 465 if fi.FileInfo().IsDir() { 466 var collector func(fis ...*fileInfo) 467 468 if bundleType == bundleBranch { 469 // All files in the current directory are part of this bundle. 470 // Trying to include sub folders in these bundles are filled with ambiguity. 471 collector = func(fis ...*fileInfo) { 472 for _, fi := range fis { 473 c.copyOrHandleSingle(fi) 474 } 475 } 476 } else { 477 // All nested files and directories are part of this bundle. 478 collector = func(fis ...*fileInfo) { 479 fileInfos = append(fileInfos, fis...) 480 } 481 } 482 err := c.collectFiles(fi.Path(), collector) 483 if err != nil { 484 return nil, err 485 } 486 487 } else if fi.isOwner() { 488 // There can be more than one language, so: 489 // 1. Content files must be attached to its language's bundle. 490 // 2. Other files must be attached to all languages. 491 // 3. Every content file needs a bundle header. 492 dirs.addBundleHeader(fi) 493 } 494 } 495 496 for _, fi := range fileInfos { 497 if fi.FileInfo().IsDir() || fi.isOwner() { 498 continue 499 } 500 501 if fi.isContentFile() { 502 if bundleType != bundleBranch { 503 dirs.addBundleContentFile(fi) 504 } 505 } else { 506 dirs.addBundleFiles(fi) 507 } 508 } 509 510 return dirs, nil 511 } 512 513 func (c *capturer) collectFiles(dirname string, handleFiles func(fis ...*fileInfo)) error { 514 515 filesInDir, err := c.readDir(dirname) 516 if err != nil { 517 return err 518 } 519 520 for _, fi := range filesInDir { 521 if fi.IsDir() { 522 err := c.collectFiles(fi.Filename(), handleFiles) 523 if err != nil { 524 return err 525 } 526 } else { 527 f, active := c.newFileInfo(fi, bundleNot) 528 if active { 529 handleFiles(f) 530 } 531 } 532 } 533 534 return nil 535 } 536 537 func (c *capturer) readDir(dirname string) (pathLangFileFis, error) { 538 if c.sourceSpec.IgnoreFile(dirname) { 539 return nil, nil 540 } 541 542 dir, err := c.fs.Open(dirname) 543 if err != nil { 544 return nil, err 545 } 546 defer dir.Close() 547 fis, err := dir.Readdir(-1) 548 if err != nil { 549 return nil, err 550 } 551 552 pfis := make(pathLangFileFis, 0, len(fis)) 553 554 for _, fi := range fis { 555 fip := fi.(pathLangFileFi) 556 557 if !c.sourceSpec.IgnoreFile(fip.Filename()) { 558 559 err := c.resolveRealPathIn(fip) 560 561 if err != nil { 562 // It may have been deleted in the meantime. 563 if err == errSkipCyclicDir || os.IsNotExist(err) { 564 continue 565 } 566 return nil, err 567 } 568 569 pfis = append(pfis, fip) 570 } 571 } 572 573 return pfis, nil 574 } 575 576 func (c *capturer) newFileInfo(fi pathLangFileFi, tp bundleDirType) (*fileInfo, bool) { 577 f := newFileInfo(c.sourceSpec, "", "", fi, tp) 578 return f, !f.disabled 579 } 580 581 type pathLangFile interface { 582 hugofs.LanguageAnnouncer 583 hugofs.FilePather 584 } 585 586 type pathLangFileFi interface { 587 os.FileInfo 588 pathLangFile 589 } 590 591 type pathLangFileFis []pathLangFileFi 592 593 type bundleDirs struct { 594 tp bundleDirType 595 // Maps languages to bundles. 596 bundles map[string]*bundleDir 597 598 // Keeps track of language overrides for non-content files, e.g. logo.en.png. 599 langOverrides map[string]bool 600 601 c *capturer 602 } 603 604 func newBundleDirs(tp bundleDirType, c *capturer) *bundleDirs { 605 return &bundleDirs{tp: tp, bundles: make(map[string]*bundleDir), langOverrides: make(map[string]bool), c: c} 606 } 607 608 type bundleDir struct { 609 tp bundleDirType 610 fi *fileInfo 611 612 resources map[string]*fileInfo 613 } 614 615 func (b bundleDir) clone() *bundleDir { 616 b.resources = make(map[string]*fileInfo) 617 fic := *b.fi 618 b.fi = &fic 619 return &b 620 } 621 622 func newBundleDir(fi *fileInfo, bundleType bundleDirType) *bundleDir { 623 return &bundleDir{fi: fi, tp: bundleType, resources: make(map[string]*fileInfo)} 624 } 625 626 func (b *bundleDirs) addBundleContentFile(fi *fileInfo) { 627 dir, found := b.bundles[fi.Lang()] 628 if !found { 629 // Every bundled content file needs a bundle header. 630 // If one does not exist in its language, we pick the default 631 // language version, or a random one if that doesn't exist, either. 632 tl := b.c.sourceSpec.DefaultContentLanguage 633 ldir, found := b.bundles[tl] 634 if !found { 635 // Just pick one. 636 for _, v := range b.bundles { 637 ldir = v 638 break 639 } 640 } 641 642 if ldir == nil { 643 panic(fmt.Sprintf("bundle not found for file %q", fi.Filename())) 644 } 645 646 dir = ldir.clone() 647 dir.fi.overriddenLang = fi.Lang() 648 b.bundles[fi.Lang()] = dir 649 } 650 651 dir.resources[fi.Path()] = fi 652 } 653 654 func (b *bundleDirs) addBundleFiles(fi *fileInfo) { 655 dir := filepath.ToSlash(fi.Dir()) 656 p := dir + fi.TranslationBaseName() + "." + fi.Ext() 657 for lang, bdir := range b.bundles { 658 key := path.Join(lang, p) 659 660 // Given mypage.de.md (German translation) and mypage.md we pick the most 661 // specific for that language. 662 if fi.Lang() == lang || !b.langOverrides[key] { 663 bdir.resources[key] = fi 664 } 665 b.langOverrides[key] = true 666 } 667 } 668 669 func (b *bundleDirs) addBundleHeader(fi *fileInfo) { 670 b.bundles[fi.Lang()] = newBundleDir(fi, b.tp) 671 } 672 673 func (c *capturer) isSeen(dirname string) bool { 674 c.seenMu.Lock() 675 defer c.seenMu.Unlock() 676 seen := c.seen[dirname] 677 c.seen[dirname] = true 678 if seen { 679 c.logger.WARN.Printf("Content dir %q already processed; skipped to avoid infinite recursion.", dirname) 680 return true 681 682 } 683 return false 684 } 685 686 func (c *capturer) resolveRealPath(path string) (pathLangFileFi, error) { 687 fileInfo, err := c.lstatIfPossible(path) 688 if err != nil { 689 return nil, err 690 } 691 return fileInfo, c.resolveRealPathIn(fileInfo) 692 } 693 694 func (c *capturer) resolveRealPathIn(fileInfo pathLangFileFi) error { 695 696 basePath := fileInfo.BaseDir() 697 path := fileInfo.Filename() 698 699 realPath := path 700 701 if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink { 702 link, err := filepath.EvalSymlinks(path) 703 if err != nil { 704 return fmt.Errorf("Cannot read symbolic link %q, error was: %s", path, err) 705 } 706 707 // This is a file on the outside of any base fs, so we have to use the os package. 708 sfi, err := os.Stat(link) 709 if err != nil { 710 return fmt.Errorf("Cannot stat %q, error was: %s", link, err) 711 } 712 713 // TODO(bep) improve all of this. 714 if a, ok := fileInfo.(*hugofs.LanguageFileInfo); ok { 715 a.FileInfo = sfi 716 } 717 718 realPath = link 719 720 if realPath != path && sfi.IsDir() && c.isSeen(realPath) { 721 // Avoid cyclic symlinks. 722 // Note that this may prevent some uses that isn't cyclic and also 723 // potential useful, but this implementation is both robust and simple: 724 // We stop at the first directory that we have seen before, e.g. 725 // /content/blog will only be processed once. 726 return errSkipCyclicDir 727 } 728 729 if c.contentChanges != nil { 730 // Keep track of symbolic links in watch mode. 731 var from, to string 732 if sfi.IsDir() { 733 from = realPath 734 to = path 735 736 if !strings.HasSuffix(to, helpers.FilePathSeparator) { 737 to = to + helpers.FilePathSeparator 738 } 739 if !strings.HasSuffix(from, helpers.FilePathSeparator) { 740 from = from + helpers.FilePathSeparator 741 } 742 743 if !strings.HasSuffix(basePath, helpers.FilePathSeparator) { 744 basePath = basePath + helpers.FilePathSeparator 745 } 746 747 if strings.HasPrefix(from, basePath) { 748 // With symbolic links inside /content we need to keep 749 // a reference to both. This may be confusing with --navigateToChanged 750 // but the user has chosen this him or herself. 751 c.contentChanges.addSymbolicLinkMapping(from, from) 752 } 753 754 } else { 755 from = realPath 756 to = path 757 } 758 759 c.contentChanges.addSymbolicLinkMapping(from, to) 760 } 761 } 762 763 return nil 764 } 765 766 func (c *capturer) lstatIfPossible(path string) (pathLangFileFi, error) { 767 fi, err := helpers.LstatIfPossible(c.fs, path) 768 if err != nil { 769 return nil, err 770 } 771 return fi.(pathLangFileFi), nil 772 }