github.com/shohhei1126/hugo@v0.42.2-0.20180623210752-3d5928889ad7/hugolib/page_bundler_capture.go (about) 1 // Copyright 2017-present The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package hugolib 15 16 import ( 17 "errors" 18 "fmt" 19 "os" 20 "path" 21 "path/filepath" 22 "runtime" 23 "strings" 24 "sync" 25 26 "github.com/spf13/afero" 27 28 "github.com/gohugoio/hugo/hugofs" 29 30 "github.com/gohugoio/hugo/helpers" 31 32 "golang.org/x/sync/errgroup" 33 34 "github.com/gohugoio/hugo/source" 35 jww "github.com/spf13/jwalterweatherman" 36 ) 37 38 var errSkipCyclicDir = errors.New("skip potential cyclic dir") 39 40 type capturer struct { 41 // To prevent symbolic link cycles: Visit same folder only once. 42 seen map[string]bool 43 seenMu sync.Mutex 44 45 handler captureResultHandler 46 47 sourceSpec *source.SourceSpec 48 fs afero.Fs 49 logger *jww.Notepad 50 51 // Filenames limits the content to process to a list of filenames/directories. 52 // This is used for partial building in server mode. 53 filenames []string 54 55 // Used to determine how to handle content changes in server mode. 56 contentChanges *contentChangeMap 57 58 // Semaphore used to throttle the concurrent sub directory handling. 59 sem chan bool 60 } 61 62 func newCapturer( 63 logger *jww.Notepad, 64 sourceSpec *source.SourceSpec, 65 handler captureResultHandler, 66 contentChanges *contentChangeMap, 67 filenames ...string) *capturer { 68 69 numWorkers := 4 70 if n := runtime.NumCPU(); n > numWorkers { 71 numWorkers = n 72 } 73 74 c := &capturer{ 75 sem: make(chan bool, numWorkers), 76 handler: handler, 77 sourceSpec: sourceSpec, 78 fs: sourceSpec.SourceFs, 79 logger: logger, 80 contentChanges: contentChanges, 81 seen: make(map[string]bool), 82 filenames: filenames} 83 84 return c 85 } 86 87 // Captured files and bundles ready to be processed will be passed on to 88 // these channels. 89 type captureResultHandler interface { 90 handleSingles(fis ...*fileInfo) 91 handleCopyFiles(fis ...pathLangFile) 92 captureBundlesHandler 93 } 94 95 type captureBundlesHandler interface { 96 handleBundles(b *bundleDirs) 97 } 98 99 type captureResultHandlerChain struct { 100 handlers []captureBundlesHandler 101 } 102 103 func (c *captureResultHandlerChain) handleSingles(fis ...*fileInfo) { 104 for _, h := range c.handlers { 105 if hh, ok := h.(captureResultHandler); ok { 106 hh.handleSingles(fis...) 107 } 108 } 109 } 110 func (c *captureResultHandlerChain) handleBundles(b *bundleDirs) { 111 for _, h := range c.handlers { 112 h.handleBundles(b) 113 } 114 } 115 116 func (c *captureResultHandlerChain) handleCopyFiles(files ...pathLangFile) { 117 for _, h := range c.handlers { 118 if hh, ok := h.(captureResultHandler); ok { 119 hh.handleCopyFiles(files...) 120 } 121 } 122 } 123 124 func (c *capturer) capturePartial(filenames ...string) error { 125 handled := make(map[string]bool) 126 127 for _, filename := range filenames { 128 dir, resolvedFilename, tp := c.contentChanges.resolveAndRemove(filename) 129 if handled[resolvedFilename] { 130 continue 131 } 132 133 handled[resolvedFilename] = true 134 135 switch tp { 136 case bundleLeaf: 137 if err := c.handleDir(resolvedFilename); err != nil { 138 return err 139 } 140 case bundleBranch: 141 if err := c.handleBranchDir(resolvedFilename); err != nil { 142 return err 143 } 144 default: 145 fi, err := c.resolveRealPath(resolvedFilename) 146 if os.IsNotExist(err) { 147 // File has been deleted. 148 continue 149 } 150 151 // Just in case the owning dir is a new symlink -- this will 152 // create the proper mapping for it. 153 c.resolveRealPath(dir) 154 155 f, active := c.newFileInfo(fi, tp) 156 if active { 157 c.copyOrHandleSingle(f) 158 } 159 } 160 } 161 162 return nil 163 } 164 165 func (c *capturer) capture() error { 166 if len(c.filenames) > 0 { 167 return c.capturePartial(c.filenames...) 168 } 169 170 err := c.handleDir(helpers.FilePathSeparator) 171 if err != nil { 172 return err 173 } 174 175 return nil 176 } 177 178 func (c *capturer) handleNestedDir(dirname string) error { 179 select { 180 case c.sem <- true: 181 var g errgroup.Group 182 183 g.Go(func() error { 184 defer func() { 185 <-c.sem 186 }() 187 return c.handleDir(dirname) 188 }) 189 return g.Wait() 190 default: 191 // For deeply nested file trees, waiting for a semaphore wil deadlock. 192 return c.handleDir(dirname) 193 } 194 } 195 196 // This handles a bundle branch and its resources only. This is used 197 // in server mode on changes. If this dir does not (anymore) represent a bundle 198 // branch, the handling is upgraded to the full handleDir method. 199 func (c *capturer) handleBranchDir(dirname string) error { 200 files, err := c.readDir(dirname) 201 if err != nil { 202 203 return err 204 } 205 206 var ( 207 dirType bundleDirType 208 ) 209 210 for _, fi := range files { 211 if !fi.IsDir() { 212 tp, _ := classifyBundledFile(fi.RealName()) 213 if dirType == bundleNot { 214 dirType = tp 215 } 216 217 if dirType == bundleLeaf { 218 return c.handleDir(dirname) 219 } 220 } 221 } 222 223 if dirType != bundleBranch { 224 return c.handleDir(dirname) 225 } 226 227 dirs := newBundleDirs(bundleBranch, c) 228 229 var secondPass []*fileInfo 230 231 // Handle potential bundle headers first. 232 for _, fi := range files { 233 if fi.IsDir() { 234 continue 235 } 236 237 tp, isContent := classifyBundledFile(fi.RealName()) 238 239 f, active := c.newFileInfo(fi, tp) 240 241 if !active { 242 continue 243 } 244 245 if !f.isOwner() { 246 if !isContent { 247 // This is a partial update -- we only care about the files that 248 // is in this bundle. 249 secondPass = append(secondPass, f) 250 } 251 continue 252 } 253 dirs.addBundleHeader(f) 254 } 255 256 for _, f := range secondPass { 257 dirs.addBundleFiles(f) 258 } 259 260 c.handler.handleBundles(dirs) 261 262 return nil 263 264 } 265 266 func (c *capturer) handleDir(dirname string) error { 267 268 files, err := c.readDir(dirname) 269 if err != nil { 270 return err 271 } 272 273 type dirState int 274 275 const ( 276 dirStateDefault dirState = iota 277 278 dirStateAssetsOnly 279 dirStateSinglesOnly 280 ) 281 282 var ( 283 fileBundleTypes = make([]bundleDirType, len(files)) 284 285 // Start with the assumption that this dir contains only non-content assets (images etc.) 286 // If that is still true after we had a first look at the list of files, we 287 // can just copy the files to destination. We will still have to look at the 288 // sub-folders for potential bundles. 289 state = dirStateAssetsOnly 290 291 // Start with the assumption that this dir is not a bundle. 292 // A directory is a bundle if it contains a index content file, 293 // e.g. index.md (a leaf bundle) or a _index.md (a branch bundle). 294 bundleType = bundleNot 295 ) 296 297 /* First check for any content files. 298 - If there are none, then this is a assets folder only (images etc.) 299 and we can just plainly copy them to 300 destination. 301 - If this is a section with no image etc. or similar, we can just handle it 302 as it was a single content file. 303 */ 304 var hasNonContent, isBranch bool 305 306 for i, fi := range files { 307 if !fi.IsDir() { 308 tp, isContent := classifyBundledFile(fi.RealName()) 309 310 fileBundleTypes[i] = tp 311 if !isBranch { 312 isBranch = tp == bundleBranch 313 } 314 315 if isContent { 316 // This is not a assets-only folder. 317 state = dirStateDefault 318 } else { 319 hasNonContent = true 320 } 321 } 322 } 323 324 if isBranch && !hasNonContent { 325 // This is a section or similar with no need for any bundle handling. 326 state = dirStateSinglesOnly 327 } 328 329 if state > dirStateDefault { 330 return c.handleNonBundle(dirname, files, state == dirStateSinglesOnly) 331 } 332 333 var fileInfos = make([]*fileInfo, 0, len(files)) 334 335 for i, fi := range files { 336 337 currentType := bundleNot 338 339 if !fi.IsDir() { 340 currentType = fileBundleTypes[i] 341 if bundleType == bundleNot && currentType != bundleNot { 342 bundleType = currentType 343 } 344 } 345 346 if bundleType == bundleNot && currentType != bundleNot { 347 bundleType = currentType 348 } 349 350 f, active := c.newFileInfo(fi, currentType) 351 352 if !active { 353 continue 354 } 355 356 fileInfos = append(fileInfos, f) 357 } 358 359 var todo []*fileInfo 360 361 if bundleType != bundleLeaf { 362 for _, fi := range fileInfos { 363 if fi.FileInfo().IsDir() { 364 // Handle potential nested bundles. 365 if err := c.handleNestedDir(fi.Path()); err != nil { 366 return err 367 } 368 } else if bundleType == bundleNot || (!fi.isOwner() && fi.isContentFile()) { 369 // Not in a bundle. 370 c.copyOrHandleSingle(fi) 371 } else { 372 // This is a section folder or similar with non-content files in it. 373 todo = append(todo, fi) 374 } 375 } 376 } else { 377 todo = fileInfos 378 } 379 380 if len(todo) == 0 { 381 return nil 382 } 383 384 dirs, err := c.createBundleDirs(todo, bundleType) 385 if err != nil { 386 return err 387 } 388 389 // Send the bundle to the next step in the processor chain. 390 c.handler.handleBundles(dirs) 391 392 return nil 393 } 394 395 func (c *capturer) handleNonBundle( 396 dirname string, 397 fileInfos pathLangFileFis, 398 singlesOnly bool) error { 399 400 for _, fi := range fileInfos { 401 if fi.IsDir() { 402 if err := c.handleNestedDir(fi.Filename()); err != nil { 403 return err 404 } 405 } else { 406 if singlesOnly { 407 f, active := c.newFileInfo(fi, bundleNot) 408 if !active { 409 continue 410 } 411 c.handler.handleSingles(f) 412 } else { 413 c.handler.handleCopyFiles(fi) 414 } 415 } 416 } 417 418 return nil 419 } 420 421 func (c *capturer) copyOrHandleSingle(fi *fileInfo) { 422 if fi.isContentFile() { 423 c.handler.handleSingles(fi) 424 } else { 425 // These do not currently need any further processing. 426 c.handler.handleCopyFiles(fi) 427 } 428 } 429 430 func (c *capturer) createBundleDirs(fileInfos []*fileInfo, bundleType bundleDirType) (*bundleDirs, error) { 431 dirs := newBundleDirs(bundleType, c) 432 433 for _, fi := range fileInfos { 434 if fi.FileInfo().IsDir() { 435 var collector func(fis ...*fileInfo) 436 437 if bundleType == bundleBranch { 438 // All files in the current directory are part of this bundle. 439 // Trying to include sub folders in these bundles are filled with ambiguity. 440 collector = func(fis ...*fileInfo) { 441 for _, fi := range fis { 442 c.copyOrHandleSingle(fi) 443 } 444 } 445 } else { 446 // All nested files and directories are part of this bundle. 447 collector = func(fis ...*fileInfo) { 448 fileInfos = append(fileInfos, fis...) 449 } 450 } 451 err := c.collectFiles(fi.Path(), collector) 452 if err != nil { 453 return nil, err 454 } 455 456 } else if fi.isOwner() { 457 // There can be more than one language, so: 458 // 1. Content files must be attached to its language's bundle. 459 // 2. Other files must be attached to all languages. 460 // 3. Every content file needs a bundle header. 461 dirs.addBundleHeader(fi) 462 } 463 } 464 465 for _, fi := range fileInfos { 466 if fi.FileInfo().IsDir() || fi.isOwner() { 467 continue 468 } 469 470 if fi.isContentFile() { 471 if bundleType != bundleBranch { 472 dirs.addBundleContentFile(fi) 473 } 474 } else { 475 dirs.addBundleFiles(fi) 476 } 477 } 478 479 return dirs, nil 480 } 481 482 func (c *capturer) collectFiles(dirname string, handleFiles func(fis ...*fileInfo)) error { 483 484 filesInDir, err := c.readDir(dirname) 485 if err != nil { 486 return err 487 } 488 489 for _, fi := range filesInDir { 490 if fi.IsDir() { 491 err := c.collectFiles(fi.Filename(), handleFiles) 492 if err != nil { 493 return err 494 } 495 } else { 496 f, active := c.newFileInfo(fi, bundleNot) 497 if active { 498 handleFiles(f) 499 } 500 } 501 } 502 503 return nil 504 } 505 506 func (c *capturer) readDir(dirname string) (pathLangFileFis, error) { 507 if c.sourceSpec.IgnoreFile(dirname) { 508 return nil, nil 509 } 510 511 dir, err := c.fs.Open(dirname) 512 if err != nil { 513 return nil, fmt.Errorf("readDir: %s", err) 514 } 515 defer dir.Close() 516 fis, err := dir.Readdir(-1) 517 if err != nil { 518 return nil, err 519 } 520 521 pfis := make(pathLangFileFis, 0, len(fis)) 522 523 for _, fi := range fis { 524 fip := fi.(pathLangFileFi) 525 526 if !c.sourceSpec.IgnoreFile(fip.Filename()) { 527 528 err := c.resolveRealPathIn(fip) 529 530 if err != nil { 531 // It may have been deleted in the meantime. 532 if err == errSkipCyclicDir || os.IsNotExist(err) { 533 continue 534 } 535 return nil, err 536 } 537 538 pfis = append(pfis, fip) 539 } 540 } 541 542 return pfis, nil 543 } 544 545 func (c *capturer) newFileInfo(fi pathLangFileFi, tp bundleDirType) (*fileInfo, bool) { 546 f := newFileInfo(c.sourceSpec, "", "", fi, tp) 547 return f, !f.disabled 548 } 549 550 type pathLangFile interface { 551 hugofs.LanguageAnnouncer 552 hugofs.FilePather 553 } 554 555 type pathLangFileFi interface { 556 os.FileInfo 557 pathLangFile 558 } 559 560 type pathLangFileFis []pathLangFileFi 561 562 type bundleDirs struct { 563 tp bundleDirType 564 // Maps languages to bundles. 565 bundles map[string]*bundleDir 566 567 // Keeps track of language overrides for non-content files, e.g. logo.en.png. 568 langOverrides map[string]bool 569 570 c *capturer 571 } 572 573 func newBundleDirs(tp bundleDirType, c *capturer) *bundleDirs { 574 return &bundleDirs{tp: tp, bundles: make(map[string]*bundleDir), langOverrides: make(map[string]bool), c: c} 575 } 576 577 type bundleDir struct { 578 tp bundleDirType 579 fi *fileInfo 580 581 resources map[string]*fileInfo 582 } 583 584 func (b bundleDir) clone() *bundleDir { 585 b.resources = make(map[string]*fileInfo) 586 fic := *b.fi 587 b.fi = &fic 588 return &b 589 } 590 591 func newBundleDir(fi *fileInfo, bundleType bundleDirType) *bundleDir { 592 return &bundleDir{fi: fi, tp: bundleType, resources: make(map[string]*fileInfo)} 593 } 594 595 func (b *bundleDirs) addBundleContentFile(fi *fileInfo) { 596 dir, found := b.bundles[fi.Lang()] 597 if !found { 598 // Every bundled content file needs a bundle header. 599 // If one does not exist in its language, we pick the default 600 // language version, or a random one if that doesn't exist, either. 601 tl := b.c.sourceSpec.DefaultContentLanguage 602 ldir, found := b.bundles[tl] 603 if !found { 604 // Just pick one. 605 for _, v := range b.bundles { 606 ldir = v 607 break 608 } 609 } 610 611 if ldir == nil { 612 panic(fmt.Sprintf("bundle not found for file %q", fi.Filename())) 613 } 614 615 dir = ldir.clone() 616 dir.fi.overriddenLang = fi.Lang() 617 b.bundles[fi.Lang()] = dir 618 } 619 620 dir.resources[fi.Path()] = fi 621 } 622 623 func (b *bundleDirs) addBundleFiles(fi *fileInfo) { 624 dir := filepath.ToSlash(fi.Dir()) 625 p := dir + fi.TranslationBaseName() + "." + fi.Ext() 626 for lang, bdir := range b.bundles { 627 key := path.Join(lang, p) 628 629 // Given mypage.de.md (German translation) and mypage.md we pick the most 630 // specific for that language. 631 if fi.Lang() == lang || !b.langOverrides[key] { 632 bdir.resources[key] = fi 633 } 634 b.langOverrides[key] = true 635 } 636 } 637 638 func (b *bundleDirs) addBundleHeader(fi *fileInfo) { 639 b.bundles[fi.Lang()] = newBundleDir(fi, b.tp) 640 } 641 642 func (c *capturer) isSeen(dirname string) bool { 643 c.seenMu.Lock() 644 defer c.seenMu.Unlock() 645 seen := c.seen[dirname] 646 c.seen[dirname] = true 647 if seen { 648 c.logger.WARN.Printf("Content dir %q already processed; skipped to avoid infinite recursion.", dirname) 649 return true 650 651 } 652 return false 653 } 654 655 func (c *capturer) resolveRealPath(path string) (pathLangFileFi, error) { 656 fileInfo, err := c.lstatIfPossible(path) 657 if err != nil { 658 return nil, err 659 } 660 return fileInfo, c.resolveRealPathIn(fileInfo) 661 } 662 663 func (c *capturer) resolveRealPathIn(fileInfo pathLangFileFi) error { 664 665 basePath := fileInfo.BaseDir() 666 path := fileInfo.Filename() 667 668 realPath := path 669 670 if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink { 671 link, err := filepath.EvalSymlinks(path) 672 if err != nil { 673 return fmt.Errorf("Cannot read symbolic link %q, error was: %s", path, err) 674 } 675 676 // This is a file on the outside of any base fs, so we have to use the os package. 677 sfi, err := os.Stat(link) 678 if err != nil { 679 return fmt.Errorf("Cannot stat %q, error was: %s", link, err) 680 } 681 682 // TODO(bep) improve all of this. 683 if a, ok := fileInfo.(*hugofs.LanguageFileInfo); ok { 684 a.FileInfo = sfi 685 } 686 687 realPath = link 688 689 if realPath != path && sfi.IsDir() && c.isSeen(realPath) { 690 // Avoid cyclic symlinks. 691 // Note that this may prevent some uses that isn't cyclic and also 692 // potential useful, but this implementation is both robust and simple: 693 // We stop at the first directory that we have seen before, e.g. 694 // /content/blog will only be processed once. 695 return errSkipCyclicDir 696 } 697 698 if c.contentChanges != nil { 699 // Keep track of symbolic links in watch mode. 700 var from, to string 701 if sfi.IsDir() { 702 from = realPath 703 to = path 704 705 if !strings.HasSuffix(to, helpers.FilePathSeparator) { 706 to = to + helpers.FilePathSeparator 707 } 708 if !strings.HasSuffix(from, helpers.FilePathSeparator) { 709 from = from + helpers.FilePathSeparator 710 } 711 712 if !strings.HasSuffix(basePath, helpers.FilePathSeparator) { 713 basePath = basePath + helpers.FilePathSeparator 714 } 715 716 if strings.HasPrefix(from, basePath) { 717 // With symbolic links inside /content we need to keep 718 // a reference to both. This may be confusing with --navigateToChanged 719 // but the user has chosen this him or herself. 720 c.contentChanges.addSymbolicLinkMapping(from, from) 721 } 722 723 } else { 724 from = realPath 725 to = path 726 } 727 728 c.contentChanges.addSymbolicLinkMapping(from, to) 729 } 730 } 731 732 return nil 733 } 734 735 func (c *capturer) lstatIfPossible(path string) (pathLangFileFi, error) { 736 fi, err := helpers.LstatIfPossible(c.fs, path) 737 if err != nil { 738 return nil, err 739 } 740 return fi.(pathLangFileFi), nil 741 }