github.com/olliephillips/hugo@v0.42.2/hugolib/page_bundler_capture.go (about)

     1  // Copyright 2017-present The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package hugolib
    15  
    16  import (
    17  	"errors"
    18  	"fmt"
    19  	"os"
    20  	"path"
    21  	"path/filepath"
    22  	"runtime"
    23  	"strings"
    24  	"sync"
    25  
    26  	"github.com/spf13/afero"
    27  
    28  	"github.com/gohugoio/hugo/hugofs"
    29  
    30  	"github.com/gohugoio/hugo/helpers"
    31  
    32  	"golang.org/x/sync/errgroup"
    33  
    34  	"github.com/gohugoio/hugo/source"
    35  	jww "github.com/spf13/jwalterweatherman"
    36  )
    37  
// errSkipCyclicDir is returned by resolveRealPathIn when a symbolic link
// resolves to a directory that has already been visited. readDir treats it
// as a signal to skip the entry rather than as a failure.
var errSkipCyclicDir = errors.New("skip potential cyclic dir")
    39  
// capturer reads directories from fs, classifies what it finds and passes
// the resulting single files, copy-only files and bundles on to handler.
type capturer struct {
	// To prevent symbolic link cycles: Visit same folder only once.
	// Keyed by the resolved (real) directory name; see isSeen.
	seen   map[string]bool
	seenMu sync.Mutex // guards seen

	// Receives everything that is captured.
	handler captureResultHandler

	sourceSpec *source.SourceSpec
	fs         afero.Fs     // set to sourceSpec.SourceFs by newCapturer
	logger     *jww.Notepad // used for the "already processed" warning in isSeen

	// Filenames limits the content to process to a list of filenames/directories.
	// This is used for partial building in server mode.
	filenames []string

	// Used to determine how to handle content changes in server mode.
	// May be nil; symbolic link mappings are only recorded when it is set.
	contentChanges *contentChangeMap

	// Semaphore used to throttle the concurrent sub directory handling.
	sem chan bool
}
    61  
    62  func newCapturer(
    63  	logger *jww.Notepad,
    64  	sourceSpec *source.SourceSpec,
    65  	handler captureResultHandler,
    66  	contentChanges *contentChangeMap,
    67  	filenames ...string) *capturer {
    68  
    69  	numWorkers := 4
    70  	if n := runtime.NumCPU(); n > numWorkers {
    71  		numWorkers = n
    72  	}
    73  
    74  	c := &capturer{
    75  		sem:            make(chan bool, numWorkers),
    76  		handler:        handler,
    77  		sourceSpec:     sourceSpec,
    78  		fs:             sourceSpec.SourceFs,
    79  		logger:         logger,
    80  		contentChanges: contentChanges,
    81  		seen:           make(map[string]bool),
    82  		filenames:      filenames}
    83  
    84  	return c
    85  }
    86  
// captureResultHandler receives everything the capturer finds: captured
// files and bundles ready to be processed are passed on to these methods.
type captureResultHandler interface {
	// handleSingles receives content files that are handled on their own,
	// i.e. outside of a bundle.
	handleSingles(fis ...*fileInfo)
	// handleCopyFiles receives non-content files that are not part of a bundle.
	handleCopyFiles(fis ...pathLangFile)
	captureBundlesHandler
}
    94  
// captureBundlesHandler is the bundle-only part of captureResultHandler.
type captureBundlesHandler interface {
	// handleBundles receives the bundles (one per language) captured for a
	// single directory.
	handleBundles(b *bundleDirs)
}
    98  
// captureResultHandlerChain fans captured results out to several handlers.
// Bundles go to every handler; singles and copy files only go to the
// handlers that also implement captureResultHandler.
type captureResultHandlerChain struct {
	handlers []captureBundlesHandler
}
   102  
   103  func (c *captureResultHandlerChain) handleSingles(fis ...*fileInfo) {
   104  	for _, h := range c.handlers {
   105  		if hh, ok := h.(captureResultHandler); ok {
   106  			hh.handleSingles(fis...)
   107  		}
   108  	}
   109  }
   110  func (c *captureResultHandlerChain) handleBundles(b *bundleDirs) {
   111  	for _, h := range c.handlers {
   112  		h.handleBundles(b)
   113  	}
   114  }
   115  
   116  func (c *captureResultHandlerChain) handleCopyFiles(files ...pathLangFile) {
   117  	for _, h := range c.handlers {
   118  		if hh, ok := h.(captureResultHandler); ok {
   119  			hh.handleCopyFiles(files...)
   120  		}
   121  	}
   122  }
   123  
   124  func (c *capturer) capturePartial(filenames ...string) error {
   125  	handled := make(map[string]bool)
   126  
   127  	for _, filename := range filenames {
   128  		dir, resolvedFilename, tp := c.contentChanges.resolveAndRemove(filename)
   129  		if handled[resolvedFilename] {
   130  			continue
   131  		}
   132  
   133  		handled[resolvedFilename] = true
   134  
   135  		switch tp {
   136  		case bundleLeaf:
   137  			if err := c.handleDir(resolvedFilename); err != nil {
   138  				return err
   139  			}
   140  		case bundleBranch:
   141  			if err := c.handleBranchDir(resolvedFilename); err != nil {
   142  				return err
   143  			}
   144  		default:
   145  			fi, err := c.resolveRealPath(resolvedFilename)
   146  			if os.IsNotExist(err) {
   147  				// File has been deleted.
   148  				continue
   149  			}
   150  
   151  			// Just in case the owning dir is a new symlink -- this will
   152  			// create the proper mapping for it.
   153  			c.resolveRealPath(dir)
   154  
   155  			f, active := c.newFileInfo(fi, tp)
   156  			if active {
   157  				c.copyOrHandleSingle(f)
   158  			}
   159  		}
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  func (c *capturer) capture() error {
   166  	if len(c.filenames) > 0 {
   167  		return c.capturePartial(c.filenames...)
   168  	}
   169  
   170  	err := c.handleDir(helpers.FilePathSeparator)
   171  	if err != nil {
   172  		return err
   173  	}
   174  
   175  	return nil
   176  }
   177  
   178  func (c *capturer) handleNestedDir(dirname string) error {
   179  	select {
   180  	case c.sem <- true:
   181  		var g errgroup.Group
   182  
   183  		g.Go(func() error {
   184  			defer func() {
   185  				<-c.sem
   186  			}()
   187  			return c.handleDir(dirname)
   188  		})
   189  		return g.Wait()
   190  	default:
   191  		// For deeply nested file trees, waiting for a semaphore wil deadlock.
   192  		return c.handleDir(dirname)
   193  	}
   194  }
   195  
   196  // This handles a bundle branch and its resources only. This is used
   197  // in server mode on changes. If this dir does not (anymore) represent a bundle
   198  // branch, the handling is upgraded to the full handleDir method.
   199  func (c *capturer) handleBranchDir(dirname string) error {
   200  	files, err := c.readDir(dirname)
   201  	if err != nil {
   202  
   203  		return err
   204  	}
   205  
   206  	var (
   207  		dirType bundleDirType
   208  	)
   209  
   210  	for _, fi := range files {
   211  		if !fi.IsDir() {
   212  			tp, _ := classifyBundledFile(fi.RealName())
   213  			if dirType == bundleNot {
   214  				dirType = tp
   215  			}
   216  
   217  			if dirType == bundleLeaf {
   218  				return c.handleDir(dirname)
   219  			}
   220  		}
   221  	}
   222  
   223  	if dirType != bundleBranch {
   224  		return c.handleDir(dirname)
   225  	}
   226  
   227  	dirs := newBundleDirs(bundleBranch, c)
   228  
   229  	var secondPass []*fileInfo
   230  
   231  	// Handle potential bundle headers first.
   232  	for _, fi := range files {
   233  		if fi.IsDir() {
   234  			continue
   235  		}
   236  
   237  		tp, isContent := classifyBundledFile(fi.RealName())
   238  
   239  		f, active := c.newFileInfo(fi, tp)
   240  
   241  		if !active {
   242  			continue
   243  		}
   244  
   245  		if !f.isOwner() {
   246  			if !isContent {
   247  				// This is a partial update -- we only care about the files that
   248  				// is in this bundle.
   249  				secondPass = append(secondPass, f)
   250  			}
   251  			continue
   252  		}
   253  		dirs.addBundleHeader(f)
   254  	}
   255  
   256  	for _, f := range secondPass {
   257  		dirs.addBundleFiles(f)
   258  	}
   259  
   260  	c.handler.handleBundles(dirs)
   261  
   262  	return nil
   263  
   264  }
   265  
   266  func (c *capturer) handleDir(dirname string) error {
   267  
   268  	files, err := c.readDir(dirname)
   269  	if err != nil {
   270  		return err
   271  	}
   272  
   273  	type dirState int
   274  
   275  	const (
   276  		dirStateDefault dirState = iota
   277  
   278  		dirStateAssetsOnly
   279  		dirStateSinglesOnly
   280  	)
   281  
   282  	var (
   283  		fileBundleTypes = make([]bundleDirType, len(files))
   284  
   285  		// Start with the assumption that this dir contains only non-content assets (images etc.)
   286  		// If that is still true after we had a first look at the list of files, we
   287  		// can just copy the files to destination. We will still have to look at the
   288  		// sub-folders for potential bundles.
   289  		state = dirStateAssetsOnly
   290  
   291  		// Start with the assumption that this dir is not a bundle.
   292  		// A directory is a bundle if it contains a index content file,
   293  		// e.g. index.md (a leaf bundle) or a _index.md (a branch bundle).
   294  		bundleType = bundleNot
   295  	)
   296  
   297  	/* First check for any content files.
   298  	- If there are none, then this is a assets folder only (images etc.)
   299  	and we can just plainly copy them to
   300  	destination.
   301  	- If this is a section with no image etc. or similar, we can just handle it
   302  	as it was a single content file.
   303  	*/
   304  	var hasNonContent, isBranch bool
   305  
   306  	for i, fi := range files {
   307  		if !fi.IsDir() {
   308  			tp, isContent := classifyBundledFile(fi.RealName())
   309  
   310  			fileBundleTypes[i] = tp
   311  			if !isBranch {
   312  				isBranch = tp == bundleBranch
   313  			}
   314  
   315  			if isContent {
   316  				// This is not a assets-only folder.
   317  				state = dirStateDefault
   318  			} else {
   319  				hasNonContent = true
   320  			}
   321  		}
   322  	}
   323  
   324  	if isBranch && !hasNonContent {
   325  		// This is a section or similar with no need for any bundle handling.
   326  		state = dirStateSinglesOnly
   327  	}
   328  
   329  	if state > dirStateDefault {
   330  		return c.handleNonBundle(dirname, files, state == dirStateSinglesOnly)
   331  	}
   332  
   333  	var fileInfos = make([]*fileInfo, 0, len(files))
   334  
   335  	for i, fi := range files {
   336  
   337  		currentType := bundleNot
   338  
   339  		if !fi.IsDir() {
   340  			currentType = fileBundleTypes[i]
   341  			if bundleType == bundleNot && currentType != bundleNot {
   342  				bundleType = currentType
   343  			}
   344  		}
   345  
   346  		if bundleType == bundleNot && currentType != bundleNot {
   347  			bundleType = currentType
   348  		}
   349  
   350  		f, active := c.newFileInfo(fi, currentType)
   351  
   352  		if !active {
   353  			continue
   354  		}
   355  
   356  		fileInfos = append(fileInfos, f)
   357  	}
   358  
   359  	var todo []*fileInfo
   360  
   361  	if bundleType != bundleLeaf {
   362  		for _, fi := range fileInfos {
   363  			if fi.FileInfo().IsDir() {
   364  				// Handle potential nested bundles.
   365  				if err := c.handleNestedDir(fi.Path()); err != nil {
   366  					return err
   367  				}
   368  			} else if bundleType == bundleNot || (!fi.isOwner() && fi.isContentFile()) {
   369  				// Not in a bundle.
   370  				c.copyOrHandleSingle(fi)
   371  			} else {
   372  				// This is a section folder or similar with non-content files in it.
   373  				todo = append(todo, fi)
   374  			}
   375  		}
   376  	} else {
   377  		todo = fileInfos
   378  	}
   379  
   380  	if len(todo) == 0 {
   381  		return nil
   382  	}
   383  
   384  	dirs, err := c.createBundleDirs(todo, bundleType)
   385  	if err != nil {
   386  		return err
   387  	}
   388  
   389  	// Send the bundle to the next step in the processor chain.
   390  	c.handler.handleBundles(dirs)
   391  
   392  	return nil
   393  }
   394  
   395  func (c *capturer) handleNonBundle(
   396  	dirname string,
   397  	fileInfos pathLangFileFis,
   398  	singlesOnly bool) error {
   399  
   400  	for _, fi := range fileInfos {
   401  		if fi.IsDir() {
   402  			if err := c.handleNestedDir(fi.Filename()); err != nil {
   403  				return err
   404  			}
   405  		} else {
   406  			if singlesOnly {
   407  				f, active := c.newFileInfo(fi, bundleNot)
   408  				if !active {
   409  					continue
   410  				}
   411  				c.handler.handleSingles(f)
   412  			} else {
   413  				c.handler.handleCopyFiles(fi)
   414  			}
   415  		}
   416  	}
   417  
   418  	return nil
   419  }
   420  
   421  func (c *capturer) copyOrHandleSingle(fi *fileInfo) {
   422  	if fi.isContentFile() {
   423  		c.handler.handleSingles(fi)
   424  	} else {
   425  		// These do not currently need any further processing.
   426  		c.handler.handleCopyFiles(fi)
   427  	}
   428  }
   429  
   430  func (c *capturer) createBundleDirs(fileInfos []*fileInfo, bundleType bundleDirType) (*bundleDirs, error) {
   431  	dirs := newBundleDirs(bundleType, c)
   432  
   433  	for _, fi := range fileInfos {
   434  		if fi.FileInfo().IsDir() {
   435  			var collector func(fis ...*fileInfo)
   436  
   437  			if bundleType == bundleBranch {
   438  				// All files in the current directory are part of this bundle.
   439  				// Trying to include sub folders in these bundles are filled with ambiguity.
   440  				collector = func(fis ...*fileInfo) {
   441  					for _, fi := range fis {
   442  						c.copyOrHandleSingle(fi)
   443  					}
   444  				}
   445  			} else {
   446  				// All nested files and directories are part of this bundle.
   447  				collector = func(fis ...*fileInfo) {
   448  					fileInfos = append(fileInfos, fis...)
   449  				}
   450  			}
   451  			err := c.collectFiles(fi.Path(), collector)
   452  			if err != nil {
   453  				return nil, err
   454  			}
   455  
   456  		} else if fi.isOwner() {
   457  			// There can be more than one language, so:
   458  			// 1. Content files must be attached to its language's bundle.
   459  			// 2. Other files must be attached to all languages.
   460  			// 3. Every content file needs a bundle header.
   461  			dirs.addBundleHeader(fi)
   462  		}
   463  	}
   464  
   465  	for _, fi := range fileInfos {
   466  		if fi.FileInfo().IsDir() || fi.isOwner() {
   467  			continue
   468  		}
   469  
   470  		if fi.isContentFile() {
   471  			if bundleType != bundleBranch {
   472  				dirs.addBundleContentFile(fi)
   473  			}
   474  		} else {
   475  			dirs.addBundleFiles(fi)
   476  		}
   477  	}
   478  
   479  	return dirs, nil
   480  }
   481  
   482  func (c *capturer) collectFiles(dirname string, handleFiles func(fis ...*fileInfo)) error {
   483  
   484  	filesInDir, err := c.readDir(dirname)
   485  	if err != nil {
   486  		return err
   487  	}
   488  
   489  	for _, fi := range filesInDir {
   490  		if fi.IsDir() {
   491  			err := c.collectFiles(fi.Filename(), handleFiles)
   492  			if err != nil {
   493  				return err
   494  			}
   495  		} else {
   496  			f, active := c.newFileInfo(fi, bundleNot)
   497  			if active {
   498  				handleFiles(f)
   499  			}
   500  		}
   501  	}
   502  
   503  	return nil
   504  }
   505  
   506  func (c *capturer) readDir(dirname string) (pathLangFileFis, error) {
   507  	if c.sourceSpec.IgnoreFile(dirname) {
   508  		return nil, nil
   509  	}
   510  
   511  	dir, err := c.fs.Open(dirname)
   512  	if err != nil {
   513  		return nil, fmt.Errorf("readDir: %s", err)
   514  	}
   515  	defer dir.Close()
   516  	fis, err := dir.Readdir(-1)
   517  	if err != nil {
   518  		return nil, err
   519  	}
   520  
   521  	pfis := make(pathLangFileFis, 0, len(fis))
   522  
   523  	for _, fi := range fis {
   524  		fip := fi.(pathLangFileFi)
   525  
   526  		if !c.sourceSpec.IgnoreFile(fip.Filename()) {
   527  
   528  			err := c.resolveRealPathIn(fip)
   529  
   530  			if err != nil {
   531  				// It may have been deleted in the meantime.
   532  				if err == errSkipCyclicDir || os.IsNotExist(err) {
   533  					continue
   534  				}
   535  				return nil, err
   536  			}
   537  
   538  			pfis = append(pfis, fip)
   539  		}
   540  	}
   541  
   542  	return pfis, nil
   543  }
   544  
   545  func (c *capturer) newFileInfo(fi pathLangFileFi, tp bundleDirType) (*fileInfo, bool) {
   546  	f := newFileInfo(c.sourceSpec, "", "", fi, tp)
   547  	return f, !f.disabled
   548  }
   549  
// pathLangFile combines the language and the path related interfaces from
// hugofs. It is the type accepted by captureResultHandler.handleCopyFiles.
type pathLangFile interface {
	hugofs.LanguageAnnouncer
	hugofs.FilePather
}
   554  
// pathLangFileFi is an os.FileInfo that is also a pathLangFile. The capturer
// expects the entries returned by its file system to implement it (see the
// type assertions in readDir and lstatIfPossible).
type pathLangFileFi interface {
	os.FileInfo
	pathLangFile
}
   559  
// pathLangFileFis is a list of directory entries as returned by readDir.
type pathLangFileFis []pathLangFileFi
   561  
// bundleDirs holds the bundles captured for one directory, one per language.
type bundleDirs struct {
	// The bundle type given to every header added via addBundleHeader.
	tp bundleDirType
	// Maps languages to bundles.
	bundles map[string]*bundleDir

	// Keeps track of language overrides for non-content files, e.g. logo.en.png.
	// Keyed by the same language-prefixed key that addBundleFiles uses for
	// the resources.
	langOverrides map[string]bool

	// The owning capturer; used to look up the default content language.
	c *capturer
}
   572  
   573  func newBundleDirs(tp bundleDirType, c *capturer) *bundleDirs {
   574  	return &bundleDirs{tp: tp, bundles: make(map[string]*bundleDir), langOverrides: make(map[string]bool), c: c}
   575  }
   576  
// bundleDir is a single bundle: its header file plus the files attached to it.
type bundleDir struct {
	tp bundleDirType
	// The bundle header (the owner file).
	fi *fileInfo

	// The files that belong to this bundle. Content files are keyed by
	// their path (see addBundleContentFile), other files by a
	// language-prefixed key (see addBundleFiles).
	resources map[string]*fileInfo
}
   583  
   584  func (b bundleDir) clone() *bundleDir {
   585  	b.resources = make(map[string]*fileInfo)
   586  	fic := *b.fi
   587  	b.fi = &fic
   588  	return &b
   589  }
   590  
   591  func newBundleDir(fi *fileInfo, bundleType bundleDirType) *bundleDir {
   592  	return &bundleDir{fi: fi, tp: bundleType, resources: make(map[string]*fileInfo)}
   593  }
   594  
   595  func (b *bundleDirs) addBundleContentFile(fi *fileInfo) {
   596  	dir, found := b.bundles[fi.Lang()]
   597  	if !found {
   598  		// Every bundled content file needs a bundle header.
   599  		// If one does not exist in its language, we pick the default
   600  		// language version, or a random one if that doesn't exist, either.
   601  		tl := b.c.sourceSpec.DefaultContentLanguage
   602  		ldir, found := b.bundles[tl]
   603  		if !found {
   604  			// Just pick one.
   605  			for _, v := range b.bundles {
   606  				ldir = v
   607  				break
   608  			}
   609  		}
   610  
   611  		if ldir == nil {
   612  			panic(fmt.Sprintf("bundle not found for file %q", fi.Filename()))
   613  		}
   614  
   615  		dir = ldir.clone()
   616  		dir.fi.overriddenLang = fi.Lang()
   617  		b.bundles[fi.Lang()] = dir
   618  	}
   619  
   620  	dir.resources[fi.Path()] = fi
   621  }
   622  
   623  func (b *bundleDirs) addBundleFiles(fi *fileInfo) {
   624  	dir := filepath.ToSlash(fi.Dir())
   625  	p := dir + fi.TranslationBaseName() + "." + fi.Ext()
   626  	for lang, bdir := range b.bundles {
   627  		key := path.Join(lang, p)
   628  
   629  		// Given mypage.de.md (German translation) and mypage.md we pick the most
   630  		// specific for that language.
   631  		if fi.Lang() == lang || !b.langOverrides[key] {
   632  			bdir.resources[key] = fi
   633  		}
   634  		b.langOverrides[key] = true
   635  	}
   636  }
   637  
   638  func (b *bundleDirs) addBundleHeader(fi *fileInfo) {
   639  	b.bundles[fi.Lang()] = newBundleDir(fi, b.tp)
   640  }
   641  
   642  func (c *capturer) isSeen(dirname string) bool {
   643  	c.seenMu.Lock()
   644  	defer c.seenMu.Unlock()
   645  	seen := c.seen[dirname]
   646  	c.seen[dirname] = true
   647  	if seen {
   648  		c.logger.WARN.Printf("Content dir %q already processed; skipped to avoid infinite recursion.", dirname)
   649  		return true
   650  
   651  	}
   652  	return false
   653  }
   654  
   655  func (c *capturer) resolveRealPath(path string) (pathLangFileFi, error) {
   656  	fileInfo, err := c.lstatIfPossible(path)
   657  	if err != nil {
   658  		return nil, err
   659  	}
   660  	return fileInfo, c.resolveRealPathIn(fileInfo)
   661  }
   662  
   663  func (c *capturer) resolveRealPathIn(fileInfo pathLangFileFi) error {
   664  
   665  	basePath := fileInfo.BaseDir()
   666  	path := fileInfo.Filename()
   667  
   668  	realPath := path
   669  
   670  	if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink {
   671  		link, err := filepath.EvalSymlinks(path)
   672  		if err != nil {
   673  			return fmt.Errorf("Cannot read symbolic link %q, error was: %s", path, err)
   674  		}
   675  
   676  		// This is a file on the outside of any base fs, so we have to use the os package.
   677  		sfi, err := os.Stat(link)
   678  		if err != nil {
   679  			return fmt.Errorf("Cannot stat  %q, error was: %s", link, err)
   680  		}
   681  
   682  		// TODO(bep) improve all of this.
   683  		if a, ok := fileInfo.(*hugofs.LanguageFileInfo); ok {
   684  			a.FileInfo = sfi
   685  		}
   686  
   687  		realPath = link
   688  
   689  		if realPath != path && sfi.IsDir() && c.isSeen(realPath) {
   690  			// Avoid cyclic symlinks.
   691  			// Note that this may prevent some uses that isn't cyclic and also
   692  			// potential useful, but this implementation is both robust and simple:
   693  			// We stop at the first directory that we have seen before, e.g.
   694  			// /content/blog will only be processed once.
   695  			return errSkipCyclicDir
   696  		}
   697  
   698  		if c.contentChanges != nil {
   699  			// Keep track of symbolic links in watch mode.
   700  			var from, to string
   701  			if sfi.IsDir() {
   702  				from = realPath
   703  				to = path
   704  
   705  				if !strings.HasSuffix(to, helpers.FilePathSeparator) {
   706  					to = to + helpers.FilePathSeparator
   707  				}
   708  				if !strings.HasSuffix(from, helpers.FilePathSeparator) {
   709  					from = from + helpers.FilePathSeparator
   710  				}
   711  
   712  				if !strings.HasSuffix(basePath, helpers.FilePathSeparator) {
   713  					basePath = basePath + helpers.FilePathSeparator
   714  				}
   715  
   716  				if strings.HasPrefix(from, basePath) {
   717  					// With symbolic links inside /content we need to keep
   718  					// a reference to both. This may be confusing with --navigateToChanged
   719  					// but the user has chosen this him or herself.
   720  					c.contentChanges.addSymbolicLinkMapping(from, from)
   721  				}
   722  
   723  			} else {
   724  				from = realPath
   725  				to = path
   726  			}
   727  
   728  			c.contentChanges.addSymbolicLinkMapping(from, to)
   729  		}
   730  	}
   731  
   732  	return nil
   733  }
   734  
   735  func (c *capturer) lstatIfPossible(path string) (pathLangFileFi, error) {
   736  	fi, err := helpers.LstatIfPossible(c.fs, path)
   737  	if err != nil {
   738  		return nil, err
   739  	}
   740  	return fi.(pathLangFileFi), nil
   741  }