github.com/linchen2chris/hugo@v0.0.0-20230307053224-cec209389705/hugolib/pages_capture.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package hugolib
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	pth "path"
    20  	"path/filepath"
    21  	"reflect"
    22  
    23  	"github.com/gohugoio/hugo/common/herrors"
    24  	"github.com/gohugoio/hugo/common/maps"
    25  
    26  	"github.com/gohugoio/hugo/parser/pageparser"
    27  
    28  	"github.com/gohugoio/hugo/hugofs/files"
    29  
    30  	"github.com/gohugoio/hugo/source"
    31  
    32  	"github.com/gohugoio/hugo/common/loggers"
    33  	"github.com/gohugoio/hugo/hugofs"
    34  	"github.com/spf13/afero"
    35  )
    36  
    37  const (
    38  	walkIsRootFileMetaKey = "walkIsRootFileMetaKey"
    39  )
    40  
    41  func newPagesCollector(
    42  	sp *source.SourceSpec,
    43  	contentMap *pageMaps,
    44  	logger loggers.Logger,
    45  	contentTracker *contentChangeMap,
    46  	proc pagesCollectorProcessorProvider, filenames ...string) *pagesCollector {
    47  	return &pagesCollector{
    48  		fs:         sp.SourceFs,
    49  		contentMap: contentMap,
    50  		proc:       proc,
    51  		sp:         sp,
    52  		logger:     logger,
    53  		filenames:  filenames,
    54  		tracker:    contentTracker,
    55  	}
    56  }
    57  
// contentDirKey identifies one changed content file together with the
// directory and bundle type it was resolved to. It is used as a map key in
// Collect to de-duplicate the work done in partial builds.
//
// NOTE: Collect constructs this type with a positional literal, so the field
// order must not change.
type contentDirKey struct {
	// dirname is the directory the changed file was resolved to by the
	// content tracker.
	dirname string
	// filename is the changed file itself.
	filename string
	// tp is the bundle type the tracker reported for the file.
	tp bundleDirType
}
    63  
// fileinfoBundle groups the files that make up one bundle for a single
// language.
type fileinfoBundle struct {
	// header is the bundle's header file. addToBundle sets it either to the
	// bundle header found on disk or to a clone of another language's header.
	header hugofs.FileMetaInfo
	// resources holds every non-header file added to the bundle.
	resources []hugofs.FileMetaInfo
}
    68  
    69  func (b *fileinfoBundle) containsResource(name string) bool {
    70  	for _, r := range b.resources {
    71  		if r.Name() == name {
    72  			return true
    73  		}
    74  	}
    75  
    76  	return false
    77  }
    78  
// pageBundles maps a language to the bundle collected for that language.
type pageBundles map[string]*fileinfoBundle
    80  
// pagesCollector walks the content file system, groups the files it finds
// into bundles and regular content files, and passes them on to proc.
type pagesCollector struct {
	sp *source.SourceSpec
	// fs is the file system that is walked; newPagesCollector sets it to
	// sp.SourceFs.
	fs     afero.Fs
	logger loggers.Logger

	contentMap *pageMaps

	// Ordered list (bundle headers first) used in partial builds.
	filenames []string

	// Content files tracker used in partial builds.
	tracker *contentChangeMap

	// proc receives the collected bundles and files.
	proc pagesCollectorProcessorProvider
}
    96  
    97  // isCascadingEdit returns whether the dir represents a cascading edit.
    98  // That is, if a front matter cascade section is removed, added or edited.
    99  // If this is the case we must re-evaluate its descendants.
   100  func (c *pagesCollector) isCascadingEdit(dir contentDirKey) (bool, string) {
   101  	// This is either a section or a taxonomy node. Find it.
   102  	prefix := cleanTreeKey(dir.dirname)
   103  
   104  	section := "/"
   105  	var isCascade bool
   106  
   107  	c.contentMap.walkBranchesPrefix(prefix, func(s string, n *contentNode) bool {
   108  		if n.fi == nil || dir.filename != n.fi.Meta().Filename {
   109  			return false
   110  		}
   111  
   112  		f, err := n.fi.Meta().Open()
   113  		if err != nil {
   114  			// File may have been removed, assume a cascading edit.
   115  			// Some false positives is not too bad.
   116  			isCascade = true
   117  			return true
   118  		}
   119  
   120  		pf, err := pageparser.ParseFrontMatterAndContent(f)
   121  		f.Close()
   122  		if err != nil {
   123  			isCascade = true
   124  			return true
   125  		}
   126  
   127  		if n.p == nil || n.p.bucket == nil {
   128  			return true
   129  		}
   130  
   131  		section = s
   132  
   133  		maps.PrepareParams(pf.FrontMatter)
   134  		cascade1, ok := pf.FrontMatter["cascade"]
   135  		hasCascade := n.p.bucket.cascade != nil && len(n.p.bucket.cascade) > 0
   136  		if !ok {
   137  			isCascade = hasCascade
   138  
   139  			return true
   140  		}
   141  
   142  		if !hasCascade {
   143  			isCascade = true
   144  			return true
   145  		}
   146  
   147  		for _, v := range n.p.bucket.cascade {
   148  			isCascade = !reflect.DeepEqual(cascade1, v)
   149  			if isCascade {
   150  				break
   151  			}
   152  		}
   153  
   154  		return true
   155  	})
   156  
   157  	return isCascade, section
   158  }
   159  
   160  // Collect.
   161  func (c *pagesCollector) Collect() (collectErr error) {
   162  	c.proc.Start(context.Background())
   163  	defer func() {
   164  		err := c.proc.Wait()
   165  		if collectErr == nil {
   166  			collectErr = err
   167  		}
   168  	}()
   169  
   170  	if len(c.filenames) == 0 {
   171  		// Collect everything.
   172  		collectErr = c.collectDir("", false, nil)
   173  	} else {
   174  		for _, pm := range c.contentMap.pmaps {
   175  			pm.cfg.isRebuild = true
   176  		}
   177  		dirs := make(map[contentDirKey]bool)
   178  		for _, filename := range c.filenames {
   179  			dir, btype := c.tracker.resolveAndRemove(filename)
   180  			dirs[contentDirKey{dir, filename, btype}] = true
   181  		}
   182  
   183  		for dir := range dirs {
   184  			for _, pm := range c.contentMap.pmaps {
   185  				pm.s.ResourceSpec.DeleteBySubstring(dir.dirname)
   186  			}
   187  
   188  			switch dir.tp {
   189  			case bundleLeaf:
   190  				collectErr = c.collectDir(dir.dirname, true, nil)
   191  			case bundleBranch:
   192  				isCascading, section := c.isCascadingEdit(dir)
   193  
   194  				if isCascading {
   195  					c.contentMap.deleteSection(section)
   196  				}
   197  				collectErr = c.collectDir(dir.dirname, !isCascading, nil)
   198  			default:
   199  				// We always start from a directory.
   200  				collectErr = c.collectDir(dir.dirname, true, func(fim hugofs.FileMetaInfo) bool {
   201  					return dir.filename == fim.Meta().Filename
   202  				})
   203  			}
   204  
   205  			if collectErr != nil {
   206  				break
   207  			}
   208  		}
   209  
   210  	}
   211  
   212  	return
   213  }
   214  
   215  func (c *pagesCollector) isBundleHeader(fi hugofs.FileMetaInfo) bool {
   216  	class := fi.Meta().Classifier
   217  	return class == files.ContentClassLeaf || class == files.ContentClassBranch
   218  }
   219  
   220  func (c *pagesCollector) getLang(fi hugofs.FileMetaInfo) string {
   221  	lang := fi.Meta().Lang
   222  	if lang != "" {
   223  		return lang
   224  	}
   225  
   226  	return c.sp.DefaultContentLanguage
   227  }
   228  
   229  func (c *pagesCollector) addToBundle(info hugofs.FileMetaInfo, btyp bundleDirType, bundles pageBundles) error {
   230  	getBundle := func(lang string) *fileinfoBundle {
   231  		return bundles[lang]
   232  	}
   233  
   234  	cloneBundle := func(lang string) *fileinfoBundle {
   235  		// Every bundled content file needs a content file header.
   236  		// Use the default content language if found, else just
   237  		// pick one.
   238  		var (
   239  			source *fileinfoBundle
   240  			found  bool
   241  		)
   242  
   243  		source, found = bundles[c.sp.DefaultContentLanguage]
   244  		if !found {
   245  			for _, b := range bundles {
   246  				source = b
   247  				break
   248  			}
   249  		}
   250  
   251  		if source == nil {
   252  			panic(fmt.Sprintf("no source found, %d", len(bundles)))
   253  		}
   254  
   255  		clone := c.cloneFileInfo(source.header)
   256  		clone.Meta().Lang = lang
   257  
   258  		return &fileinfoBundle{
   259  			header: clone,
   260  		}
   261  	}
   262  
   263  	lang := c.getLang(info)
   264  	bundle := getBundle(lang)
   265  	isBundleHeader := c.isBundleHeader(info)
   266  	if bundle != nil && isBundleHeader {
   267  		// index.md file inside a bundle, see issue 6208.
   268  		info.Meta().Classifier = files.ContentClassContent
   269  		isBundleHeader = false
   270  	}
   271  	classifier := info.Meta().Classifier
   272  	isContent := classifier == files.ContentClassContent
   273  	if bundle == nil {
   274  		if isBundleHeader {
   275  			bundle = &fileinfoBundle{header: info}
   276  			bundles[lang] = bundle
   277  		} else {
   278  			if btyp == bundleBranch {
   279  				// No special logic for branch bundles.
   280  				// Every language needs its own _index.md file.
   281  				// Also, we only clone bundle headers for lonesome, bundled,
   282  				// content files.
   283  				return c.handleFiles(info)
   284  			}
   285  
   286  			if isContent {
   287  				bundle = cloneBundle(lang)
   288  				bundles[lang] = bundle
   289  			}
   290  		}
   291  	}
   292  
   293  	if !isBundleHeader && bundle != nil {
   294  		bundle.resources = append(bundle.resources, info)
   295  	}
   296  
   297  	if classifier == files.ContentClassFile {
   298  		translations := info.Meta().Translations
   299  
   300  		for lang, b := range bundles {
   301  			if !stringSliceContains(lang, translations...) && !b.containsResource(info.Name()) {
   302  
   303  				// Clone and add it to the bundle.
   304  				clone := c.cloneFileInfo(info)
   305  				clone.Meta().Lang = lang
   306  				b.resources = append(b.resources, clone)
   307  			}
   308  		}
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  func (c *pagesCollector) cloneFileInfo(fi hugofs.FileMetaInfo) hugofs.FileMetaInfo {
   315  	return hugofs.NewFileMetaInfo(fi, hugofs.NewFileMeta())
   316  }
   317  
   318  func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func(fim hugofs.FileMetaInfo) bool) error {
   319  	fi, err := c.fs.Stat(dirname)
   320  	if err != nil {
   321  		if herrors.IsNotExist(err) {
   322  			// May have been deleted.
   323  			return nil
   324  		}
   325  		return err
   326  	}
   327  
   328  	handleDir := func(
   329  		btype bundleDirType,
   330  		dir hugofs.FileMetaInfo,
   331  		path string,
   332  		readdir []hugofs.FileMetaInfo) error {
   333  		if btype > bundleNot && c.tracker != nil {
   334  			c.tracker.add(path, btype)
   335  		}
   336  
   337  		if btype == bundleBranch {
   338  			if err := c.handleBundleBranch(readdir); err != nil {
   339  				return err
   340  			}
   341  			// A branch bundle is only this directory level, so keep walking.
   342  			return nil
   343  		} else if btype == bundleLeaf {
   344  			if err := c.handleBundleLeaf(dir, path, readdir); err != nil {
   345  				return err
   346  			}
   347  
   348  			return nil
   349  		}
   350  
   351  		if err := c.handleFiles(readdir...); err != nil {
   352  			return err
   353  		}
   354  
   355  		return nil
   356  	}
   357  
   358  	filter := func(fim hugofs.FileMetaInfo) bool {
   359  		if fim.Meta().SkipDir {
   360  			return false
   361  		}
   362  
   363  		if c.sp.IgnoreFile(fim.Meta().Filename) {
   364  			return false
   365  		}
   366  
   367  		if inFilter != nil {
   368  			return inFilter(fim)
   369  		}
   370  		return true
   371  	}
   372  
   373  	preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
   374  		var btype bundleDirType
   375  
   376  		filtered := readdir[:0]
   377  		for _, fi := range readdir {
   378  			if filter(fi) {
   379  				filtered = append(filtered, fi)
   380  
   381  				if c.tracker != nil {
   382  					// Track symlinks.
   383  					c.tracker.addSymbolicLinkMapping(fi)
   384  				}
   385  			}
   386  		}
   387  		walkRoot := dir.Meta().IsRootFile
   388  		readdir = filtered
   389  
   390  		// We merge language directories, so there can be duplicates, but they
   391  		// will be ordered, most important first.
   392  		var duplicates []int
   393  		seen := make(map[string]bool)
   394  
   395  		for i, fi := range readdir {
   396  
   397  			if fi.IsDir() {
   398  				continue
   399  			}
   400  
   401  			meta := fi.Meta()
   402  			meta.IsRootFile = walkRoot
   403  			class := meta.Classifier
   404  			translationBase := meta.TranslationBaseNameWithExt
   405  			key := pth.Join(meta.Lang, translationBase)
   406  
   407  			if seen[key] {
   408  				duplicates = append(duplicates, i)
   409  				continue
   410  			}
   411  			seen[key] = true
   412  
   413  			var thisBtype bundleDirType
   414  
   415  			switch class {
   416  			case files.ContentClassLeaf:
   417  				thisBtype = bundleLeaf
   418  			case files.ContentClassBranch:
   419  				thisBtype = bundleBranch
   420  			}
   421  
   422  			// Folders with both index.md and _index.md type of files have
   423  			// undefined behaviour and can never work.
   424  			// The branch variant will win because of sort order, but log
   425  			// a warning about it.
   426  			if thisBtype > bundleNot && btype > bundleNot && thisBtype != btype {
   427  				c.logger.Warnf("Content directory %q have both index.* and _index.* files, pick one.", dir.Meta().Filename)
   428  				// Reclassify it so it will be handled as a content file inside the
   429  				// section, which is in line with the <= 0.55 behaviour.
   430  				meta.Classifier = files.ContentClassContent
   431  			} else if thisBtype > bundleNot {
   432  				btype = thisBtype
   433  			}
   434  
   435  		}
   436  
   437  		if len(duplicates) > 0 {
   438  			for i := len(duplicates) - 1; i >= 0; i-- {
   439  				idx := duplicates[i]
   440  				readdir = append(readdir[:idx], readdir[idx+1:]...)
   441  			}
   442  		}
   443  
   444  		err := handleDir(btype, dir, path, readdir)
   445  		if err != nil {
   446  			return nil, err
   447  		}
   448  
   449  		if btype == bundleLeaf || partial {
   450  			return nil, filepath.SkipDir
   451  		}
   452  
   453  		// Keep walking.
   454  		return readdir, nil
   455  	}
   456  
   457  	var postHook hugofs.WalkHook
   458  	if c.tracker != nil {
   459  		postHook = func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
   460  			if c.tracker == nil {
   461  				// Nothing to do.
   462  				return readdir, nil
   463  			}
   464  
   465  			return readdir, nil
   466  		}
   467  	}
   468  
   469  	wfn := func(path string, info hugofs.FileMetaInfo, err error) error {
   470  		if err != nil {
   471  			return err
   472  		}
   473  
   474  		return nil
   475  	}
   476  
   477  	fim := fi.(hugofs.FileMetaInfo)
   478  	// Make sure the pages in this directory gets re-rendered,
   479  	// even in fast render mode.
   480  	fim.Meta().IsRootFile = true
   481  
   482  	w := hugofs.NewWalkway(hugofs.WalkwayConfig{
   483  		Fs:       c.fs,
   484  		Logger:   c.logger,
   485  		Root:     dirname,
   486  		Info:     fim,
   487  		HookPre:  preHook,
   488  		HookPost: postHook,
   489  		WalkFn:   wfn,
   490  	})
   491  
   492  	return w.Walk()
   493  }
   494  
   495  func (c *pagesCollector) handleBundleBranch(readdir []hugofs.FileMetaInfo) error {
   496  	// Maps bundles to its language.
   497  	bundles := pageBundles{}
   498  
   499  	var contentFiles []hugofs.FileMetaInfo
   500  
   501  	for _, fim := range readdir {
   502  
   503  		if fim.IsDir() {
   504  			continue
   505  		}
   506  
   507  		meta := fim.Meta()
   508  
   509  		switch meta.Classifier {
   510  		case files.ContentClassContent:
   511  			contentFiles = append(contentFiles, fim)
   512  		default:
   513  			if err := c.addToBundle(fim, bundleBranch, bundles); err != nil {
   514  				return err
   515  			}
   516  		}
   517  
   518  	}
   519  
   520  	// Make sure the section is created before its pages.
   521  	if err := c.proc.Process(bundles); err != nil {
   522  		return err
   523  	}
   524  
   525  	return c.handleFiles(contentFiles...)
   526  }
   527  
   528  func (c *pagesCollector) handleBundleLeaf(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) error {
   529  	// Maps bundles to its language.
   530  	bundles := pageBundles{}
   531  
   532  	walk := func(path string, info hugofs.FileMetaInfo, err error) error {
   533  		if err != nil {
   534  			return err
   535  		}
   536  		if info.IsDir() {
   537  			return nil
   538  		}
   539  
   540  		return c.addToBundle(info, bundleLeaf, bundles)
   541  	}
   542  
   543  	// Start a new walker from the given path.
   544  	w := hugofs.NewWalkway(hugofs.WalkwayConfig{
   545  		Root:       path,
   546  		Fs:         c.fs,
   547  		Logger:     c.logger,
   548  		Info:       dir,
   549  		DirEntries: readdir,
   550  		WalkFn:     walk,
   551  	})
   552  
   553  	if err := w.Walk(); err != nil {
   554  		return err
   555  	}
   556  
   557  	return c.proc.Process(bundles)
   558  }
   559  
   560  func (c *pagesCollector) handleFiles(fis ...hugofs.FileMetaInfo) error {
   561  	for _, fi := range fis {
   562  		if fi.IsDir() {
   563  			continue
   564  		}
   565  
   566  		if err := c.proc.Process(fi); err != nil {
   567  			return err
   568  		}
   569  	}
   570  	return nil
   571  }
   572  
   573  func stringSliceContains(k string, values ...string) bool {
   574  	for _, v := range values {
   575  		if k == v {
   576  			return true
   577  		}
   578  	}
   579  	return false
   580  }