github.com/neohugo/neohugo@v0.123.8/hugolib/hugo_sites.go (about)

     1  // Copyright 2024 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package hugolib
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"io"
    20  	"strings"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/bep/logg"
    25  	"github.com/neohugo/neohugo/cache/dynacache"
    26  	"github.com/neohugo/neohugo/config/allconfig"
    27  	"github.com/neohugo/neohugo/hugofs/glob"
    28  	"github.com/neohugo/neohugo/hugolib/doctree"
    29  	"github.com/neohugo/neohugo/resources"
    30  
    31  	"github.com/fsnotify/fsnotify"
    32  
    33  	"github.com/neohugo/neohugo/output"
    34  	"github.com/neohugo/neohugo/parser/metadecoders"
    35  
    36  	"github.com/neohugo/neohugo/common/maps"
    37  	"github.com/neohugo/neohugo/common/neohugo"
    38  	"github.com/neohugo/neohugo/common/para"
    39  	"github.com/neohugo/neohugo/common/types"
    40  	"github.com/neohugo/neohugo/hugofs"
    41  
    42  	"github.com/neohugo/neohugo/source"
    43  
    44  	"github.com/neohugo/neohugo/common/herrors"
    45  	"github.com/neohugo/neohugo/deps"
    46  	"github.com/neohugo/neohugo/helpers"
    47  	"github.com/neohugo/neohugo/lazy"
    48  
    49  	"github.com/neohugo/neohugo/resources/page"
    50  )
    51  
// HugoSites represents the sites to build. Each site represents a language.
type HugoSites struct {
	// The sites to build, one per language (see the type comment).
	Sites []*Site

	Configs *allconfig.Configs

	hugoInfo neohugo.HugoInfo

	// Render output formats for all sites.
	renderFormats output.Formats

	// The currently rendered Site.
	currentSite *Site

	*deps.Deps

	// Set by loadGitInfo when Configs.Base.EnableGitInfo is true and the
	// respective source could be read; nil otherwise.
	gitInfo       *gitInfo
	codeownerInfo *codeownerInfo

	// As loaded from the /data dirs
	data map[string]any

	// Cache for page listings.
	cachePages *dynacache.Partition[string, page.Pages]
	// Cache for content sources.
	cacheContentSource *dynacache.Partition[string, *resources.StaleValue[[]byte]]

	// Before Hugo 0.122.0 we managed all translations in a map using a translationKey
	// that could be overridden in front matter.
	// Now the different page dimensions (e.g. language) are built into the page trees
	// (see pageTrees below).
	// But we still need to support the overridden translationKey, but that should
	// be relatively rare and low volume.
	translationKeyPages *maps.SliceCache[page.Page]

	pageTrees *pageTrees

	postRenderInit sync.Once

	// File change events with filename stored in this map will be skipped.
	// skipRebuildForFilenamesMu guards access to the map.
	skipRebuildForFilenamesMu sync.Mutex
	skipRebuildForFilenames   map[string]bool

	// Lazy initializers; see hugoSitesInit.
	init *hugoSitesInit

	workersSite     *para.Workers
	numWorkersSites int
	numWorkers      int

	// Replaced with a fresh handler by reset.
	*fatalErrorHandler
	// Render counters; per the buildCounters type comment these are only used in tests.
	*buildCounters
	// Tracks invocations of the Build method.
	buildCounter atomic.Uint64
}
   105  
   106  // ShouldSkipFileChangeEvent allows skipping filesystem event early before
   107  // the build is started.
   108  func (h *HugoSites) ShouldSkipFileChangeEvent(ev fsnotify.Event) bool {
   109  	h.skipRebuildForFilenamesMu.Lock()
   110  	defer h.skipRebuildForFilenamesMu.Unlock()
   111  	return h.skipRebuildForFilenames[ev.Name]
   112  }
   113  
// buildCounters holds atomic render counters.
// Only used in tests.
type buildCounters struct {
	// Reported under the "content" key by loggFields.
	contentRenderCounter atomic.Uint64
	// Reported under the "pages" key by loggFields.
	pageRenderCounter    atomic.Uint64
}
   119  
   120  func (c *buildCounters) loggFields() logg.Fields {
   121  	return logg.Fields{
   122  		{Name: "pages", Value: c.pageRenderCounter.Load()},
   123  		{Name: "content", Value: c.contentRenderCounter.Load()},
   124  	}
   125  }
   126  
// fatalErrorHandler records a fatal error and signals it through a channel
// that is closed on the first call to FatalError.
type fatalErrorHandler struct {
	// Guards err and done.
	mu sync.Mutex

	h *HugoSites

	// The error passed to the most recent FatalError call.
	err error

	// Set once donec has been closed, so it is closed only once.
	done  bool
	donec chan bool // will be closed when done
}
   137  
   138  // FatalError error is used in some rare situations where it does not make sense to
   139  // continue processing, to abort as soon as possible and log the error.
   140  func (f *fatalErrorHandler) FatalError(err error) {
   141  	f.mu.Lock()
   142  	defer f.mu.Unlock()
   143  	if !f.done {
   144  		f.done = true
   145  		close(f.donec)
   146  	}
   147  	f.err = err
   148  }
   149  
   150  func (f *fatalErrorHandler) getErr() error {
   151  	f.mu.Lock()
   152  	defer f.mu.Unlock()
   153  	return f.err
   154  }
   155  
   156  func (f *fatalErrorHandler) Done() <-chan bool {
   157  	return f.donec
   158  }
   159  
// hugoSitesInit groups the lazy initializers used by HugoSites.
type hugoSitesInit struct {
	// Loads the data from all of the /data folders.
	// Run by HugoSites.Data.
	data *lazy.Init

	// Performs late initialization (before render) of the templates.
	layouts *lazy.Init

	// Loads the Git info and CODEOWNERS for all the pages if enabled.
	// Run by HugoSites.gitInfoForPage and HugoSites.codeownersForPage.
	gitInfo *lazy.Init
}
   170  
   171  func (h *HugoSites) Data() map[string]any {
   172  	if _, err := h.init.data.Do(context.Background()); err != nil {
   173  		h.SendError(fmt.Errorf("failed to load data: %w", err))
   174  		return nil
   175  	}
   176  	return h.data
   177  }
   178  
   179  // Pages returns all pages for all sites.
   180  func (h *HugoSites) Pages() page.Pages {
   181  	key := "pages"
   182  	v, err := h.cachePages.GetOrCreate(key, func(string) (page.Pages, error) {
   183  		var pages page.Pages
   184  		for _, s := range h.Sites {
   185  			pages = append(pages, s.Pages()...)
   186  		}
   187  		page.SortByDefault(pages)
   188  		return pages, nil
   189  	})
   190  	if err != nil {
   191  		panic(err)
   192  	}
   193  	return v
   194  }
   195  
   196  // Pages returns all regularpages for all sites.
   197  func (h *HugoSites) RegularPages() page.Pages {
   198  	key := "regular-pages"
   199  	v, err := h.cachePages.GetOrCreate(key, func(string) (page.Pages, error) {
   200  		var pages page.Pages
   201  		for _, s := range h.Sites {
   202  			pages = append(pages, s.RegularPages()...)
   203  		}
   204  		page.SortByDefault(pages)
   205  
   206  		return pages, nil
   207  	})
   208  	if err != nil {
   209  		panic(err)
   210  	}
   211  	return v
   212  }
   213  
   214  func (h *HugoSites) gitInfoForPage(p page.Page) (source.GitInfo, error) {
   215  	if _, err := h.init.gitInfo.Do(context.Background()); err != nil {
   216  		return source.GitInfo{}, err
   217  	}
   218  
   219  	if h.gitInfo == nil {
   220  		return source.GitInfo{}, nil
   221  	}
   222  
   223  	return h.gitInfo.forPage(p), nil
   224  }
   225  
   226  func (h *HugoSites) codeownersForPage(p page.Page) ([]string, error) {
   227  	if _, err := h.init.gitInfo.Do(context.Background()); err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	if h.codeownerInfo == nil {
   232  		return nil, nil
   233  	}
   234  
   235  	return h.codeownerInfo.forPage(p), nil
   236  }
   237  
   238  func (h *HugoSites) pickOneAndLogTheRest(errors []error) error {
   239  	if len(errors) == 0 {
   240  		return nil
   241  	}
   242  
   243  	var i int
   244  
   245  	for j, err := range errors {
   246  		// If this is in server mode, we want to return an error to the client
   247  		// with a file context, if possible.
   248  		if herrors.UnwrapFileError(err) != nil {
   249  			i = j
   250  			break
   251  		}
   252  	}
   253  
   254  	// Log the rest, but add a threshold to avoid flooding the log.
   255  	const errLogThreshold = 5
   256  
   257  	for j, err := range errors {
   258  		if j == i || err == nil {
   259  			continue
   260  		}
   261  
   262  		if j >= errLogThreshold {
   263  			break
   264  		}
   265  
   266  		h.Log.Errorln(err)
   267  	}
   268  
   269  	return errors[i]
   270  }
   271  
   272  func (h *HugoSites) isMultiLingual() bool {
   273  	return len(h.Sites) > 1
   274  }
   275  
   276  // TODO(bep) consolidate
   277  func (h *HugoSites) LanguageSet() map[string]int {
   278  	set := make(map[string]int)
   279  	for i, s := range h.Sites {
   280  		set[s.language.Lang] = i
   281  	}
   282  	return set
   283  }
   284  
   285  func (h *HugoSites) NumLogErrors() int {
   286  	if h == nil {
   287  		return 0
   288  	}
   289  	return h.Log.LoggCount(logg.LevelError)
   290  }
   291  
   292  func (h *HugoSites) PrintProcessingStats(w io.Writer) {
   293  	stats := make([]*helpers.ProcessingStats, len(h.Sites))
   294  	for i := 0; i < len(h.Sites); i++ {
   295  		stats[i] = h.Sites[i].PathSpec.ProcessingStats
   296  	}
   297  	helpers.ProcessingStatsTable(w, stats...)
   298  }
   299  
   300  // GetContentPage finds a Page with content given the absolute filename.
   301  // Returns nil if none found.
   302  func (h *HugoSites) GetContentPage(filename string) page.Page {
   303  	var p page.Page
   304  
   305  	h.withPage(func(s string, p2 *pageState) bool {
   306  		if p2.File() == nil {
   307  			return false
   308  		}
   309  
   310  		if p2.File().FileInfo().Meta().Filename == filename {
   311  			p = p2
   312  			return true
   313  		}
   314  
   315  		for _, r := range p2.Resources().ByType(pageResourceType) {
   316  			p3 := r.(page.Page)
   317  			if p3.File() != nil && p3.File().FileInfo().Meta().Filename == filename {
   318  				p = p3
   319  				return true
   320  			}
   321  		}
   322  
   323  		return false
   324  	})
   325  
   326  	return p
   327  }
   328  
   329  func (h *HugoSites) loadGitInfo() error {
   330  	if h.Configs.Base.EnableGitInfo {
   331  		gi, err := newGitInfo(h.Conf)
   332  		if err != nil {
   333  			h.Log.Errorln("Failed to read Git log:", err)
   334  		} else {
   335  			h.gitInfo = gi
   336  		}
   337  
   338  		co, err := newCodeOwners(h.Configs.LoadingInfo.BaseConfig.WorkingDir)
   339  		if err != nil {
   340  			h.Log.Errorln("Failed to read CODEOWNERS:", err)
   341  		} else {
   342  			h.codeownerInfo = co
   343  		}
   344  	}
   345  	return nil
   346  }
   347  
   348  // Reset resets the sites and template caches etc., making it ready for a full rebuild.
   349  func (h *HugoSites) reset(config *BuildCfg) {
   350  	h.fatalErrorHandler = &fatalErrorHandler{
   351  		h:     h,
   352  		donec: make(chan bool),
   353  	}
   354  }
   355  
   356  // resetLogs resets the log counters etc. Used to do a new build on the same sites.
   357  func (h *HugoSites) resetLogs() {
   358  	h.Log.Reset()
   359  	for _, s := range h.Sites {
   360  		s.Deps.Log.Reset()
   361  	}
   362  }
   363  
   364  func (h *HugoSites) withSite(fn func(s *Site) error) error {
   365  	for _, s := range h.Sites {
   366  		if err := fn(s); err != nil {
   367  			return err
   368  		}
   369  	}
   370  	return nil
   371  }
   372  
   373  func (h *HugoSites) withPage(fn func(s string, p *pageState) bool) {
   374  	// nolint
   375  	h.withSite(func(s *Site) error {
   376  		w := &doctree.NodeShiftTreeWalker[contentNodeI]{
   377  			Tree:     s.pageMap.treePages,
   378  			LockType: doctree.LockTypeRead,
   379  			Handle: func(s string, n contentNodeI, match doctree.DimensionFlag) (bool, error) {
   380  				return fn(s, n.(*pageState)), nil
   381  			},
   382  		}
   383  		return w.Walk(context.Background())
   384  	})
   385  }
   386  
// BuildCfg holds build options used to, as an example, skip the render step.
type BuildCfg struct {
	// Skip rendering. Useful for testing.
	SkipRender bool
	// Use this to indicate what changed (for rebuilds).
	whatChanged *whatChanged

	// This is a partial re-render of some selected pages.
	PartialReRender bool

	// Set in server mode when the last build failed for some reason.
	ErrRecovery bool

	// Recently visited URLs. This is used for partial re-rendering.
	// shouldRender consults it in fast render mode.
	RecentlyVisited *types.EvictingStringQueue

	// Can be set to build only with a sub set of the content source.
	ContentInclusionFilter *glob.FilenameFilter

	// Set when the buildlock is already acquired (e.g. the archetype content builder).
	NoBuildLock bool

	// Render counters; per the buildCounters type comment these are only used in tests.
	testCounters *buildCounters
}
   411  
   412  // shouldRender returns whether this output format should be rendered or not.
   413  func (cfg *BuildCfg) shouldRender(p *pageState) bool {
   414  	if !p.renderOnce {
   415  		return true
   416  	}
   417  
   418  	// The render state is incremented on render and reset when a related change is detected.
   419  	// Note that this is set per output format.
   420  	shouldRender := p.renderState == 0
   421  
   422  	if !shouldRender {
   423  		return false
   424  	}
   425  
   426  	fastRenderMode := p.s.Conf.FastRenderMode()
   427  
   428  	if !fastRenderMode || p.s.h.buildCounter.Load() == 0 {
   429  		return shouldRender
   430  	}
   431  
   432  	if !p.render {
   433  		// Not be to rendered for this output format.
   434  		return false
   435  	}
   436  
   437  	if p.outputFormat().IsHTML {
   438  		// This is fast render mode and the output format is HTML,
   439  		// rerender if this page is one of the recently visited.
   440  		return cfg.RecentlyVisited.Contains(p.RelPermalink())
   441  	}
   442  
   443  	// In fast render mode, we want to avoid re-rendering the sitemaps etc. and
   444  	// other big listings whenever we e.g. change a content file,
   445  	// but we want partial renders of the recently visited pages to also include
   446  	// alternative formats of the same HTML page (e.g. RSS, JSON).
   447  	for _, po := range p.pageOutputs {
   448  		if po.render && po.f.IsHTML && cfg.RecentlyVisited.Contains(po.RelPermalink()) {
   449  			return true
   450  		}
   451  	}
   452  
   453  	return false
   454  }
   455  
   456  func (s *Site) preparePagesForRender(isRenderingSite bool, idx int) error {
   457  	var err error
   458  
   459  	initPage := func(p *pageState) error {
   460  		if err = p.shiftToOutputFormat(isRenderingSite, idx); err != nil {
   461  			return err
   462  		}
   463  		return nil
   464  	}
   465  
   466  	return s.pageMap.forEeachPageIncludingBundledPages(nil,
   467  		func(p *pageState) (bool, error) {
   468  			return false, initPage(p)
   469  		},
   470  	)
   471  }
   472  
   473  func (h *HugoSites) loadData() error {
   474  	h.data = make(map[string]any)
   475  	w := hugofs.NewWalkway(
   476  		hugofs.WalkwayConfig{
   477  			Fs:         h.PathSpec.BaseFs.Data.Fs,
   478  			IgnoreFile: h.SourceSpec.IgnoreFile,
   479  			WalkFn: func(path string, fi hugofs.FileMetaInfo) error {
   480  				if fi.IsDir() {
   481  					return nil
   482  				}
   483  				pi := fi.Meta().PathInfo
   484  				if pi == nil {
   485  					panic("no path info")
   486  				}
   487  				return h.handleDataFile(source.NewFileInfo(fi))
   488  			},
   489  		})
   490  
   491  	if err := w.Walk(); err != nil {
   492  		return err
   493  	}
   494  	return nil
   495  }
   496  
   497  func (h *HugoSites) handleDataFile(r *source.File) error {
   498  	var current map[string]any
   499  
   500  	f, err := r.FileInfo().Meta().Open()
   501  	if err != nil {
   502  		return fmt.Errorf("data: failed to open %q: %w", r.LogicalName(), err)
   503  	}
   504  	defer f.Close()
   505  
   506  	// Crawl in data tree to insert data
   507  	current = h.data
   508  	dataPath := r.FileInfo().Meta().PathInfo.Unnormalized().Dir()[1:]
   509  	keyParts := strings.Split(dataPath, "/")
   510  
   511  	for _, key := range keyParts {
   512  		if key != "" {
   513  			if _, ok := current[key]; !ok {
   514  				current[key] = make(map[string]any)
   515  			}
   516  			current = current[key].(map[string]any)
   517  		}
   518  	}
   519  
   520  	data, err := h.readData(r)
   521  	if err != nil {
   522  		return h.errWithFileContext(err, r)
   523  	}
   524  
   525  	if data == nil {
   526  		return nil
   527  	}
   528  
   529  	// filepath.Walk walks the files in lexical order, '/' comes before '.'
   530  	higherPrecedentData := current[r.BaseFileName()]
   531  
   532  	switch data.(type) {
   533  	case nil:
   534  	case map[string]any:
   535  
   536  		switch higherPrecedentData.(type) {
   537  		case nil:
   538  			current[r.BaseFileName()] = data
   539  		case map[string]any:
   540  			// merge maps: insert entries from data for keys that
   541  			// don't already exist in higherPrecedentData
   542  			higherPrecedentMap := higherPrecedentData.(map[string]any)
   543  			for key, value := range data.(map[string]any) {
   544  				if _, exists := higherPrecedentMap[key]; exists {
   545  					// this warning could happen if
   546  					// 1. A theme uses the same key; the main data folder wins
   547  					// 2. A sub folder uses the same key: the sub folder wins
   548  					// TODO(bep) figure out a way to detect 2) above and make that a WARN
   549  					h.Log.Infof("Data for key '%s' in path '%s' is overridden by higher precedence data already in the data tree", key, r.Path())
   550  				} else {
   551  					higherPrecedentMap[key] = value
   552  				}
   553  			}
   554  		default:
   555  			// can't merge: higherPrecedentData is not a map
   556  			h.Log.Warnf("The %T data from '%s' overridden by "+
   557  				"higher precedence %T data already in the data tree", data, r.Path(), higherPrecedentData)
   558  		}
   559  
   560  	case []any:
   561  		if higherPrecedentData == nil {
   562  			current[r.BaseFileName()] = data
   563  		} else {
   564  			// we don't merge array data
   565  			h.Log.Warnf("The %T data from '%s' overridden by "+
   566  				"higher precedence %T data already in the data tree", data, r.Path(), higherPrecedentData)
   567  		}
   568  
   569  	default:
   570  		h.Log.Errorf("unexpected data type %T in file %s", data, r.LogicalName())
   571  	}
   572  
   573  	return nil
   574  }
   575  
   576  func (h *HugoSites) errWithFileContext(err error, f *source.File) error {
   577  	realFilename := f.FileInfo().Meta().Filename
   578  	return herrors.NewFileErrorFromFile(err, realFilename, h.Fs.Source, nil)
   579  }
   580  
   581  func (h *HugoSites) readData(f *source.File) (any, error) {
   582  	file, err := f.FileInfo().Meta().Open()
   583  	if err != nil {
   584  		return nil, fmt.Errorf("readData: failed to open data file: %w", err)
   585  	}
   586  	defer file.Close()
   587  	content := helpers.ReaderToBytes(file)
   588  
   589  	format := metadecoders.FormatFromString(f.Ext())
   590  	return metadecoders.Default.Unmarshal(content, format)
   591  }