github.com/neohugo/neohugo@v0.123.8/hugolib/pages_capture.go (about)

     1  // Copyright 2021 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package hugolib
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"os"
    20  	"path/filepath"
    21  	"strings"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/bep/logg"
    27  	"github.com/neohugo/neohugo/common/hstrings"
    28  	"github.com/neohugo/neohugo/common/paths"
    29  	"github.com/neohugo/neohugo/common/rungroup"
    30  	"github.com/spf13/afero"
    31  
    32  	"github.com/neohugo/neohugo/source"
    33  
    34  	"github.com/neohugo/neohugo/common/loggers"
    35  	"github.com/neohugo/neohugo/hugofs"
    36  )
    37  
    38  func newPagesCollector(
    39  	ctx context.Context,
    40  	h *HugoSites,
    41  	sp *source.SourceSpec,
    42  	logger loggers.Logger,
    43  	infoLogger logg.LevelLogger,
    44  	m *pageMap,
    45  	ids []pathChange,
    46  ) *pagesCollector {
    47  	return &pagesCollector{
    48  		ctx:        ctx,
    49  		h:          h,
    50  		fs:         sp.BaseFs.Content.Fs,
    51  		m:          m,
    52  		sp:         sp,
    53  		logger:     logger,
    54  		infoLogger: infoLogger,
    55  		ids:        ids,
    56  		seenDirs:   make(map[string]bool),
    57  	}
    58  }
    59  
// pagesCollector walks the content filesystem and inserts the files it
// finds into the content tree via m.AddFi, using a bounded worker group.
type pagesCollector struct {
	ctx        context.Context
	h          *HugoSites
	sp         *source.SourceSpec
	logger     loggers.Logger
	infoLogger logg.LevelLogger

	// Target page map that receives the collected files.
	m *pageMap

	// Content filesystem to walk (sp.BaseFs.Content.Fs).
	fs afero.Fs

	// List of paths that have changed. Used in partial builds.
	ids      []pathChange
	// Directories already walked in this collect run, keyed by OS path.
	seenDirs map[string]bool

	// Worker group that processes enqueued files concurrently.
	g rungroup.Group[hugofs.FileMetaInfo]
}
    77  
// Collect collects content by walking the file system and storing
// it in the content tree.
// It may be restricted by filenames set on the collector (partial build).
func (c *pagesCollector) Collect() (collectErr error) {
	var (
		numWorkers             = c.h.numWorkers
		numFilesProcessedTotal atomic.Uint64
		numFilesProcessedLast  uint64
		fileBatchTimer         = time.Now()
		fileBatchTimerMu       sync.Mutex
	)

	l := c.infoLogger.WithField("substep", "collect")

	// logFilesProcessed emits a progress log line with the per-batch and
	// total file counts. Unless force is set, it throttles itself to at
	// most one line every 3 seconds. The mutex guards the batch timer and
	// last-count snapshot shared between workers.
	logFilesProcessed := func(force bool) {
		fileBatchTimerMu.Lock()
		if force || time.Since(fileBatchTimer) > 3*time.Second {
			numFilesProcessedBatch := numFilesProcessedTotal.Load() - numFilesProcessedLast
			numFilesProcessedLast = numFilesProcessedTotal.Load()
			loggers.TimeTrackf(l, fileBatchTimer,
				logg.Fields{
					logg.Field{Name: "files", Value: numFilesProcessedBatch},
					logg.Field{Name: "files_total", Value: numFilesProcessedTotal.Load()},
				},
				"",
			)
			fileBatchTimer = time.Now()
		}
		fileBatchTimerMu.Unlock()
	}

	// Always flush a final count line, even when returning early on error.
	defer func() {
		logFilesProcessed(true)
	}()

	// Start the worker group; each collected file is inserted into the
	// page map, with filesystem context attached to any error.
	c.g = rungroup.Run[hugofs.FileMetaInfo](c.ctx, rungroup.Config[hugofs.FileMetaInfo]{
		NumWorkers: numWorkers,
		Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error {
			if err := c.m.AddFi(fi); err != nil {
				return hugofs.AddFileInfoToError(err, fi, c.fs)
			}
			numFilesProcessedTotal.Add(1)
			// Periodic progress logging (throttled inside logFilesProcessed).
			if numFilesProcessedTotal.Load()%1000 == 0 {
				logFilesProcessed(false)
			}
			return nil
		},
	})

	if c.ids == nil {
		// Collect everything.
		collectErr = c.collectDir(nil, false, nil)
	} else {
		// Partial rebuild: mark all sites as rebuilding, then walk only
		// the changed paths.
		for _, s := range c.h.Sites {
			s.pageMap.cfg.isRebuild = true
		}

		for _, id := range c.ids {
			if id.p.IsLeafBundle() {
				// Re-collect the whole leaf bundle directory.
				collectErr = c.collectDir(
					id.p,
					false,
					func(fim hugofs.FileMetaInfo) bool {
						return true
					},
				)
			} else if id.p.IsBranchBundle() {
				// Re-collect the branch bundle and any file below its dir.
				collectErr = c.collectDir(
					id.p,
					false,
					func(fim hugofs.FileMetaInfo) bool {
						if fim.IsDir() {
							return true
						}
						fimp := fim.Meta().PathInfo
						if fimp == nil {
							return false
						}

						return strings.HasPrefix(fimp.Path(), paths.AddTrailingSlash(id.p.Dir()))
					},
				)
			} else {
				// We always start from a directory.
				collectErr = c.collectDir(id.p, id.isDir, func(fim hugofs.FileMetaInfo) bool {
					if id.delete || id.isDir {
						if id.isDir {
							// Accept anything below the changed directory.
							return strings.HasPrefix(fim.Meta().PathInfo.Path(), paths.AddTrailingSlash(id.p.Path()))
						}

						return id.p.Dir() == fim.Meta().PathInfo.Dir()
					}

					// A single content file that gained leaf-bundle siblings:
					// match on the directory instead of the exact path.
					if fim.Meta().PathInfo.IsLeafBundle() && id.p.BundleType() == paths.PathTypeContentSingle {
						return id.p.Dir() == fim.Meta().PathInfo.Dir()
					}

					return id.p.Path() == fim.Meta().PathInfo.Path()
				})
			}

			if collectErr != nil {
				break
			}
		}

	}

	// Wait for the workers to drain; a walk error takes precedence over
	// a worker-group error.
	werr := c.g.Wait()
	if collectErr == nil {
		collectErr = werr
	}

	return
}
   193  
   194  func (c *pagesCollector) collectDir(dirPath *paths.Path, isDir bool, inFilter func(fim hugofs.FileMetaInfo) bool) error {
   195  	var dpath string
   196  	if dirPath != nil {
   197  		if isDir {
   198  			dpath = filepath.FromSlash(dirPath.Unnormalized().Path())
   199  		} else {
   200  			dpath = filepath.FromSlash(dirPath.Unnormalized().Dir())
   201  		}
   202  	}
   203  
   204  	if c.seenDirs[dpath] {
   205  		return nil
   206  	}
   207  	c.seenDirs[dpath] = true
   208  
   209  	root, err := c.fs.Stat(dpath)
   210  	if err != nil {
   211  		if os.IsNotExist(err) {
   212  			return nil
   213  		}
   214  		return err
   215  	}
   216  
   217  	rootm := root.(hugofs.FileMetaInfo)
   218  
   219  	if err := c.collectDirDir(dpath, rootm, inFilter); err != nil {
   220  		return err
   221  	}
   222  
   223  	return nil
   224  }
   225  
   226  func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, inFilter func(fim hugofs.FileMetaInfo) bool) error {
   227  	filter := func(fim hugofs.FileMetaInfo) bool {
   228  		if inFilter != nil {
   229  			return inFilter(fim)
   230  		}
   231  		return true
   232  	}
   233  
   234  	preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
   235  		filtered := readdir[:0]
   236  		for _, fi := range readdir {
   237  			if filter(fi) {
   238  				filtered = append(filtered, fi)
   239  			}
   240  		}
   241  		readdir = filtered
   242  		if len(readdir) == 0 {
   243  			return nil, nil
   244  		}
   245  
   246  		// Pick the first regular file.
   247  		var first hugofs.FileMetaInfo
   248  		for _, fi := range readdir {
   249  			if fi.IsDir() {
   250  				continue
   251  			}
   252  			first = fi
   253  			break
   254  		}
   255  
   256  		if first == nil {
   257  			// Only dirs, keep walking.
   258  			return readdir, nil
   259  		}
   260  
   261  		// Any bundle file will always be first.
   262  		firstPi := first.Meta().PathInfo
   263  		if firstPi == nil {
   264  			panic(fmt.Sprintf("collectDirDir: no path info for %q", first.Meta().Filename))
   265  		}
   266  
   267  		if firstPi.IsLeafBundle() {
   268  			if err := c.handleBundleLeaf(dir, first, path, readdir); err != nil {
   269  				return nil, err
   270  			}
   271  			return nil, filepath.SkipDir
   272  		}
   273  
   274  		seen := map[hstrings.Tuple]bool{}
   275  		for _, fi := range readdir {
   276  			if fi.IsDir() {
   277  				continue
   278  			}
   279  
   280  			pi := fi.Meta().PathInfo
   281  			meta := fi.Meta()
   282  
   283  			// Filter out duplicate page or resource.
   284  			// These would eventually have been filtered out as duplicates when
   285  			// inserting them into the document store,
   286  			// but doing it here will preserve a consistent ordering.
   287  			baseLang := hstrings.Tuple{First: pi.Base(), Second: meta.Lang}
   288  			if seen[baseLang] {
   289  				continue
   290  			}
   291  			seen[baseLang] = true
   292  
   293  			if pi == nil {
   294  				panic(fmt.Sprintf("no path info for %q", meta.Filename))
   295  			}
   296  
   297  			if meta.Lang == "" {
   298  				panic("lang not set")
   299  			}
   300  
   301  			if err := c.g.Enqueue(fi); err != nil {
   302  				return nil, err
   303  			}
   304  		}
   305  
   306  		// Keep walking.
   307  		return readdir, nil
   308  	}
   309  
   310  	var postHook hugofs.WalkHook
   311  
   312  	wfn := func(path string, fi hugofs.FileMetaInfo) error {
   313  		return nil
   314  	}
   315  
   316  	w := hugofs.NewWalkway(
   317  		hugofs.WalkwayConfig{
   318  			Logger:     c.logger,
   319  			Root:       path,
   320  			Info:       root,
   321  			Fs:         c.fs,
   322  			IgnoreFile: c.h.SourceSpec.IgnoreFile,
   323  			HookPre:    preHook,
   324  			HookPost:   postHook,
   325  			WalkFn:     wfn,
   326  		})
   327  
   328  	return w.Walk()
   329  }
   330  
   331  func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error {
   332  	bundlePi := bundle.Meta().PathInfo
   333  	seen := map[hstrings.Tuple]bool{}
   334  
   335  	walk := func(path string, info hugofs.FileMetaInfo) error {
   336  		if info.IsDir() {
   337  			return nil
   338  		}
   339  
   340  		pi := info.Meta().PathInfo
   341  
   342  		if info != bundle {
   343  			// Everything inside a leaf bundle is a Resource,
   344  			// even the content pages.
   345  			// Note that we do allow index.md as page resources, but not in the bundle root.
   346  			if !pi.IsLeafBundle() || pi.Dir() != bundlePi.Dir() {
   347  				paths.ModifyPathBundleTypeResource(pi)
   348  			}
   349  		}
   350  
   351  		// Filter out duplicate page or resource.
   352  		// These would eventually have been filtered out as duplicates when
   353  		// inserting them into the document store,
   354  		// but doing it here will preserve a consistent ordering.
   355  		baseLang := hstrings.Tuple{First: pi.Base(), Second: info.Meta().Lang}
   356  		if seen[baseLang] {
   357  			return nil
   358  		}
   359  		seen[baseLang] = true
   360  
   361  		return c.g.Enqueue(info)
   362  	}
   363  
   364  	// Start a new walker from the given path.
   365  	w := hugofs.NewWalkway(
   366  		hugofs.WalkwayConfig{
   367  			Root:       inPath,
   368  			Fs:         c.fs,
   369  			Logger:     c.logger,
   370  			Info:       dir,
   371  			DirEntries: readdir,
   372  			IgnoreFile: c.h.SourceSpec.IgnoreFile,
   373  			WalkFn:     walk,
   374  		})
   375  
   376  	return w.Walk()
   377  }