github.com/neohugo/neohugo@v0.123.8/hugolib/pages_capture.go (about) 1 // Copyright 2021 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package hugolib 15 16 import ( 17 "context" 18 "fmt" 19 "os" 20 "path/filepath" 21 "strings" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/bep/logg" 27 "github.com/neohugo/neohugo/common/hstrings" 28 "github.com/neohugo/neohugo/common/paths" 29 "github.com/neohugo/neohugo/common/rungroup" 30 "github.com/spf13/afero" 31 32 "github.com/neohugo/neohugo/source" 33 34 "github.com/neohugo/neohugo/common/loggers" 35 "github.com/neohugo/neohugo/hugofs" 36 ) 37 38 func newPagesCollector( 39 ctx context.Context, 40 h *HugoSites, 41 sp *source.SourceSpec, 42 logger loggers.Logger, 43 infoLogger logg.LevelLogger, 44 m *pageMap, 45 ids []pathChange, 46 ) *pagesCollector { 47 return &pagesCollector{ 48 ctx: ctx, 49 h: h, 50 fs: sp.BaseFs.Content.Fs, 51 m: m, 52 sp: sp, 53 logger: logger, 54 infoLogger: infoLogger, 55 ids: ids, 56 seenDirs: make(map[string]bool), 57 } 58 } 59 60 type pagesCollector struct { 61 ctx context.Context 62 h *HugoSites 63 sp *source.SourceSpec 64 logger loggers.Logger 65 infoLogger logg.LevelLogger 66 67 m *pageMap 68 69 fs afero.Fs 70 71 // List of paths that have changed. Used in partial builds. 72 ids []pathChange 73 seenDirs map[string]bool 74 75 g rungroup.Group[hugofs.FileMetaInfo] 76 } 77 78 // Collect collects content by walking the file system and storing 79 // it in the content tree. 80 // It may be restricted by filenames set on the collector (partial build). 81 func (c *pagesCollector) Collect() (collectErr error) { 82 var ( 83 numWorkers = c.h.numWorkers 84 numFilesProcessedTotal atomic.Uint64 85 numFilesProcessedLast uint64 86 fileBatchTimer = time.Now() 87 fileBatchTimerMu sync.Mutex 88 ) 89 90 l := c.infoLogger.WithField("substep", "collect") 91 92 logFilesProcessed := func(force bool) { 93 fileBatchTimerMu.Lock() 94 if force || time.Since(fileBatchTimer) > 3*time.Second { 95 numFilesProcessedBatch := numFilesProcessedTotal.Load() - numFilesProcessedLast 96 numFilesProcessedLast = numFilesProcessedTotal.Load() 97 loggers.TimeTrackf(l, fileBatchTimer, 98 logg.Fields{ 99 logg.Field{Name: "files", Value: numFilesProcessedBatch}, 100 logg.Field{Name: "files_total", Value: numFilesProcessedTotal.Load()}, 101 }, 102 "", 103 ) 104 fileBatchTimer = time.Now() 105 } 106 fileBatchTimerMu.Unlock() 107 } 108 109 defer func() { 110 logFilesProcessed(true) 111 }() 112 113 c.g = rungroup.Run[hugofs.FileMetaInfo](c.ctx, rungroup.Config[hugofs.FileMetaInfo]{ 114 NumWorkers: numWorkers, 115 Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error { 116 if err := c.m.AddFi(fi); err != nil { 117 return hugofs.AddFileInfoToError(err, fi, c.fs) 118 } 119 numFilesProcessedTotal.Add(1) 120 if numFilesProcessedTotal.Load()%1000 == 0 { 121 logFilesProcessed(false) 122 } 123 return nil 124 }, 125 }) 126 127 if c.ids == nil { 128 // Collect everything. 129 collectErr = c.collectDir(nil, false, nil) 130 } else { 131 for _, s := range c.h.Sites { 132 s.pageMap.cfg.isRebuild = true 133 } 134 135 for _, id := range c.ids { 136 if id.p.IsLeafBundle() { 137 collectErr = c.collectDir( 138 id.p, 139 false, 140 func(fim hugofs.FileMetaInfo) bool { 141 return true 142 }, 143 ) 144 } else if id.p.IsBranchBundle() { 145 collectErr = c.collectDir( 146 id.p, 147 false, 148 func(fim hugofs.FileMetaInfo) bool { 149 if fim.IsDir() { 150 return true 151 } 152 fimp := fim.Meta().PathInfo 153 if fimp == nil { 154 return false 155 } 156 157 return strings.HasPrefix(fimp.Path(), paths.AddTrailingSlash(id.p.Dir())) 158 }, 159 ) 160 } else { 161 // We always start from a directory. 162 collectErr = c.collectDir(id.p, id.isDir, func(fim hugofs.FileMetaInfo) bool { 163 if id.delete || id.isDir { 164 if id.isDir { 165 return strings.HasPrefix(fim.Meta().PathInfo.Path(), paths.AddTrailingSlash(id.p.Path())) 166 } 167 168 return id.p.Dir() == fim.Meta().PathInfo.Dir() 169 } 170 171 if fim.Meta().PathInfo.IsLeafBundle() && id.p.BundleType() == paths.PathTypeContentSingle { 172 return id.p.Dir() == fim.Meta().PathInfo.Dir() 173 } 174 175 return id.p.Path() == fim.Meta().PathInfo.Path() 176 }) 177 } 178 179 if collectErr != nil { 180 break 181 } 182 } 183 184 } 185 186 werr := c.g.Wait() 187 if collectErr == nil { 188 collectErr = werr 189 } 190 191 return 192 } 193 194 func (c *pagesCollector) collectDir(dirPath *paths.Path, isDir bool, inFilter func(fim hugofs.FileMetaInfo) bool) error { 195 var dpath string 196 if dirPath != nil { 197 if isDir { 198 dpath = filepath.FromSlash(dirPath.Unnormalized().Path()) 199 } else { 200 dpath = filepath.FromSlash(dirPath.Unnormalized().Dir()) 201 } 202 } 203 204 if c.seenDirs[dpath] { 205 return nil 206 } 207 c.seenDirs[dpath] = true 208 209 root, err := c.fs.Stat(dpath) 210 if err != nil { 211 if os.IsNotExist(err) { 212 return nil 213 } 214 return err 215 } 216 217 rootm := root.(hugofs.FileMetaInfo) 218 219 if err := c.collectDirDir(dpath, rootm, inFilter); err != nil { 220 return err 221 } 222 223 return nil 224 } 225 226 func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, inFilter func(fim hugofs.FileMetaInfo) bool) error { 227 filter := func(fim hugofs.FileMetaInfo) bool { 228 if inFilter != nil { 229 return inFilter(fim) 230 } 231 return true 232 } 233 234 preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) { 235 filtered := readdir[:0] 236 for _, fi := range readdir { 237 if filter(fi) { 238 filtered = append(filtered, fi) 239 } 240 } 241 readdir = filtered 242 if len(readdir) == 0 { 243 return nil, nil 244 } 245 246 // Pick the first regular file. 247 var first hugofs.FileMetaInfo 248 for _, fi := range readdir { 249 if fi.IsDir() { 250 continue 251 } 252 first = fi 253 break 254 } 255 256 if first == nil { 257 // Only dirs, keep walking. 258 return readdir, nil 259 } 260 261 // Any bundle file will always be first. 262 firstPi := first.Meta().PathInfo 263 if firstPi == nil { 264 panic(fmt.Sprintf("collectDirDir: no path info for %q", first.Meta().Filename)) 265 } 266 267 if firstPi.IsLeafBundle() { 268 if err := c.handleBundleLeaf(dir, first, path, readdir); err != nil { 269 return nil, err 270 } 271 return nil, filepath.SkipDir 272 } 273 274 seen := map[hstrings.Tuple]bool{} 275 for _, fi := range readdir { 276 if fi.IsDir() { 277 continue 278 } 279 280 pi := fi.Meta().PathInfo 281 meta := fi.Meta() 282 283 // Filter out duplicate page or resource. 284 // These would eventually have been filtered out as duplicates when 285 // inserting them into the document store, 286 // but doing it here will preserve a consistent ordering. 287 baseLang := hstrings.Tuple{First: pi.Base(), Second: meta.Lang} 288 if seen[baseLang] { 289 continue 290 } 291 seen[baseLang] = true 292 293 if pi == nil { 294 panic(fmt.Sprintf("no path info for %q", meta.Filename)) 295 } 296 297 if meta.Lang == "" { 298 panic("lang not set") 299 } 300 301 if err := c.g.Enqueue(fi); err != nil { 302 return nil, err 303 } 304 } 305 306 // Keep walking. 307 return readdir, nil 308 } 309 310 var postHook hugofs.WalkHook 311 312 wfn := func(path string, fi hugofs.FileMetaInfo) error { 313 return nil 314 } 315 316 w := hugofs.NewWalkway( 317 hugofs.WalkwayConfig{ 318 Logger: c.logger, 319 Root: path, 320 Info: root, 321 Fs: c.fs, 322 IgnoreFile: c.h.SourceSpec.IgnoreFile, 323 HookPre: preHook, 324 HookPost: postHook, 325 WalkFn: wfn, 326 }) 327 328 return w.Walk() 329 } 330 331 func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error { 332 bundlePi := bundle.Meta().PathInfo 333 seen := map[hstrings.Tuple]bool{} 334 335 walk := func(path string, info hugofs.FileMetaInfo) error { 336 if info.IsDir() { 337 return nil 338 } 339 340 pi := info.Meta().PathInfo 341 342 if info != bundle { 343 // Everything inside a leaf bundle is a Resource, 344 // even the content pages. 345 // Note that we do allow index.md as page resources, but not in the bundle root. 346 if !pi.IsLeafBundle() || pi.Dir() != bundlePi.Dir() { 347 paths.ModifyPathBundleTypeResource(pi) 348 } 349 } 350 351 // Filter out duplicate page or resource. 352 // These would eventually have been filtered out as duplicates when 353 // inserting them into the document store, 354 // but doing it here will preserve a consistent ordering. 355 baseLang := hstrings.Tuple{First: pi.Base(), Second: info.Meta().Lang} 356 if seen[baseLang] { 357 return nil 358 } 359 seen[baseLang] = true 360 361 return c.g.Enqueue(info) 362 } 363 364 // Start a new walker from the given path. 365 w := hugofs.NewWalkway( 366 hugofs.WalkwayConfig{ 367 Root: inPath, 368 Fs: c.fs, 369 Logger: c.logger, 370 Info: dir, 371 DirEntries: readdir, 372 IgnoreFile: c.h.SourceSpec.IgnoreFile, 373 WalkFn: walk, 374 }) 375 376 return w.Walk() 377 }