github.com/kovansky/hugo@v0.92.3-0.20220224232819-63076e4ff19f/cache/filecache/filecache.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package filecache
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"io"
    20  	"io/ioutil"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/gohugoio/hugo/common/hugio"
    28  
    29  	"github.com/gohugoio/hugo/helpers"
    30  
    31  	"github.com/BurntSushi/locker"
    32  	"github.com/spf13/afero"
    33  )
    34  
    35  // ErrFatal is a sentinel for an unrecoverable error; ReadOrCreate returns it as-is instead of recreating the item.
    36  var ErrFatal = errors.New("fatal filecache error")
    37  
    38  const (
    39  	filecacheRootDirname = "filecache" // NOTE(review): not referenced in the visible code; presumably the root directory name for file caches — confirm.
    40  )
    41  
    42  // Cache caches a set of files in a directory. This is usually a directory on
    43  // disk, but since it is backed by an Afero file system, it can be anything.
    44  type Cache struct {
    45  	Fs afero.Fs // File system holding the cached files; cleaned item ids are used as paths within it.
    46  
    47  	// Max age for items in this cache. A negative duration means items never
    48  	// expire, 0 effectively turns this cache off.
    49  	maxAge time.Duration
    50  
    51  	// When set, this entire root directory is removed on expiration (the pruning code is not part of this listing).
    52  	pruneAllRootDir string
    53  
    54  	nlocker *lockTracker // Named per-id locks; also records every id that has been locked.
    55  }
    56  
    57  type lockTracker struct {
    58  	seenMu sync.RWMutex
    59  	seen   map[string]struct{}
    60  
    61  	*locker.Locker
    62  }
    63  
    64  // Lock tracks the ids in use. We use this information to do garbage collection
    65  // after a Hugo build.
    66  func (l *lockTracker) Lock(id string) {
    67  	l.seenMu.RLock()
    68  	if _, seen := l.seen[id]; !seen {
    69  		l.seenMu.RUnlock()
    70  		l.seenMu.Lock()
    71  		l.seen[id] = struct{}{}
    72  		l.seenMu.Unlock()
    73  	} else {
    74  		l.seenMu.RUnlock()
    75  	}
    76  
    77  	l.Locker.Lock(id)
    78  }
    79  
    80  // ItemInfo contains info about a cached file.
    81  type ItemInfo struct {
    82  	// Name is the cleaned item id, i.e. the file's name relative to the cache's file system.
    83  	Name string
    84  }
    85  
    86  // NewCache creates a new file cache with the given filesystem and max age.
    87  func NewCache(fs afero.Fs, maxAge time.Duration, pruneAllRootDir string) *Cache {
    88  	return &Cache{
    89  		Fs:              fs,
    90  		nlocker:         &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
    91  		maxAge:          maxAge,
    92  		pruneAllRootDir: pruneAllRootDir,
    93  	}
    94  }
    95  
    96  // lockedFile is a file with a lock that is released on Close.
    97  type lockedFile struct {
    98  	afero.File
    99  	unlock func()
   100  }
   101  
   102  func (l *lockedFile) Close() error {
   103  	defer l.unlock()
   104  	return l.File.Close()
   105  }
   106  
   107  // WriteCloser returns a transactional writer into the cache.
   108  // It's important that it's closed when done.
   109  func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) {
   110  	id = cleanID(id)
   111  	c.nlocker.Lock(id)
   112  
   113  	info := ItemInfo{Name: id}
   114  
   115  	f, err := helpers.OpenFileForWriting(c.Fs, id)
   116  	if err != nil {
   117  		c.nlocker.Unlock(id)
   118  		return info, nil, err
   119  	}
   120  
   121  	return info, &lockedFile{
   122  		File:   f,
   123  		unlock: func() { c.nlocker.Unlock(id) },
   124  	}, nil
   125  }
   126  
   127  // ReadOrCreate tries to lookup the file in cache.
   128  // If found, it is passed to read and then closed.
   129  // If not found a new file is created and passed to create, which should close
   130  // it when done.
   131  func (c *Cache) ReadOrCreate(id string,
   132  	read func(info ItemInfo, r io.ReadSeeker) error,
   133  	create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) {
   134  	id = cleanID(id)
   135  
   136  	c.nlocker.Lock(id)
   137  	defer c.nlocker.Unlock(id)
   138  
   139  	info = ItemInfo{Name: id}
   140  
   141  	if r := c.getOrRemove(id); r != nil {
   142  		err = read(info, r)
   143  		defer r.Close()
   144  		if err == nil || err == ErrFatal {
   145  			// See https://github.com/gohugoio/hugo/issues/6401
   146  			// To recover from file corruption we handle read errors
   147  			// as the cache item was not found.
   148  			// Any file permission issue will also fail in the next step.
   149  			return
   150  		}
   151  	}
   152  
   153  	f, err := helpers.OpenFileForWriting(c.Fs, id)
   154  	if err != nil {
   155  		return
   156  	}
   157  
   158  	err = create(info, f)
   159  
   160  	return
   161  }
   162  
   163  // GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
   164  // be invoked and the result cached.
   165  // This method is protected by a named lock using the given id as identifier.
   166  func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
   167  	id = cleanID(id)
   168  
   169  	c.nlocker.Lock(id)
   170  	defer c.nlocker.Unlock(id)
   171  
   172  	info := ItemInfo{Name: id}
   173  
   174  	if r := c.getOrRemove(id); r != nil {
   175  		return info, r, nil
   176  	}
   177  
   178  	var (
   179  		r   io.ReadCloser
   180  		err error
   181  	)
   182  
   183  	r, err = create()
   184  	if err != nil {
   185  		return info, nil, err
   186  	}
   187  
   188  	if c.maxAge == 0 {
   189  		// No caching.
   190  		return info, hugio.ToReadCloser(r), nil
   191  	}
   192  
   193  	var buff bytes.Buffer
   194  	return info,
   195  		hugio.ToReadCloser(&buff),
   196  		afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff))
   197  }
   198  
   199  // GetOrCreateBytes is the same as GetOrCreate, but produces a byte slice.
   200  func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) {
   201  	id = cleanID(id)
   202  
   203  	c.nlocker.Lock(id)
   204  	defer c.nlocker.Unlock(id)
   205  
   206  	info := ItemInfo{Name: id}
   207  
   208  	if r := c.getOrRemove(id); r != nil {
   209  		defer r.Close()
   210  		b, err := ioutil.ReadAll(r)
   211  		return info, b, err
   212  	}
   213  
   214  	var (
   215  		b   []byte
   216  		err error
   217  	)
   218  
   219  	b, err = create()
   220  	if err != nil {
   221  		return info, nil, err
   222  	}
   223  
   224  	if c.maxAge == 0 {
   225  		return info, b, nil
   226  	}
   227  
   228  	if err := afero.WriteReader(c.Fs, id, bytes.NewReader(b)); err != nil {
   229  		return info, nil, err
   230  	}
   231  	return info, b, nil
   232  }
   233  
   234  // GetBytes gets the file content with the given id from the cache, nil if none found.
   235  func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) {
   236  	id = cleanID(id)
   237  
   238  	c.nlocker.Lock(id)
   239  	defer c.nlocker.Unlock(id)
   240  
   241  	info := ItemInfo{Name: id}
   242  
   243  	if r := c.getOrRemove(id); r != nil {
   244  		defer r.Close()
   245  		b, err := ioutil.ReadAll(r)
   246  		return info, b, err
   247  	}
   248  
   249  	return info, nil, nil
   250  }
   251  
   252  // Get gets the file with the given id from the cahce, nil if none found.
   253  func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) {
   254  	id = cleanID(id)
   255  
   256  	c.nlocker.Lock(id)
   257  	defer c.nlocker.Unlock(id)
   258  
   259  	info := ItemInfo{Name: id}
   260  
   261  	r := c.getOrRemove(id)
   262  
   263  	return info, r, nil
   264  }
   265  
   266  // getOrRemove gets the file with the given id. If it's expired, it will
   267  // be removed.
   268  func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
   269  	if c.maxAge == 0 {
   270  		// No caching.
   271  		return nil
   272  	}
   273  
   274  	if c.maxAge > 0 {
   275  		fi, err := c.Fs.Stat(id)
   276  		if err != nil {
   277  			return nil
   278  		}
   279  
   280  		if c.isExpired(fi.ModTime()) {
   281  			c.Fs.Remove(id)
   282  			return nil
   283  		}
   284  	}
   285  
   286  	f, err := c.Fs.Open(id)
   287  	if err != nil {
   288  		return nil
   289  	}
   290  
   291  	return f
   292  }
   293  
   294  func (c *Cache) isExpired(modTime time.Time) bool {
   295  	if c.maxAge < 0 {
   296  		return false
   297  	}
   298  	return c.maxAge == 0 || time.Since(modTime) > c.maxAge
   299  }
   300  
   301  // For testing
   302  func (c *Cache) getString(id string) string {
   303  	id = cleanID(id)
   304  
   305  	c.nlocker.Lock(id)
   306  	defer c.nlocker.Unlock(id)
   307  
   308  	f, err := c.Fs.Open(id)
   309  	if err != nil {
   310  		return ""
   311  	}
   312  	defer f.Close()
   313  
   314  	b, _ := ioutil.ReadAll(f)
   315  	return string(b)
   316  }
   317  
   318  // Caches is a named set of caches.
   319  type Caches map[string]*Cache
   320  
   321  // Get gets a named cache, nil if none found.
   322  func (f Caches) Get(name string) *Cache {
   323  	return f[strings.ToLower(name)]
   324  }
   325  
   326  // NewCaches creates a new set of file caches from the given
   327  // configuration.
   328  func NewCaches(p *helpers.PathSpec) (Caches, error) {
   329  	var dcfg Configs
   330  	if c, ok := p.Cfg.Get("filecacheConfigs").(Configs); ok {
   331  		dcfg = c
   332  	} else {
   333  		var err error
   334  		dcfg, err = DecodeConfig(p.Fs.Source, p.Cfg)
   335  		if err != nil {
   336  			return nil, err
   337  		}
   338  	}
   339  
   340  	fs := p.Fs.Source
   341  
   342  	m := make(Caches)
   343  	for k, v := range dcfg {
   344  		var cfs afero.Fs
   345  
   346  		if v.isResourceDir {
   347  			cfs = p.BaseFs.ResourcesCache
   348  		} else {
   349  			cfs = fs
   350  		}
   351  
   352  		if cfs == nil {
   353  			// TODO(bep) we still have some places that do not initialize the
   354  			// full dependencies of a site, e.g. the import Jekyll command.
   355  			// That command does not need these caches, so let us just continue
   356  			// for now.
   357  			continue
   358  		}
   359  
   360  		baseDir := v.Dir
   361  
   362  		if err := cfs.MkdirAll(baseDir, 0777); err != nil && !os.IsExist(err) {
   363  			return nil, err
   364  		}
   365  
   366  		bfs := afero.NewBasePathFs(cfs, baseDir)
   367  
   368  		var pruneAllRootDir string
   369  		if k == cacheKeyModules {
   370  			pruneAllRootDir = "pkg"
   371  		}
   372  
   373  		m[k] = NewCache(bfs, v.MaxAge, pruneAllRootDir)
   374  	}
   375  
   376  	return m, nil
   377  }
   378  
   379  func cleanID(name string) string {
   380  	return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
   381  }