github.com/graemephi/kahugo@v0.62.3-0.20211121071557-d78c0423784d/cache/filecache/filecache.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package filecache
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"io"
    20  	"io/ioutil"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/gohugoio/hugo/common/hugio"
    28  
    29  	"github.com/gohugoio/hugo/helpers"
    30  
    31  	"github.com/BurntSushi/locker"
    32  	"github.com/spf13/afero"
    33  )
    34  
    35  // ErrFatal can be used to signal an unrecoverable error.
    36  var ErrFatal = errors.New("fatal filecache error")
    37  
    38  const (
    39  	filecacheRootDirname = "filecache"
    40  )
    41  
    42  // Cache caches a set of files in a directory. This is usually a file on
    43  // disk, but since this is backed by an Afero file system, it can be anything.
    44  type Cache struct {
    45  	Fs afero.Fs
    46  
    47  	// Max age for items in this cache. Negative duration means forever,
    48  	// 0 is effectively turning this cache off.
    49  	maxAge time.Duration
    50  
    51  	// When set, we just remove this entire root directory on expiration.
    52  	pruneAllRootDir string
    53  
    54  	nlocker *lockTracker
    55  }
    56  
    57  type lockTracker struct {
    58  	seenMu sync.RWMutex
    59  	seen   map[string]struct{}
    60  
    61  	*locker.Locker
    62  }
    63  
    64  // Lock tracks the ids in use. We use this information to do garbage collection
    65  // after a Hugo build.
    66  func (l *lockTracker) Lock(id string) {
    67  	l.seenMu.RLock()
    68  	if _, seen := l.seen[id]; !seen {
    69  		l.seenMu.RUnlock()
    70  		l.seenMu.Lock()
    71  		l.seen[id] = struct{}{}
    72  		l.seenMu.Unlock()
    73  	} else {
    74  		l.seenMu.RUnlock()
    75  	}
    76  
    77  	l.Locker.Lock(id)
    78  }
    79  
    80  // ItemInfo contains info about a cached file.
    81  type ItemInfo struct {
    82  	// This is the file's name relative to the cache's filesystem.
    83  	Name string
    84  }
    85  
    86  // NewCache creates a new file cache with the given filesystem and max age.
    87  func NewCache(fs afero.Fs, maxAge time.Duration, pruneAllRootDir string) *Cache {
    88  	return &Cache{
    89  		Fs:              fs,
    90  		nlocker:         &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
    91  		maxAge:          maxAge,
    92  		pruneAllRootDir: pruneAllRootDir,
    93  	}
    94  }
    95  
    96  // lockedFile is a file with a lock that is released on Close.
    97  type lockedFile struct {
    98  	afero.File
    99  	unlock func()
   100  }
   101  
   102  func (l *lockedFile) Close() error {
   103  	defer l.unlock()
   104  	return l.File.Close()
   105  }
   106  
   107  // WriteCloser returns a transactional writer into the cache.
   108  // It's important that it's closed when done.
   109  func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) {
   110  	id = cleanID(id)
   111  	c.nlocker.Lock(id)
   112  
   113  	info := ItemInfo{Name: id}
   114  
   115  	f, err := helpers.OpenFileForWriting(c.Fs, id)
   116  	if err != nil {
   117  		c.nlocker.Unlock(id)
   118  		return info, nil, err
   119  	}
   120  
   121  	return info, &lockedFile{
   122  		File:   f,
   123  		unlock: func() { c.nlocker.Unlock(id) },
   124  	}, nil
   125  }
   126  
   127  // ReadOrCreate tries to lookup the file in cache.
   128  // If found, it is passed to read and then closed.
   129  // If not found a new file is created and passed to create, which should close
   130  // it when done.
   131  func (c *Cache) ReadOrCreate(id string,
   132  	read func(info ItemInfo, r io.ReadSeeker) error,
   133  	create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) {
   134  	id = cleanID(id)
   135  
   136  	c.nlocker.Lock(id)
   137  	defer c.nlocker.Unlock(id)
   138  
   139  	info = ItemInfo{Name: id}
   140  
   141  	if r := c.getOrRemove(id); r != nil {
   142  		err = read(info, r)
   143  		defer r.Close()
   144  		if err == nil || err == ErrFatal {
   145  			// See https://github.com/gohugoio/hugo/issues/6401
   146  			// To recover from file corruption we handle read errors
   147  			// as the cache item was not found.
   148  			// Any file permission issue will also fail in the next step.
   149  			return
   150  		}
   151  	}
   152  
   153  	f, err := helpers.OpenFileForWriting(c.Fs, id)
   154  	if err != nil {
   155  		return
   156  	}
   157  
   158  	err = create(info, f)
   159  
   160  	return
   161  }
   162  
   163  // GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
   164  // be invoked and the result cached.
   165  // This method is protected by a named lock using the given id as identifier.
   166  func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
   167  	id = cleanID(id)
   168  
   169  	c.nlocker.Lock(id)
   170  	defer c.nlocker.Unlock(id)
   171  
   172  	info := ItemInfo{Name: id}
   173  
   174  	if r := c.getOrRemove(id); r != nil {
   175  		return info, r, nil
   176  	}
   177  
   178  	r, err := create()
   179  	if err != nil {
   180  		return info, nil, err
   181  	}
   182  
   183  	if c.maxAge == 0 {
   184  		// No caching.
   185  		return info, hugio.ToReadCloser(r), nil
   186  	}
   187  
   188  	var buff bytes.Buffer
   189  	return info,
   190  		hugio.ToReadCloser(&buff),
   191  		afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff))
   192  }
   193  
   194  // GetOrCreateBytes is the same as GetOrCreate, but produces a byte slice.
   195  func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) {
   196  	id = cleanID(id)
   197  
   198  	c.nlocker.Lock(id)
   199  	defer c.nlocker.Unlock(id)
   200  
   201  	info := ItemInfo{Name: id}
   202  
   203  	if r := c.getOrRemove(id); r != nil {
   204  		defer r.Close()
   205  		b, err := ioutil.ReadAll(r)
   206  		return info, b, err
   207  	}
   208  
   209  	b, err := create()
   210  	if err != nil {
   211  		return info, nil, err
   212  	}
   213  
   214  	if c.maxAge == 0 {
   215  		return info, b, nil
   216  	}
   217  
   218  	if err := afero.WriteReader(c.Fs, id, bytes.NewReader(b)); err != nil {
   219  		return info, nil, err
   220  	}
   221  	return info, b, nil
   222  }
   223  
   224  // GetBytes gets the file content with the given id from the cache, nil if none found.
   225  func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) {
   226  	id = cleanID(id)
   227  
   228  	c.nlocker.Lock(id)
   229  	defer c.nlocker.Unlock(id)
   230  
   231  	info := ItemInfo{Name: id}
   232  
   233  	if r := c.getOrRemove(id); r != nil {
   234  		defer r.Close()
   235  		b, err := ioutil.ReadAll(r)
   236  		return info, b, err
   237  	}
   238  
   239  	return info, nil, nil
   240  }
   241  
   242  // Get gets the file with the given id from the cahce, nil if none found.
   243  func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) {
   244  	id = cleanID(id)
   245  
   246  	c.nlocker.Lock(id)
   247  	defer c.nlocker.Unlock(id)
   248  
   249  	info := ItemInfo{Name: id}
   250  
   251  	r := c.getOrRemove(id)
   252  
   253  	return info, r, nil
   254  }
   255  
   256  // getOrRemove gets the file with the given id. If it's expired, it will
   257  // be removed.
   258  func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
   259  	if c.maxAge == 0 {
   260  		// No caching.
   261  		return nil
   262  	}
   263  
   264  	if c.maxAge > 0 {
   265  		fi, err := c.Fs.Stat(id)
   266  		if err != nil {
   267  			return nil
   268  		}
   269  
   270  		if c.isExpired(fi.ModTime()) {
   271  			c.Fs.Remove(id)
   272  			return nil
   273  		}
   274  	}
   275  
   276  	f, err := c.Fs.Open(id)
   277  	if err != nil {
   278  		return nil
   279  	}
   280  
   281  	return f
   282  }
   283  
   284  func (c *Cache) isExpired(modTime time.Time) bool {
   285  	if c.maxAge < 0 {
   286  		return false
   287  	}
   288  	return c.maxAge == 0 || time.Since(modTime) > c.maxAge
   289  }
   290  
   291  // For testing
   292  func (c *Cache) getString(id string) string {
   293  	id = cleanID(id)
   294  
   295  	c.nlocker.Lock(id)
   296  	defer c.nlocker.Unlock(id)
   297  
   298  	f, err := c.Fs.Open(id)
   299  	if err != nil {
   300  		return ""
   301  	}
   302  	defer f.Close()
   303  
   304  	b, _ := ioutil.ReadAll(f)
   305  	return string(b)
   306  }
   307  
   308  // Caches is a named set of caches.
   309  type Caches map[string]*Cache
   310  
   311  // Get gets a named cache, nil if none found.
   312  func (f Caches) Get(name string) *Cache {
   313  	return f[strings.ToLower(name)]
   314  }
   315  
   316  // NewCaches creates a new set of file caches from the given
   317  // configuration.
   318  func NewCaches(p *helpers.PathSpec) (Caches, error) {
   319  	var dcfg Configs
   320  	if c, ok := p.Cfg.Get("filecacheConfigs").(Configs); ok {
   321  		dcfg = c
   322  	} else {
   323  		var err error
   324  		dcfg, err = DecodeConfig(p.Fs.Source, p.Cfg)
   325  		if err != nil {
   326  			return nil, err
   327  		}
   328  	}
   329  
   330  	fs := p.Fs.Source
   331  
   332  	m := make(Caches)
   333  	for k, v := range dcfg {
   334  		var cfs afero.Fs
   335  
   336  		if v.isResourceDir {
   337  			cfs = p.BaseFs.ResourcesCache
   338  		} else {
   339  			cfs = fs
   340  		}
   341  
   342  		if cfs == nil {
   343  			// TODO(bep) we still have some places that do not initialize the
   344  			// full dependencies of a site, e.g. the import Jekyll command.
   345  			// That command does not need these caches, so let us just continue
   346  			// for now.
   347  			continue
   348  		}
   349  
   350  		baseDir := v.Dir
   351  
   352  		if err := cfs.MkdirAll(baseDir, 0777); err != nil && !os.IsExist(err) {
   353  			return nil, err
   354  		}
   355  
   356  		bfs := afero.NewBasePathFs(cfs, baseDir)
   357  
   358  		var pruneAllRootDir string
   359  		if k == cacheKeyModules {
   360  			pruneAllRootDir = "pkg"
   361  		}
   362  
   363  		m[k] = NewCache(bfs, v.MaxAge, pruneAllRootDir)
   364  	}
   365  
   366  	return m, nil
   367  }
   368  
   369  func cleanID(name string) string {
   370  	return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
   371  }