github.com/searKing/golang/go@v1.2.117/os/file_cache.go (about)

     1  // Copyright 2023 The searKing Author. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package os
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/searKing/golang/go/crypto/md5"
    16  	filepath_ "github.com/searKing/golang/go/path/filepath"
    17  )
    18  
// CacheFile is a file cache (eventually consistent), backed by a file system directory tree.
//
// Every cached entry consists of two files below BucketRootDir: a data file and
// a metadata file. The metadata file path is the data file path with
// CacheMetaExt appended, and its content is the key the entry was stored under.
//
// NOTE(review): earlier documentation of this type stated that it is safe for
// multiple processes on a single machine to share one cache directory because
// they coordinate using operating system file locks (possibly duplicating
// effort, but never corrupting the cache), and that it is NOT safe to share a
// cache directory between machines (for example over a network file system,
// where file locking is unreliable). No locking is visible in this file —
// confirm against the helpers it calls before relying on either statement.
//
//go:generate go-option -type "CacheFile"
type CacheFile struct {
	BucketRootDir string // root directory under which all cache files are created
	// BucketKeyFunc derives the bucket key (the file name stem) from a key (file path).
	// The bucket key should not contain any of the magic characters recognized by [filepath.Match];
	// NewCacheFile wraps this function so that such a bucket key is replaced by MD5CacheKey(bucket key).
	// When nil, BucketKey falls back to MD5CacheKey of the key itself.
	// see: https://github.com/golang/go/issues/13516
	BucketKeyFunc func(key string) string

	CacheMetaExt      string        // file name extension of the metadata file; NewCacheFile sets ".cache" if empty
	CacheExpiredAfter time.Duration // entries whose metadata file was modified longer ago than this are treated as expired (checked lazily on lookup); NOTE(review): the zero value therefore expires every entry — confirm this is intended
}
    43  
    44  func NewCacheFile(opts ...CacheFileOption) *CacheFile {
    45  	var f CacheFile
    46  	f.ApplyOptions(opts...)
    47  	if f.CacheMetaExt == "" {
    48  		f.CacheMetaExt = ".cache"
    49  	}
    50  	if k := f.BucketKeyFunc; k != nil {
    51  		f.BucketKeyFunc = func(key string) string {
    52  			bk := k(key)
    53  			if hasMeta(bk) {
    54  				return MD5CacheKey(bk)
    55  			}
    56  			return bk
    57  		}
    58  	}
    59  	return &f
    60  }
    61  
    62  func (f *CacheFile) BucketKey(name string) string {
    63  	if f.BucketKeyFunc != nil {
    64  		return f.BucketKeyFunc(name)
    65  	}
    66  	return MD5CacheKey(name)
    67  }
    68  
    69  // Get looks up the file in the cache and returns
    70  // the cache name of the corresponding data file.
    71  func (f *CacheFile) Get(name string) (cacheFilePath, cacheMetaPath string, hit bool, err error) {
    72  	cacheFilePath = filepath.Join(f.BucketRootDir, f.BucketKey(name))
    73  	cacheMetaPath = cacheFilePath + f.CacheMetaExt
    74  
    75  	cacheFilePath, cacheMetaPath, err = f.getCacheMeta(name, cacheFilePath+".*"+f.CacheMetaExt)
    76  	if err != nil {
    77  		return "", "", false, err
    78  	}
    79  
    80  	// double check
    81  	// handle special case below:
    82  	// 1. cache hit
    83  	// <protect> 2. cache removed by other process or goroutine -- not controllable
    84  	// 3. refresh cache's ModTime
    85  	{
    86  		info, err_ := os.Stat(cacheMetaPath)
    87  		if err_ != nil {
    88  			hit = false
    89  			return
    90  		}
    91  		// violate cache file if cache expired
    92  		expired := time.Since(info.ModTime()) > f.CacheExpiredAfter
    93  		if expired {
    94  			hit = false
    95  			return
    96  		}
    97  		info, err_ = os.Stat(cacheFilePath)
    98  		if err_ != nil {
    99  			hit = false
   100  			return
   101  		}
   102  		if info.Size() == 0 {
   103  			hit = false
   104  			return
   105  		}
   106  	}
   107  
   108  	// STEP3 cache url not conflict, refresh ModTime of cache file and cache file's metadata
   109  	hit = true
   110  	_ = ChtimesNow(cacheMetaPath)
   111  	_ = ChtimesNow(cacheFilePath)
   112  	return
   113  }
   114  
   115  func (f *CacheFile) Put(name string, r io.Reader) (cacheFilePath string, refreshed bool, err error) {
   116  	cacheFilePath, cacheMetaPath, hit, err := f.Get(name)
   117  	if err != nil {
   118  		return "", false, err
   119  	}
   120  	if hit {
   121  		return cacheFilePath, false, nil
   122  	}
   123  
   124  	err = WriteRenameAllFrom(cacheFilePath, r)
   125  	if err != nil {
   126  		return "", false, fmt.Errorf("failed to create cache file: %w", err)
   127  	}
   128  	err = WriteRenameAll(cacheMetaPath, []byte(name))
   129  	if err != nil {
   130  		_ = os.Remove(cacheFilePath)
   131  		return "", false, fmt.Errorf("failed to create cache meta: %w", err)
   132  	}
   133  	return cacheFilePath, true, nil
   134  }
   135  
   136  func (f *CacheFile) getCacheMeta(key, cacheMetaPathPattern string) (cacheFilePath, cacheMetaPath string, err error) {
   137  	var hitMeta bool
   138  
   139  	// STEP1 clean expired cache file
   140  	_ = filepath_.WalkGlob(cacheMetaPathPattern, func(path string) error {
   141  		cacheMetaPath = path
   142  		cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt)
   143  		info, err := os.Stat(cacheMetaPath)
   144  		if err == nil {
   145  			// violate cache file if cache expired
   146  			expired := time.Since(info.ModTime()) > f.CacheExpiredAfter
   147  			if expired {
   148  				_ = os.Remove(cacheFilePath)
   149  				_ = os.Remove(cacheMetaPath)
   150  				return nil
   151  			}
   152  		}
   153  		return nil
   154  	})
   155  
   156  	// STEP2 search for cache file in cache open list
   157  	_ = filepath_.WalkGlob(cacheMetaPathPattern, func(path string) error {
   158  		cacheMetaPath = path
   159  		cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt)
   160  		// verify whether if cache key in cache file is match
   161  		keyInCache, _ := os.ReadFile(cacheMetaPath)
   162  		if string(keyInCache) == key {
   163  			hitMeta = true
   164  			return filepath.SkipAll
   165  		}
   166  		// cache key conflict, continue search cache file list
   167  		return nil
   168  	})
   169  	if hitMeta {
   170  		return
   171  	}
   172  
   173  	// STEP3 add new cache file to cache open list
   174  	// foo.txt.* -> foo.txt.[0,1,2,...], which exists and seq is max
   175  	nf, _, err := NextFile(cacheMetaPathPattern, 0)
   176  	if err != nil {
   177  		return "", "", fmt.Errorf("failed to open next cache meta: %w", err)
   178  	}
   179  	defer nf.Close()
   180  	// STEP3 cache url not conflict, refresh ModTime of cache file and cache file's metadata
   181  	cacheMetaPath = nf.Name()
   182  	cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt)
   183  	_, err = nf.WriteString(key)
   184  	if err != nil {
   185  		return "", "", fmt.Errorf("failed to write next cache meta: %w", err)
   186  	}
   187  	_ = os.Remove(cacheFilePath)
   188  	return
   189  }
   190  
   191  func MD5CacheKey(s string) string {
   192  	// Special CASE 1: filename-as-part-of-a-query-string
   193  	// http://foo.com?url=http://bar.com/kitty.jpg&filename=kitty.jpg
   194  	// https://stackoverflow.com/questions/28915717/how-does-one-safely-pass-a-url-and-filename-as-part-of-a-query-string
   195  	// Special CASE 2: filename-as-url-path,but different by various version in query
   196  	// http://bucket.s3.amazonaws.com/my-image.jpg?versionId=L4kqtJlcpXroDTDmpUMLUo
   197  	// https://docs.aws.amazon.com/AmazonS3/latest/userguide/RetrievingObjectVersions.html
   198  	// https://cloud.tencent.com/document/product/436/19883
   199  	return md5.SumHex(s)
   200  }