github.com/searKing/golang/go@v1.2.117/os/file_cache.go (about) 1 // Copyright 2023 The searKing Author. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package os 6 7 import ( 8 "fmt" 9 "io" 10 "os" 11 "path/filepath" 12 "strings" 13 "time" 14 15 "github.com/searKing/golang/go/crypto/md5" 16 filepath_ "github.com/searKing/golang/go/path/filepath" 17 ) 18 19 // CacheFile is a package cache(Eventual consistency), backed by a file system directory tree. 20 // 21 // It is safe for multiple processes on a single machine to use the 22 // same cache directory in a local file system simultaneously. 23 // They will coordinate using operating system file locks and may 24 // duplicate effort but will not corrupt the cache. 25 // 26 // However, it is NOT safe for multiple processes on different machines 27 // to share a cache directory (for example, if the directory were stored 28 // in a network file system). File locking is notoriously unreliable in 29 // network file systems and may not suffice to protect the cache. 30 // 31 //go:generate go-option -type "CacheFile" 32 type CacheFile struct { 33 BucketRootDir string // cache root dir 34 // generate bucket key from key(file path) 35 // bucket key should not contain any of the magic characters recognized by [filepath.Match] 36 // otherwise, bucket key will be escaped by MD5CacheKey 37 // see: https://github.com/golang/go/issues/13516 38 BucketKeyFunc func(key string) string 39 40 CacheMetaExt string // the file name extension used by path. ".cache" if empty 41 CacheExpiredAfter time.Duration // Cache file expiration time, lazy expire cache files base on cache URL modification time 42 } 43 44 func NewCacheFile(opts ...CacheFileOption) *CacheFile { 45 var f CacheFile 46 f.ApplyOptions(opts...) 47 if f.CacheMetaExt == "" { 48 f.CacheMetaExt = ".cache" 49 } 50 if k := f.BucketKeyFunc; k != nil { 51 f.BucketKeyFunc = func(key string) string { 52 bk := k(key) 53 if hasMeta(bk) { 54 return MD5CacheKey(bk) 55 } 56 return bk 57 } 58 } 59 return &f 60 } 61 62 func (f *CacheFile) BucketKey(name string) string { 63 if f.BucketKeyFunc != nil { 64 return f.BucketKeyFunc(name) 65 } 66 return MD5CacheKey(name) 67 } 68 69 // Get looks up the file in the cache and returns 70 // the cache name of the corresponding data file. 71 func (f *CacheFile) Get(name string) (cacheFilePath, cacheMetaPath string, hit bool, err error) { 72 cacheFilePath = filepath.Join(f.BucketRootDir, f.BucketKey(name)) 73 cacheMetaPath = cacheFilePath + f.CacheMetaExt 74 75 cacheFilePath, cacheMetaPath, err = f.getCacheMeta(name, cacheFilePath+".*"+f.CacheMetaExt) 76 if err != nil { 77 return "", "", false, err 78 } 79 80 // double check 81 // handle special case below: 82 // 1. cache hit 83 // <protect> 2. cache removed by other process or goroutine -- not controllable 84 // 3. refresh cache's ModTime 85 { 86 info, err_ := os.Stat(cacheMetaPath) 87 if err_ != nil { 88 hit = false 89 return 90 } 91 // violate cache file if cache expired 92 expired := time.Since(info.ModTime()) > f.CacheExpiredAfter 93 if expired { 94 hit = false 95 return 96 } 97 info, err_ = os.Stat(cacheFilePath) 98 if err_ != nil { 99 hit = false 100 return 101 } 102 if info.Size() == 0 { 103 hit = false 104 return 105 } 106 } 107 108 // STEP3 cache url not conflict, refresh ModTime of cache file and cache file's metadata 109 hit = true 110 _ = ChtimesNow(cacheMetaPath) 111 _ = ChtimesNow(cacheFilePath) 112 return 113 } 114 115 func (f *CacheFile) Put(name string, r io.Reader) (cacheFilePath string, refreshed bool, err error) { 116 cacheFilePath, cacheMetaPath, hit, err := f.Get(name) 117 if err != nil { 118 return "", false, err 119 } 120 if hit { 121 return cacheFilePath, false, nil 122 } 123 124 err = WriteRenameAllFrom(cacheFilePath, r) 125 if err != nil { 126 return "", false, fmt.Errorf("failed to create cache file: %w", err) 127 } 128 err = WriteRenameAll(cacheMetaPath, []byte(name)) 129 if err != nil { 130 _ = os.Remove(cacheFilePath) 131 return "", false, fmt.Errorf("failed to create cache meta: %w", err) 132 } 133 return cacheFilePath, true, nil 134 } 135 136 func (f *CacheFile) getCacheMeta(key, cacheMetaPathPattern string) (cacheFilePath, cacheMetaPath string, err error) { 137 var hitMeta bool 138 139 // STEP1 clean expired cache file 140 _ = filepath_.WalkGlob(cacheMetaPathPattern, func(path string) error { 141 cacheMetaPath = path 142 cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt) 143 info, err := os.Stat(cacheMetaPath) 144 if err == nil { 145 // violate cache file if cache expired 146 expired := time.Since(info.ModTime()) > f.CacheExpiredAfter 147 if expired { 148 _ = os.Remove(cacheFilePath) 149 _ = os.Remove(cacheMetaPath) 150 return nil 151 } 152 } 153 return nil 154 }) 155 156 // STEP2 search for cache file in cache open list 157 _ = filepath_.WalkGlob(cacheMetaPathPattern, func(path string) error { 158 cacheMetaPath = path 159 cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt) 160 // verify whether if cache key in cache file is match 161 keyInCache, _ := os.ReadFile(cacheMetaPath) 162 if string(keyInCache) == key { 163 hitMeta = true 164 return filepath.SkipAll 165 } 166 // cache key conflict, continue search cache file list 167 return nil 168 }) 169 if hitMeta { 170 return 171 } 172 173 // STEP3 add new cache file to cache open list 174 // foo.txt.* -> foo.txt.[0,1,2,...], which exists and seq is max 175 nf, _, err := NextFile(cacheMetaPathPattern, 0) 176 if err != nil { 177 return "", "", fmt.Errorf("failed to open next cache meta: %w", err) 178 } 179 defer nf.Close() 180 // STEP3 cache url not conflict, refresh ModTime of cache file and cache file's metadata 181 cacheMetaPath = nf.Name() 182 cacheFilePath = strings.TrimSuffix(cacheMetaPath, f.CacheMetaExt) 183 _, err = nf.WriteString(key) 184 if err != nil { 185 return "", "", fmt.Errorf("failed to write next cache meta: %w", err) 186 } 187 _ = os.Remove(cacheFilePath) 188 return 189 } 190 191 func MD5CacheKey(s string) string { 192 // Special CASE 1: filename-as-part-of-a-query-string 193 // http://foo.com?url=http://bar.com/kitty.jpg&filename=kitty.jpg 194 // https://stackoverflow.com/questions/28915717/how-does-one-safely-pass-a-url-and-filename-as-part-of-a-query-string 195 // Special CASE 2: filename-as-url-path,but different by various version in query 196 // http://bucket.s3.amazonaws.com/my-image.jpg?versionId=L4kqtJlcpXroDTDmpUMLUo 197 // https://docs.aws.amazon.com/AmazonS3/latest/userguide/RetrievingObjectVersions.html 198 // https://cloud.tencent.com/document/product/436/19883 199 return md5.SumHex(s) 200 }