github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camput/kvcache.go (about) 1 /* 2 Copyright 2013 The Camlistore Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "errors" 23 "fmt" 24 "hash/crc32" 25 "log" 26 "net/url" 27 "os" 28 "path/filepath" 29 "sort" 30 "strconv" 31 "strings" 32 "time" 33 34 "camlistore.org/pkg/blob" 35 "camlistore.org/pkg/client" 36 "camlistore.org/pkg/kvutil" 37 "camlistore.org/pkg/osutil" 38 "camlistore.org/third_party/github.com/cznic/kv" 39 ) 40 41 var errCacheMiss = errors.New("not in cache") 42 43 // KvHaveCache is a HaveCache on top of a single 44 // mutable database file on disk using github.com/cznic/kv. 45 // It stores the blobref in binary as the key, and 46 // the blobsize in binary as the value. 47 // Access to the cache is restricted to one process 48 // at a time with a lock file. Close should be called 49 // to remove the lock. 50 type KvHaveCache struct { 51 filename string 52 db *kv.DB 53 } 54 55 func NewKvHaveCache(gen string) *KvHaveCache { 56 cleanCacheDir() 57 fullPath := filepath.Join(osutil.CacheDir(), "camput.havecache."+escapeGen(gen)+".kv") 58 db, err := kvutil.Open(fullPath, nil) 59 if err != nil { 60 log.Fatalf("Could not create/open new have cache at %v, %v", fullPath, err) 61 } 62 return &KvHaveCache{ 63 filename: fullPath, 64 db: db, 65 } 66 } 67 68 // Close should be called to commit all the writes 69 // to the db and to unlock the file. 70 func (c *KvHaveCache) Close() error { 71 return c.db.Close() 72 } 73 74 func (c *KvHaveCache) StatBlobCache(br blob.Ref) (size uint32, ok bool) { 75 if !br.Valid() { 76 return 77 } 78 binBr, _ := br.MarshalBinary() 79 binVal, err := c.db.Get(nil, binBr) 80 if err != nil { 81 log.Fatalf("Could not query have cache %v for %v: %v", c.filename, br, err) 82 } 83 if binVal == nil { 84 cachelog.Printf("have cache MISS on %v", br) 85 return 86 } 87 val, err := strconv.ParseUint(string(binVal), 10, 32) 88 if err != nil { 89 log.Fatalf("Could not decode have cache binary value for %v: %v", br, err) 90 } 91 if val < 0 { 92 log.Fatalf("Error decoding have cache binary value for %v: size=%d", br, val) 93 } 94 cachelog.Printf("have cache HIT on %v", br) 95 return uint32(val), true 96 } 97 98 func (c *KvHaveCache) NoteBlobExists(br blob.Ref, size uint32) { 99 if !br.Valid() { 100 return 101 } 102 if size < 0 { 103 log.Fatalf("Got a negative blob size to note in have cache for %v", br) 104 } 105 binBr, _ := br.MarshalBinary() 106 binVal := []byte(strconv.Itoa(int(size))) 107 cachelog.Printf("Adding to have cache %v: %q", br, binVal) 108 _, _, err := c.db.Put(nil, binBr, 109 func(binBr, old []byte) ([]byte, bool, error) { 110 // We do not overwrite dups 111 if old != nil { 112 return nil, false, nil 113 } 114 return binVal, true, nil 115 }) 116 if err != nil { 117 log.Fatalf("Could not write %v in have cache: %v", br, err) 118 } 119 } 120 121 // KvStatCache is an UploadCache on top of a single 122 // mutable database file on disk using github.com/cznic/kv. 123 // It stores a binary combination of an os.FileInfo fingerprint and 124 // a client.Putresult as the key, and the blobsize in binary as 125 // the value. 126 // Access to the cache is restricted to one process 127 // at a time with a lock file. Close should be called 128 // to remove the lock. 129 type KvStatCache struct { 130 filename string 131 db *kv.DB 132 } 133 134 func NewKvStatCache(gen string) *KvStatCache { 135 fullPath := filepath.Join(osutil.CacheDir(), "camput.statcache."+escapeGen(gen)+".kv") 136 db, err := kvutil.Open(fullPath, nil) 137 if err != nil { 138 log.Fatalf("Could not create/open new stat cache at %v, %v", fullPath, err) 139 } 140 return &KvStatCache{ 141 filename: fullPath, 142 db: db, 143 } 144 } 145 146 // Close should be called to commit all the writes 147 // to the db and to unlock the file. 148 func (c *KvStatCache) Close() error { 149 return c.db.Close() 150 } 151 152 func (c *KvStatCache) CachedPutResult(pwd, filename string, fi os.FileInfo, withPermanode bool) (*client.PutResult, error) { 153 fullPath := fullpath(pwd, filename) 154 cacheKey := &statCacheKey{ 155 Filepath: fullPath, 156 Permanode: withPermanode, 157 } 158 binKey, err := cacheKey.marshalBinary() 159 binVal, err := c.db.Get(nil, binKey) 160 if err != nil { 161 log.Fatalf("Could not query stat cache %v for %q: %v", binKey, fullPath, err) 162 } 163 if binVal == nil { 164 cachelog.Printf("stat cache MISS on %q", binKey) 165 return nil, errCacheMiss 166 } 167 val := &statCacheValue{} 168 if err = val.unmarshalBinary(binVal); err != nil { 169 return nil, fmt.Errorf("Bogus stat cached value for %q: %v", binKey, err) 170 } 171 fp := fileInfoToFingerprint(fi) 172 if val.Fingerprint != fp { 173 cachelog.Printf("cache MISS on %q: stats not equal:\n%#v\n%#v", binKey, val.Fingerprint, fp) 174 return nil, errCacheMiss 175 } 176 cachelog.Printf("stat cache HIT on %q", binKey) 177 return &val.Result, nil 178 } 179 180 func (c *KvStatCache) AddCachedPutResult(pwd, filename string, fi os.FileInfo, pr *client.PutResult, withPermanode bool) { 181 fullPath := fullpath(pwd, filename) 182 cacheKey := &statCacheKey{ 183 Filepath: fullPath, 184 Permanode: withPermanode, 185 } 186 val := &statCacheValue{fileInfoToFingerprint(fi), *pr} 187 188 binKey, err := cacheKey.marshalBinary() 189 if err != nil { 190 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 191 } 192 binVal, err := val.marshalBinary() 193 if err != nil { 194 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 195 } 196 cachelog.Printf("Adding to stat cache %q: %q", binKey, binVal) 197 _, _, err = c.db.Put(nil, binKey, 198 func(binKey, old []byte) ([]byte, bool, error) { 199 // We do not overwrite dups 200 if old != nil { 201 return nil, false, nil 202 } 203 return binVal, true, nil 204 }) 205 if err != nil { 206 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 207 } 208 } 209 210 type statCacheKey struct { 211 Filepath string 212 Permanode bool // whether -filenodes is being used. 213 } 214 215 // marshalBinary returns a more compact binary 216 // representation of the contents of sk. 217 func (sk *statCacheKey) marshalBinary() ([]byte, error) { 218 if sk == nil { 219 return nil, errors.New("Can not marshal from a nil stat cache key") 220 } 221 data := make([]byte, 0, len(sk.Filepath)+3) 222 data = append(data, 1) // version number 223 data = append(data, sk.Filepath...) 224 data = append(data, '|') 225 if sk.Permanode { 226 data = append(data, 1) 227 } 228 return data, nil 229 } 230 231 type statFingerprint string 232 233 type statCacheValue struct { 234 Fingerprint statFingerprint 235 Result client.PutResult 236 } 237 238 // marshalBinary returns a more compact binary 239 // representation of the contents of scv. 240 func (scv *statCacheValue) marshalBinary() ([]byte, error) { 241 if scv == nil { 242 return nil, errors.New("Can not marshal from a nil stat cache value") 243 } 244 binBr, _ := scv.Result.BlobRef.MarshalBinary() 245 // Blob size fits on 4 bytes when binary encoded 246 data := make([]byte, 0, len(scv.Fingerprint)+1+4+1+len(binBr)) 247 buf := bytes.NewBuffer(data) 248 _, err := buf.WriteString(string(scv.Fingerprint)) 249 if err != nil { 250 return nil, fmt.Errorf("Could not write fingerprint %v: %v", scv.Fingerprint, err) 251 } 252 err = buf.WriteByte('|') 253 if err != nil { 254 return nil, fmt.Errorf("Could not write '|': %v", err) 255 } 256 err = binary.Write(buf, binary.BigEndian, int32(scv.Result.Size)) 257 if err != nil { 258 return nil, fmt.Errorf("Could not write blob size %d: %v", scv.Result.Size, err) 259 } 260 err = buf.WriteByte('|') 261 if err != nil { 262 return nil, fmt.Errorf("Could not write '|': %v", err) 263 } 264 _, err = buf.Write(binBr) 265 if err != nil { 266 return nil, fmt.Errorf("Could not write binary blobref %q: %v", binBr, err) 267 } 268 return buf.Bytes(), nil 269 } 270 271 var pipe = []byte("|") 272 273 func (scv *statCacheValue) unmarshalBinary(data []byte) error { 274 if scv == nil { 275 return errors.New("Can't unmarshalBinary into a nil stat cache value") 276 } 277 if scv.Fingerprint != "" { 278 return errors.New("Can't unmarshalBinary into a non empty stat cache value") 279 } 280 281 parts := bytes.SplitN(data, pipe, 3) 282 if len(parts) != 3 { 283 return fmt.Errorf("Bogus stat cache value; was expecting fingerprint|blobSize|blobRef, got %q", data) 284 } 285 fingerprint := string(parts[0]) 286 buf := bytes.NewReader(parts[1]) 287 var size int32 288 err := binary.Read(buf, binary.BigEndian, &size) 289 if err != nil { 290 return fmt.Errorf("Could not decode blob size from stat cache value part %q: %v", parts[1], err) 291 } 292 br := new(blob.Ref) 293 if err := br.UnmarshalBinary(parts[2]); err != nil { 294 return fmt.Errorf("Could not unmarshalBinary for %q: %v", parts[2], err) 295 } 296 297 scv.Fingerprint = statFingerprint(fingerprint) 298 scv.Result = client.PutResult{ 299 BlobRef: *br, 300 Size: uint32(size), 301 Skipped: true, 302 } 303 return nil 304 } 305 306 func fullpath(pwd, filename string) string { 307 var fullPath string 308 if filepath.IsAbs(filename) { 309 fullPath = filepath.Clean(filename) 310 } else { 311 fullPath = filepath.Join(pwd, filename) 312 } 313 return fullPath 314 } 315 316 func escapeGen(gen string) string { 317 // Good enough: 318 return url.QueryEscape(gen) 319 } 320 321 var cleanSysStat func(v interface{}) interface{} 322 323 func fileInfoToFingerprint(fi os.FileInfo) statFingerprint { 324 // We calculate the CRC32 of the underlying system stat structure to get 325 // ctime, owner, group, etc. This is overkill (e.g. we don't care about 326 // the inode or device number probably), but works. 327 sysHash := uint32(0) 328 if sys := fi.Sys(); sys != nil { 329 if clean := cleanSysStat; clean != nil { 330 // TODO: don't clean bad fields, but provide a 331 // portable way to extract all good fields. 332 // This is a Linux+Mac-specific hack for now. 333 sys = clean(sys) 334 } 335 c32 := crc32.NewIEEE() 336 fmt.Fprintf(c32, "%#v", sys) 337 sysHash = c32.Sum32() 338 } 339 return statFingerprint(fmt.Sprintf("%dB/%dMOD/sys-%d", fi.Size(), fi.ModTime().UnixNano(), sysHash)) 340 } 341 342 // Delete stranded lock files and all but the oldest 5 343 // havecache/statcache files, unless they're newer than 30 days. 344 func cleanCacheDir() { 345 dir := osutil.CacheDir() 346 f, err := os.Open(dir) 347 if err != nil { 348 return 349 } 350 defer f.Close() 351 fis, err := f.Readdir(-1) 352 if err != nil { 353 return 354 } 355 var haveCache, statCache []os.FileInfo 356 seen := make(map[string]bool) 357 for _, fi := range fis { 358 seen[fi.Name()] = true 359 } 360 361 for name := range seen { 362 if strings.HasSuffix(name, ".lock") && !seen[strings.TrimSuffix(name, ".lock")] { 363 os.Remove(filepath.Join(dir, name)) 364 } 365 } 366 367 for _, fi := range fis { 368 if strings.HasSuffix(fi.Name(), ".lock") { 369 continue 370 } 371 if strings.HasPrefix(fi.Name(), "camput.havecache.") { 372 haveCache = append(haveCache, fi) 373 continue 374 } 375 if strings.HasPrefix(fi.Name(), "camput.statcache.") { 376 statCache = append(statCache, fi) 377 continue 378 } 379 } 380 for _, list := range [][]os.FileInfo{haveCache, statCache} { 381 if len(list) <= 5 { 382 continue 383 } 384 sort.Sort(byModtime(list)) 385 list = list[:len(list)-5] 386 for _, fi := range list { 387 if fi.ModTime().Before(time.Now().Add(-30 * 24 * time.Hour)) { 388 os.Remove(filepath.Join(dir, fi.Name())) 389 } 390 } 391 } 392 } 393 394 type byModtime []os.FileInfo 395 396 func (s byModtime) Len() int { return len(s) } 397 func (s byModtime) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 398 func (s byModtime) Less(i, j int) bool { return s[i].ModTime().Before(s[j].ModTime()) }