github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camput/kvcache.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"errors"
    23  	"fmt"
    24  	"hash/crc32"
    25  	"log"
    26  	"net/url"
    27  	"os"
    28  	"path/filepath"
    29  	"sort"
    30  	"strconv"
    31  	"strings"
    32  	"time"
    33  
    34  	"camlistore.org/pkg/blob"
    35  	"camlistore.org/pkg/client"
    36  	"camlistore.org/pkg/kvutil"
    37  	"camlistore.org/pkg/osutil"
    38  	"camlistore.org/third_party/github.com/cznic/kv"
    39  )
    40  
    41  var errCacheMiss = errors.New("not in cache")
    42  
    43  // KvHaveCache is a HaveCache on top of a single
    44  // mutable database file on disk using github.com/cznic/kv.
    45  // It stores the blobref in binary as the key, and
    46  // the blobsize in binary as the value.
    47  // Access to the cache is restricted to one process
    48  // at a time with a lock file. Close should be called
    49  // to remove the lock.
    50  type KvHaveCache struct {
    51  	filename string
    52  	db       *kv.DB
    53  }
    54  
    55  func NewKvHaveCache(gen string) *KvHaveCache {
    56  	cleanCacheDir()
    57  	fullPath := filepath.Join(osutil.CacheDir(), "camput.havecache."+escapeGen(gen)+".kv")
    58  	db, err := kvutil.Open(fullPath, nil)
    59  	if err != nil {
    60  		log.Fatalf("Could not create/open new have cache at %v, %v", fullPath, err)
    61  	}
    62  	return &KvHaveCache{
    63  		filename: fullPath,
    64  		db:       db,
    65  	}
    66  }
    67  
    68  // Close should be called to commit all the writes
    69  // to the db and to unlock the file.
    70  func (c *KvHaveCache) Close() error {
    71  	return c.db.Close()
    72  }
    73  
    74  func (c *KvHaveCache) StatBlobCache(br blob.Ref) (size uint32, ok bool) {
    75  	if !br.Valid() {
    76  		return
    77  	}
    78  	binBr, _ := br.MarshalBinary()
    79  	binVal, err := c.db.Get(nil, binBr)
    80  	if err != nil {
    81  		log.Fatalf("Could not query have cache %v for %v: %v", c.filename, br, err)
    82  	}
    83  	if binVal == nil {
    84  		cachelog.Printf("have cache MISS on %v", br)
    85  		return
    86  	}
    87  	val, err := strconv.ParseUint(string(binVal), 10, 32)
    88  	if err != nil {
    89  		log.Fatalf("Could not decode have cache binary value for %v: %v", br, err)
    90  	}
    91  	if val < 0 {
    92  		log.Fatalf("Error decoding have cache binary value for %v: size=%d", br, val)
    93  	}
    94  	cachelog.Printf("have cache HIT on %v", br)
    95  	return uint32(val), true
    96  }
    97  
    98  func (c *KvHaveCache) NoteBlobExists(br blob.Ref, size uint32) {
    99  	if !br.Valid() {
   100  		return
   101  	}
   102  	if size < 0 {
   103  		log.Fatalf("Got a negative blob size to note in have cache for %v", br)
   104  	}
   105  	binBr, _ := br.MarshalBinary()
   106  	binVal := []byte(strconv.Itoa(int(size)))
   107  	cachelog.Printf("Adding to have cache %v: %q", br, binVal)
   108  	_, _, err := c.db.Put(nil, binBr,
   109  		func(binBr, old []byte) ([]byte, bool, error) {
   110  			// We do not overwrite dups
   111  			if old != nil {
   112  				return nil, false, nil
   113  			}
   114  			return binVal, true, nil
   115  		})
   116  	if err != nil {
   117  		log.Fatalf("Could not write %v in have cache: %v", br, err)
   118  	}
   119  }
   120  
   121  // KvStatCache is an UploadCache on top of a single
   122  // mutable database file on disk using github.com/cznic/kv.
   123  // It stores a binary combination of an os.FileInfo fingerprint and
   124  // a client.Putresult as the key, and the blobsize in binary as
   125  // the value.
   126  // Access to the cache is restricted to one process
   127  // at a time with a lock file. Close should be called
   128  // to remove the lock.
   129  type KvStatCache struct {
   130  	filename string
   131  	db       *kv.DB
   132  }
   133  
   134  func NewKvStatCache(gen string) *KvStatCache {
   135  	fullPath := filepath.Join(osutil.CacheDir(), "camput.statcache."+escapeGen(gen)+".kv")
   136  	db, err := kvutil.Open(fullPath, nil)
   137  	if err != nil {
   138  		log.Fatalf("Could not create/open new stat cache at %v, %v", fullPath, err)
   139  	}
   140  	return &KvStatCache{
   141  		filename: fullPath,
   142  		db:       db,
   143  	}
   144  }
   145  
   146  // Close should be called to commit all the writes
   147  // to the db and to unlock the file.
   148  func (c *KvStatCache) Close() error {
   149  	return c.db.Close()
   150  }
   151  
   152  func (c *KvStatCache) CachedPutResult(pwd, filename string, fi os.FileInfo, withPermanode bool) (*client.PutResult, error) {
   153  	fullPath := fullpath(pwd, filename)
   154  	cacheKey := &statCacheKey{
   155  		Filepath:  fullPath,
   156  		Permanode: withPermanode,
   157  	}
   158  	binKey, err := cacheKey.marshalBinary()
   159  	binVal, err := c.db.Get(nil, binKey)
   160  	if err != nil {
   161  		log.Fatalf("Could not query stat cache %v for %q: %v", binKey, fullPath, err)
   162  	}
   163  	if binVal == nil {
   164  		cachelog.Printf("stat cache MISS on %q", binKey)
   165  		return nil, errCacheMiss
   166  	}
   167  	val := &statCacheValue{}
   168  	if err = val.unmarshalBinary(binVal); err != nil {
   169  		return nil, fmt.Errorf("Bogus stat cached value for %q: %v", binKey, err)
   170  	}
   171  	fp := fileInfoToFingerprint(fi)
   172  	if val.Fingerprint != fp {
   173  		cachelog.Printf("cache MISS on %q: stats not equal:\n%#v\n%#v", binKey, val.Fingerprint, fp)
   174  		return nil, errCacheMiss
   175  	}
   176  	cachelog.Printf("stat cache HIT on %q", binKey)
   177  	return &val.Result, nil
   178  }
   179  
   180  func (c *KvStatCache) AddCachedPutResult(pwd, filename string, fi os.FileInfo, pr *client.PutResult, withPermanode bool) {
   181  	fullPath := fullpath(pwd, filename)
   182  	cacheKey := &statCacheKey{
   183  		Filepath:  fullPath,
   184  		Permanode: withPermanode,
   185  	}
   186  	val := &statCacheValue{fileInfoToFingerprint(fi), *pr}
   187  
   188  	binKey, err := cacheKey.marshalBinary()
   189  	if err != nil {
   190  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   191  	}
   192  	binVal, err := val.marshalBinary()
   193  	if err != nil {
   194  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   195  	}
   196  	cachelog.Printf("Adding to stat cache %q: %q", binKey, binVal)
   197  	_, _, err = c.db.Put(nil, binKey,
   198  		func(binKey, old []byte) ([]byte, bool, error) {
   199  			// We do not overwrite dups
   200  			if old != nil {
   201  				return nil, false, nil
   202  			}
   203  			return binVal, true, nil
   204  		})
   205  	if err != nil {
   206  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   207  	}
   208  }
   209  
   210  type statCacheKey struct {
   211  	Filepath  string
   212  	Permanode bool // whether -filenodes is being used.
   213  }
   214  
   215  // marshalBinary returns a more compact binary
   216  // representation of the contents of sk.
   217  func (sk *statCacheKey) marshalBinary() ([]byte, error) {
   218  	if sk == nil {
   219  		return nil, errors.New("Can not marshal from a nil stat cache key")
   220  	}
   221  	data := make([]byte, 0, len(sk.Filepath)+3)
   222  	data = append(data, 1) // version number
   223  	data = append(data, sk.Filepath...)
   224  	data = append(data, '|')
   225  	if sk.Permanode {
   226  		data = append(data, 1)
   227  	}
   228  	return data, nil
   229  }
   230  
   231  type statFingerprint string
   232  
   233  type statCacheValue struct {
   234  	Fingerprint statFingerprint
   235  	Result      client.PutResult
   236  }
   237  
   238  // marshalBinary returns a more compact binary
   239  // representation of the contents of scv.
   240  func (scv *statCacheValue) marshalBinary() ([]byte, error) {
   241  	if scv == nil {
   242  		return nil, errors.New("Can not marshal from a nil stat cache value")
   243  	}
   244  	binBr, _ := scv.Result.BlobRef.MarshalBinary()
   245  	// Blob size fits on 4 bytes when binary encoded
   246  	data := make([]byte, 0, len(scv.Fingerprint)+1+4+1+len(binBr))
   247  	buf := bytes.NewBuffer(data)
   248  	_, err := buf.WriteString(string(scv.Fingerprint))
   249  	if err != nil {
   250  		return nil, fmt.Errorf("Could not write fingerprint %v: %v", scv.Fingerprint, err)
   251  	}
   252  	err = buf.WriteByte('|')
   253  	if err != nil {
   254  		return nil, fmt.Errorf("Could not write '|': %v", err)
   255  	}
   256  	err = binary.Write(buf, binary.BigEndian, int32(scv.Result.Size))
   257  	if err != nil {
   258  		return nil, fmt.Errorf("Could not write blob size %d: %v", scv.Result.Size, err)
   259  	}
   260  	err = buf.WriteByte('|')
   261  	if err != nil {
   262  		return nil, fmt.Errorf("Could not write '|': %v", err)
   263  	}
   264  	_, err = buf.Write(binBr)
   265  	if err != nil {
   266  		return nil, fmt.Errorf("Could not write binary blobref %q: %v", binBr, err)
   267  	}
   268  	return buf.Bytes(), nil
   269  }
   270  
   271  var pipe = []byte("|")
   272  
   273  func (scv *statCacheValue) unmarshalBinary(data []byte) error {
   274  	if scv == nil {
   275  		return errors.New("Can't unmarshalBinary into a nil stat cache value")
   276  	}
   277  	if scv.Fingerprint != "" {
   278  		return errors.New("Can't unmarshalBinary into a non empty stat cache value")
   279  	}
   280  
   281  	parts := bytes.SplitN(data, pipe, 3)
   282  	if len(parts) != 3 {
   283  		return fmt.Errorf("Bogus stat cache value; was expecting fingerprint|blobSize|blobRef, got %q", data)
   284  	}
   285  	fingerprint := string(parts[0])
   286  	buf := bytes.NewReader(parts[1])
   287  	var size int32
   288  	err := binary.Read(buf, binary.BigEndian, &size)
   289  	if err != nil {
   290  		return fmt.Errorf("Could not decode blob size from stat cache value part %q: %v", parts[1], err)
   291  	}
   292  	br := new(blob.Ref)
   293  	if err := br.UnmarshalBinary(parts[2]); err != nil {
   294  		return fmt.Errorf("Could not unmarshalBinary for %q: %v", parts[2], err)
   295  	}
   296  
   297  	scv.Fingerprint = statFingerprint(fingerprint)
   298  	scv.Result = client.PutResult{
   299  		BlobRef: *br,
   300  		Size:    uint32(size),
   301  		Skipped: true,
   302  	}
   303  	return nil
   304  }
   305  
   306  func fullpath(pwd, filename string) string {
   307  	var fullPath string
   308  	if filepath.IsAbs(filename) {
   309  		fullPath = filepath.Clean(filename)
   310  	} else {
   311  		fullPath = filepath.Join(pwd, filename)
   312  	}
   313  	return fullPath
   314  }
   315  
   316  func escapeGen(gen string) string {
   317  	// Good enough:
   318  	return url.QueryEscape(gen)
   319  }
   320  
   321  var cleanSysStat func(v interface{}) interface{}
   322  
   323  func fileInfoToFingerprint(fi os.FileInfo) statFingerprint {
   324  	// We calculate the CRC32 of the underlying system stat structure to get
   325  	// ctime, owner, group, etc.  This is overkill (e.g. we don't care about
   326  	// the inode or device number probably), but works.
   327  	sysHash := uint32(0)
   328  	if sys := fi.Sys(); sys != nil {
   329  		if clean := cleanSysStat; clean != nil {
   330  			// TODO: don't clean bad fields, but provide a
   331  			// portable way to extract all good fields.
   332  			// This is a Linux+Mac-specific hack for now.
   333  			sys = clean(sys)
   334  		}
   335  		c32 := crc32.NewIEEE()
   336  		fmt.Fprintf(c32, "%#v", sys)
   337  		sysHash = c32.Sum32()
   338  	}
   339  	return statFingerprint(fmt.Sprintf("%dB/%dMOD/sys-%d", fi.Size(), fi.ModTime().UnixNano(), sysHash))
   340  }
   341  
   342  // Delete stranded lock files and all but the oldest 5
   343  // havecache/statcache files, unless they're newer than 30 days.
   344  func cleanCacheDir() {
   345  	dir := osutil.CacheDir()
   346  	f, err := os.Open(dir)
   347  	if err != nil {
   348  		return
   349  	}
   350  	defer f.Close()
   351  	fis, err := f.Readdir(-1)
   352  	if err != nil {
   353  		return
   354  	}
   355  	var haveCache, statCache []os.FileInfo
   356  	seen := make(map[string]bool)
   357  	for _, fi := range fis {
   358  		seen[fi.Name()] = true
   359  	}
   360  
   361  	for name := range seen {
   362  		if strings.HasSuffix(name, ".lock") && !seen[strings.TrimSuffix(name, ".lock")] {
   363  			os.Remove(filepath.Join(dir, name))
   364  		}
   365  	}
   366  
   367  	for _, fi := range fis {
   368  		if strings.HasSuffix(fi.Name(), ".lock") {
   369  			continue
   370  		}
   371  		if strings.HasPrefix(fi.Name(), "camput.havecache.") {
   372  			haveCache = append(haveCache, fi)
   373  			continue
   374  		}
   375  		if strings.HasPrefix(fi.Name(), "camput.statcache.") {
   376  			statCache = append(statCache, fi)
   377  			continue
   378  		}
   379  	}
   380  	for _, list := range [][]os.FileInfo{haveCache, statCache} {
   381  		if len(list) <= 5 {
   382  			continue
   383  		}
   384  		sort.Sort(byModtime(list))
   385  		list = list[:len(list)-5]
   386  		for _, fi := range list {
   387  			if fi.ModTime().Before(time.Now().Add(-30 * 24 * time.Hour)) {
   388  				os.Remove(filepath.Join(dir, fi.Name()))
   389  			}
   390  		}
   391  	}
   392  }
   393  
   394  type byModtime []os.FileInfo
   395  
   396  func (s byModtime) Len() int           { return len(s) }
   397  func (s byModtime) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   398  func (s byModtime) Less(i, j int) bool { return s[i].ModTime().Before(s[j].ModTime()) }