github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/cmd/camput/kvcache.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"errors"
    23  	"fmt"
    24  	"hash/crc32"
    25  	"log"
    26  	"net/url"
    27  	"os"
    28  	"path/filepath"
    29  	"strconv"
    30  
    31  	"camlistore.org/pkg/blob"
    32  	"camlistore.org/pkg/client"
    33  	"camlistore.org/pkg/kvutil"
    34  	"camlistore.org/pkg/osutil"
    35  	"camlistore.org/third_party/github.com/cznic/kv"
    36  )
    37  
// errCacheMiss is the error returned by KvStatCache.CachedPutResult
// when the stat cache holds no usable entry for the requested file.
var errCacheMiss = errors.New("not in cache")
    39  
// KvHaveCache is a HaveCache on top of a single
// mutable database file on disk using github.com/cznic/kv.
// It stores the binary-marshaled blobref as the key, and
// the blob size, formatted as base-10 text, as the value.
// Access to the cache is restricted to one process
// at a time with a lock file. Close should be called
// to remove the lock.
type KvHaveCache struct {
	filename string // path of the database file, as passed to kvutil.Open
	db       *kv.DB // handle returned by kvutil.Open
}
    51  
    52  func NewKvHaveCache(gen string) *KvHaveCache {
    53  	fullPath := filepath.Join(osutil.CacheDir(), "camput.havecache."+escapeGen(gen)+".kv")
    54  	db, err := kvutil.Open(fullPath, nil)
    55  	if err != nil {
    56  		log.Fatalf("Could not create/open new have cache at %v, %v", fullPath, err)
    57  	}
    58  	return &KvHaveCache{
    59  		filename: fullPath,
    60  		db:       db,
    61  	}
    62  }
    63  
// Close should be called to commit all the writes
// to the db and to unlock the file.
// It returns whatever error closing the underlying db reports.
func (c *KvHaveCache) Close() error {
	return c.db.Close()
}
    69  
    70  func (c *KvHaveCache) StatBlobCache(br blob.Ref) (size int64, ok bool) {
    71  	if !br.Valid() {
    72  		return
    73  	}
    74  	binBr, _ := br.MarshalBinary()
    75  	binVal, err := c.db.Get(nil, binBr)
    76  	if err != nil {
    77  		log.Fatalf("Could not query have cache %v for %v: %v", c.filename, br, err)
    78  	}
    79  	if binVal == nil {
    80  		cachelog.Printf("have cache MISS on %v", br)
    81  		return
    82  	}
    83  	val, err := strconv.Atoi(string(binVal))
    84  	if err != nil {
    85  		log.Fatalf("Could not decode have cache binary value for %v: %v", br, err)
    86  	}
    87  	cachelog.Printf("have cache HIT on %v", br)
    88  	return int64(val), true
    89  }
    90  
    91  func (c *KvHaveCache) NoteBlobExists(br blob.Ref, size int64) {
    92  	if !br.Valid() {
    93  		return
    94  	}
    95  	if size < 0 {
    96  		log.Fatalf("Got a negative blob size to note in have cache for %v", br)
    97  	}
    98  	binBr, _ := br.MarshalBinary()
    99  	binVal := []byte(strconv.Itoa(int(size)))
   100  	cachelog.Printf("Adding to have cache %v: %q", br, binVal)
   101  	_, _, err := c.db.Put(nil, binBr,
   102  		func(binBr, old []byte) ([]byte, bool, error) {
   103  			// We do not overwrite dups
   104  			if old != nil {
   105  				return nil, false, nil
   106  			}
   107  			return binVal, true, nil
   108  		})
   109  	if err != nil {
   110  		log.Fatalf("Could not write %v in have cache: %v", br, err)
   111  	}
   112  }
   113  
// KvStatCache is an UploadCache on top of a single
// mutable database file on disk using github.com/cznic/kv.
// The key is a marshaled statCacheKey: a version byte, the
// full path of the file, and whether a permanode was requested.
// The value is a marshaled statCacheValue: the os.FileInfo
// fingerprint of the file, followed by the size and the blobref
// of its client.PutResult.
// Access to the cache is restricted to one process
// at a time with a lock file. Close should be called
// to remove the lock.
type KvStatCache struct {
	filename string // path of the database file, as passed to kvutil.Open
	db       *kv.DB // handle returned by kvutil.Open
}
   126  
   127  func NewKvStatCache(gen string) *KvStatCache {
   128  	fullPath := filepath.Join(osutil.CacheDir(), "camput.statcache."+escapeGen(gen)+".kv")
   129  	db, err := kvutil.Open(fullPath, nil)
   130  	if err != nil {
   131  		log.Fatalf("Could not create/open new stat cache at %v, %v", fullPath, err)
   132  	}
   133  	return &KvStatCache{
   134  		filename: fullPath,
   135  		db:       db,
   136  	}
   137  }
   138  
// Close should be called to commit all the writes
// to the db and to unlock the file.
// It returns whatever error closing the underlying db reports.
func (c *KvStatCache) Close() error {
	return c.db.Close()
}
   144  
   145  func (c *KvStatCache) CachedPutResult(pwd, filename string, fi os.FileInfo, withPermanode bool) (*client.PutResult, error) {
   146  	fullPath := fullpath(pwd, filename)
   147  	cacheKey := &statCacheKey{
   148  		Filepath:  fullPath,
   149  		Permanode: withPermanode,
   150  	}
   151  	binKey, err := cacheKey.marshalBinary()
   152  	binVal, err := c.db.Get(nil, binKey)
   153  	if err != nil {
   154  		log.Fatalf("Could not query stat cache %v for %q: %v", binKey, fullPath, err)
   155  	}
   156  	if binVal == nil {
   157  		cachelog.Printf("stat cache MISS on %q", binKey)
   158  		return nil, errCacheMiss
   159  	}
   160  	val := &statCacheValue{}
   161  	if err = val.unmarshalBinary(binVal); err != nil {
   162  		return nil, fmt.Errorf("Bogus stat cached value for %q: %v", binKey, err)
   163  	}
   164  	fp := fileInfoToFingerprint(fi)
   165  	if val.Fingerprint != fp {
   166  		cachelog.Printf("cache MISS on %q: stats not equal:\n%#v\n%#v", binKey, val.Fingerprint, fp)
   167  		return nil, errCacheMiss
   168  	}
   169  	cachelog.Printf("stat cache HIT on %q", binKey)
   170  	return &val.Result, nil
   171  }
   172  
   173  func (c *KvStatCache) AddCachedPutResult(pwd, filename string, fi os.FileInfo, pr *client.PutResult, withPermanode bool) {
   174  	fullPath := fullpath(pwd, filename)
   175  	cacheKey := &statCacheKey{
   176  		Filepath:  fullPath,
   177  		Permanode: withPermanode,
   178  	}
   179  	val := &statCacheValue{fileInfoToFingerprint(fi), *pr}
   180  
   181  	binKey, err := cacheKey.marshalBinary()
   182  	if err != nil {
   183  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   184  	}
   185  	binVal, err := val.marshalBinary()
   186  	if err != nil {
   187  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   188  	}
   189  	cachelog.Printf("Adding to stat cache %q: %q", binKey, binVal)
   190  	_, _, err = c.db.Put(nil, binKey,
   191  		func(binKey, old []byte) ([]byte, bool, error) {
   192  			// We do not overwrite dups
   193  			if old != nil {
   194  				return nil, false, nil
   195  			}
   196  			return binVal, true, nil
   197  		})
   198  	if err != nil {
   199  		log.Fatalf("Could not add %q to stat cache: %v", binKey, err)
   200  	}
   201  }
   202  
   203  type statCacheKey struct {
   204  	Filepath  string
   205  	Permanode bool // whether -filenodes is being used.
   206  }
   207  
   208  // marshalBinary returns a more compact binary
   209  // representation of the contents of sk.
   210  func (sk *statCacheKey) marshalBinary() ([]byte, error) {
   211  	if sk == nil {
   212  		return nil, errors.New("Can not marshal from a nil stat cache key")
   213  	}
   214  	data := make([]byte, 0, len(sk.Filepath)+3)
   215  	data = append(data, 1) // version number
   216  	data = append(data, sk.Filepath...)
   217  	data = append(data, '|')
   218  	if sk.Permanode {
   219  		data = append(data, 1)
   220  	}
   221  	return data, nil
   222  }
   223  
// statFingerprint is a textual summary of an os.FileInfo, as built by
// fileInfoToFingerprint. Two fingerprints are compared for equality to
// decide whether a cached entry still applies to a file.
type statFingerprint string
   225  
// statCacheValue is what the stat cache stores for a file: the
// fingerprint the file had when it was cached and the upload result
// recorded for it.
type statCacheValue struct {
	Fingerprint statFingerprint
	Result      client.PutResult
}
   230  
   231  // marshalBinary returns a more compact binary
   232  // representation of the contents of scv.
   233  func (scv *statCacheValue) marshalBinary() ([]byte, error) {
   234  	if scv == nil {
   235  		return nil, errors.New("Can not marshal from a nil stat cache value")
   236  	}
   237  	binBr, _ := scv.Result.BlobRef.MarshalBinary()
   238  	// Blob size fits on 4 bytes when binary encoded
   239  	data := make([]byte, 0, len(scv.Fingerprint)+1+4+1+len(binBr))
   240  	buf := bytes.NewBuffer(data)
   241  	_, err := buf.WriteString(string(scv.Fingerprint))
   242  	if err != nil {
   243  		return nil, fmt.Errorf("Could not write fingerprint %v: %v", scv.Fingerprint, err)
   244  	}
   245  	err = buf.WriteByte('|')
   246  	if err != nil {
   247  		return nil, fmt.Errorf("Could not write '|': %v", err)
   248  	}
   249  	err = binary.Write(buf, binary.BigEndian, int32(scv.Result.Size))
   250  	if err != nil {
   251  		return nil, fmt.Errorf("Could not write blob size %d: %v", scv.Result.Size, err)
   252  	}
   253  	err = buf.WriteByte('|')
   254  	if err != nil {
   255  		return nil, fmt.Errorf("Could not write '|': %v", err)
   256  	}
   257  	_, err = buf.Write(binBr)
   258  	if err != nil {
   259  		return nil, fmt.Errorf("Could not write binary blobref %q: %v", binBr, err)
   260  	}
   261  	return buf.Bytes(), nil
   262  }
   263  
// pipe is the separator placed between the fields of a marshaled
// statCacheValue.
var pipe = []byte("|")
   265  
   266  func (scv *statCacheValue) unmarshalBinary(data []byte) error {
   267  	if scv == nil {
   268  		return errors.New("Can't unmarshalBinary into a nil stat cache value")
   269  	}
   270  	if scv.Fingerprint != "" {
   271  		return errors.New("Can't unmarshalBinary into a non empty stat cache value")
   272  	}
   273  
   274  	parts := bytes.SplitN(data, pipe, 3)
   275  	if len(parts) != 3 {
   276  		return fmt.Errorf("Bogus stat cache value; was expecting fingerprint|blobSize|blobRef, got %q", data)
   277  	}
   278  	fingerprint := string(parts[0])
   279  	buf := bytes.NewReader(parts[1])
   280  	var size int32
   281  	err := binary.Read(buf, binary.BigEndian, &size)
   282  	if err != nil {
   283  		return fmt.Errorf("Could not decode blob size from stat cache value part %q: %v", parts[1], err)
   284  	}
   285  	br := new(blob.Ref)
   286  	if err := br.UnmarshalBinary(parts[2]); err != nil {
   287  		return fmt.Errorf("Could not unmarshalBinary for %q: %v", parts[2], err)
   288  	}
   289  
   290  	scv.Fingerprint = statFingerprint(fingerprint)
   291  	scv.Result = client.PutResult{
   292  		BlobRef: *br,
   293  		Size:    int64(size),
   294  		Skipped: true,
   295  	}
   296  	return nil
   297  }
   298  
   299  func fullpath(pwd, filename string) string {
   300  	var fullPath string
   301  	if filepath.IsAbs(filename) {
   302  		fullPath = filepath.Clean(filename)
   303  	} else {
   304  		fullPath = filepath.Join(pwd, filename)
   305  	}
   306  	return fullPath
   307  }
   308  
   309  func escapeGen(gen string) string {
   310  	// Good enough:
   311  	return url.QueryEscape(gen)
   312  }
   313  
// cleanSysStat, when non-nil, is applied by fileInfoToFingerprint to
// the value returned by os.FileInfo.Sys before that value is hashed.
// Nothing in this part of the file assigns it; presumably it is set by
// platform-specific code elsewhere — TODO confirm.
var cleanSysStat func(v interface{}) interface{}
   315  
   316  func fileInfoToFingerprint(fi os.FileInfo) statFingerprint {
   317  	// We calculate the CRC32 of the underlying system stat structure to get
   318  	// ctime, owner, group, etc.  This is overkill (e.g. we don't care about
   319  	// the inode or device number probably), but works.
   320  	sysHash := uint32(0)
   321  	if sys := fi.Sys(); sys != nil {
   322  		if clean := cleanSysStat; clean != nil {
   323  			// TODO: don't clean bad fields, but provide a
   324  			// portable way to extract all good fields.
   325  			// This is a Linux+Mac-specific hack for now.
   326  			sys = clean(sys)
   327  		}
   328  		c32 := crc32.NewIEEE()
   329  		fmt.Fprintf(c32, "%#v", sys)
   330  		sysHash = c32.Sum32()
   331  	}
   332  	return statFingerprint(fmt.Sprintf("%dB/%dMOD/sys-%d", fi.Size(), fi.ModTime().UnixNano(), sysHash))
   333  }