github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/cmd/camput/kvcache.go (about) 1 /* 2 Copyright 2013 The Camlistore Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "errors" 23 "fmt" 24 "hash/crc32" 25 "log" 26 "net/url" 27 "os" 28 "path/filepath" 29 "strconv" 30 31 "camlistore.org/pkg/blob" 32 "camlistore.org/pkg/client" 33 "camlistore.org/pkg/kvutil" 34 "camlistore.org/pkg/osutil" 35 "camlistore.org/third_party/github.com/cznic/kv" 36 ) 37 38 var errCacheMiss = errors.New("not in cache") 39 40 // KvHaveCache is a HaveCache on top of a single 41 // mutable database file on disk using github.com/cznic/kv. 42 // It stores the blobref in binary as the key, and 43 // the blobsize in binary as the value. 44 // Access to the cache is restricted to one process 45 // at a time with a lock file. Close should be called 46 // to remove the lock. 47 type KvHaveCache struct { 48 filename string 49 db *kv.DB 50 } 51 52 func NewKvHaveCache(gen string) *KvHaveCache { 53 fullPath := filepath.Join(osutil.CacheDir(), "camput.havecache."+escapeGen(gen)+".kv") 54 db, err := kvutil.Open(fullPath, nil) 55 if err != nil { 56 log.Fatalf("Could not create/open new have cache at %v, %v", fullPath, err) 57 } 58 return &KvHaveCache{ 59 filename: fullPath, 60 db: db, 61 } 62 } 63 64 // Close should be called to commit all the writes 65 // to the db and to unlock the file. 66 func (c *KvHaveCache) Close() error { 67 return c.db.Close() 68 } 69 70 func (c *KvHaveCache) StatBlobCache(br blob.Ref) (size int64, ok bool) { 71 if !br.Valid() { 72 return 73 } 74 binBr, _ := br.MarshalBinary() 75 binVal, err := c.db.Get(nil, binBr) 76 if err != nil { 77 log.Fatalf("Could not query have cache %v for %v: %v", c.filename, br, err) 78 } 79 if binVal == nil { 80 cachelog.Printf("have cache MISS on %v", br) 81 return 82 } 83 val, err := strconv.Atoi(string(binVal)) 84 if err != nil { 85 log.Fatalf("Could not decode have cache binary value for %v: %v", br, err) 86 } 87 cachelog.Printf("have cache HIT on %v", br) 88 return int64(val), true 89 } 90 91 func (c *KvHaveCache) NoteBlobExists(br blob.Ref, size int64) { 92 if !br.Valid() { 93 return 94 } 95 if size < 0 { 96 log.Fatalf("Got a negative blob size to note in have cache for %v", br) 97 } 98 binBr, _ := br.MarshalBinary() 99 binVal := []byte(strconv.Itoa(int(size))) 100 cachelog.Printf("Adding to have cache %v: %q", br, binVal) 101 _, _, err := c.db.Put(nil, binBr, 102 func(binBr, old []byte) ([]byte, bool, error) { 103 // We do not overwrite dups 104 if old != nil { 105 return nil, false, nil 106 } 107 return binVal, true, nil 108 }) 109 if err != nil { 110 log.Fatalf("Could not write %v in have cache: %v", br, err) 111 } 112 } 113 114 // KvStatCache is an UploadCache on top of a single 115 // mutable database file on disk using github.com/cznic/kv. 116 // It stores a binary combination of an os.FileInfo fingerprint and 117 // a client.Putresult as the key, and the blobsize in binary as 118 // the value. 119 // Access to the cache is restricted to one process 120 // at a time with a lock file. Close should be called 121 // to remove the lock. 122 type KvStatCache struct { 123 filename string 124 db *kv.DB 125 } 126 127 func NewKvStatCache(gen string) *KvStatCache { 128 fullPath := filepath.Join(osutil.CacheDir(), "camput.statcache."+escapeGen(gen)+".kv") 129 db, err := kvutil.Open(fullPath, nil) 130 if err != nil { 131 log.Fatalf("Could not create/open new stat cache at %v, %v", fullPath, err) 132 } 133 return &KvStatCache{ 134 filename: fullPath, 135 db: db, 136 } 137 } 138 139 // Close should be called to commit all the writes 140 // to the db and to unlock the file. 141 func (c *KvStatCache) Close() error { 142 return c.db.Close() 143 } 144 145 func (c *KvStatCache) CachedPutResult(pwd, filename string, fi os.FileInfo, withPermanode bool) (*client.PutResult, error) { 146 fullPath := fullpath(pwd, filename) 147 cacheKey := &statCacheKey{ 148 Filepath: fullPath, 149 Permanode: withPermanode, 150 } 151 binKey, err := cacheKey.marshalBinary() 152 binVal, err := c.db.Get(nil, binKey) 153 if err != nil { 154 log.Fatalf("Could not query stat cache %v for %q: %v", binKey, fullPath, err) 155 } 156 if binVal == nil { 157 cachelog.Printf("stat cache MISS on %q", binKey) 158 return nil, errCacheMiss 159 } 160 val := &statCacheValue{} 161 if err = val.unmarshalBinary(binVal); err != nil { 162 return nil, fmt.Errorf("Bogus stat cached value for %q: %v", binKey, err) 163 } 164 fp := fileInfoToFingerprint(fi) 165 if val.Fingerprint != fp { 166 cachelog.Printf("cache MISS on %q: stats not equal:\n%#v\n%#v", binKey, val.Fingerprint, fp) 167 return nil, errCacheMiss 168 } 169 cachelog.Printf("stat cache HIT on %q", binKey) 170 return &val.Result, nil 171 } 172 173 func (c *KvStatCache) AddCachedPutResult(pwd, filename string, fi os.FileInfo, pr *client.PutResult, withPermanode bool) { 174 fullPath := fullpath(pwd, filename) 175 cacheKey := &statCacheKey{ 176 Filepath: fullPath, 177 Permanode: withPermanode, 178 } 179 val := &statCacheValue{fileInfoToFingerprint(fi), *pr} 180 181 binKey, err := cacheKey.marshalBinary() 182 if err != nil { 183 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 184 } 185 binVal, err := val.marshalBinary() 186 if err != nil { 187 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 188 } 189 cachelog.Printf("Adding to stat cache %q: %q", binKey, binVal) 190 _, _, err = c.db.Put(nil, binKey, 191 func(binKey, old []byte) ([]byte, bool, error) { 192 // We do not overwrite dups 193 if old != nil { 194 return nil, false, nil 195 } 196 return binVal, true, nil 197 }) 198 if err != nil { 199 log.Fatalf("Could not add %q to stat cache: %v", binKey, err) 200 } 201 } 202 203 type statCacheKey struct { 204 Filepath string 205 Permanode bool // whether -filenodes is being used. 206 } 207 208 // marshalBinary returns a more compact binary 209 // representation of the contents of sk. 210 func (sk *statCacheKey) marshalBinary() ([]byte, error) { 211 if sk == nil { 212 return nil, errors.New("Can not marshal from a nil stat cache key") 213 } 214 data := make([]byte, 0, len(sk.Filepath)+3) 215 data = append(data, 1) // version number 216 data = append(data, sk.Filepath...) 217 data = append(data, '|') 218 if sk.Permanode { 219 data = append(data, 1) 220 } 221 return data, nil 222 } 223 224 type statFingerprint string 225 226 type statCacheValue struct { 227 Fingerprint statFingerprint 228 Result client.PutResult 229 } 230 231 // marshalBinary returns a more compact binary 232 // representation of the contents of scv. 233 func (scv *statCacheValue) marshalBinary() ([]byte, error) { 234 if scv == nil { 235 return nil, errors.New("Can not marshal from a nil stat cache value") 236 } 237 binBr, _ := scv.Result.BlobRef.MarshalBinary() 238 // Blob size fits on 4 bytes when binary encoded 239 data := make([]byte, 0, len(scv.Fingerprint)+1+4+1+len(binBr)) 240 buf := bytes.NewBuffer(data) 241 _, err := buf.WriteString(string(scv.Fingerprint)) 242 if err != nil { 243 return nil, fmt.Errorf("Could not write fingerprint %v: %v", scv.Fingerprint, err) 244 } 245 err = buf.WriteByte('|') 246 if err != nil { 247 return nil, fmt.Errorf("Could not write '|': %v", err) 248 } 249 err = binary.Write(buf, binary.BigEndian, int32(scv.Result.Size)) 250 if err != nil { 251 return nil, fmt.Errorf("Could not write blob size %d: %v", scv.Result.Size, err) 252 } 253 err = buf.WriteByte('|') 254 if err != nil { 255 return nil, fmt.Errorf("Could not write '|': %v", err) 256 } 257 _, err = buf.Write(binBr) 258 if err != nil { 259 return nil, fmt.Errorf("Could not write binary blobref %q: %v", binBr, err) 260 } 261 return buf.Bytes(), nil 262 } 263 264 var pipe = []byte("|") 265 266 func (scv *statCacheValue) unmarshalBinary(data []byte) error { 267 if scv == nil { 268 return errors.New("Can't unmarshalBinary into a nil stat cache value") 269 } 270 if scv.Fingerprint != "" { 271 return errors.New("Can't unmarshalBinary into a non empty stat cache value") 272 } 273 274 parts := bytes.SplitN(data, pipe, 3) 275 if len(parts) != 3 { 276 return fmt.Errorf("Bogus stat cache value; was expecting fingerprint|blobSize|blobRef, got %q", data) 277 } 278 fingerprint := string(parts[0]) 279 buf := bytes.NewReader(parts[1]) 280 var size int32 281 err := binary.Read(buf, binary.BigEndian, &size) 282 if err != nil { 283 return fmt.Errorf("Could not decode blob size from stat cache value part %q: %v", parts[1], err) 284 } 285 br := new(blob.Ref) 286 if err := br.UnmarshalBinary(parts[2]); err != nil { 287 return fmt.Errorf("Could not unmarshalBinary for %q: %v", parts[2], err) 288 } 289 290 scv.Fingerprint = statFingerprint(fingerprint) 291 scv.Result = client.PutResult{ 292 BlobRef: *br, 293 Size: int64(size), 294 Skipped: true, 295 } 296 return nil 297 } 298 299 func fullpath(pwd, filename string) string { 300 var fullPath string 301 if filepath.IsAbs(filename) { 302 fullPath = filepath.Clean(filename) 303 } else { 304 fullPath = filepath.Join(pwd, filename) 305 } 306 return fullPath 307 } 308 309 func escapeGen(gen string) string { 310 // Good enough: 311 return url.QueryEscape(gen) 312 } 313 314 var cleanSysStat func(v interface{}) interface{} 315 316 func fileInfoToFingerprint(fi os.FileInfo) statFingerprint { 317 // We calculate the CRC32 of the underlying system stat structure to get 318 // ctime, owner, group, etc. This is overkill (e.g. we don't care about 319 // the inode or device number probably), but works. 320 sysHash := uint32(0) 321 if sys := fi.Sys(); sys != nil { 322 if clean := cleanSysStat; clean != nil { 323 // TODO: don't clean bad fields, but provide a 324 // portable way to extract all good fields. 325 // This is a Linux+Mac-specific hack for now. 326 sys = clean(sys) 327 } 328 c32 := crc32.NewIEEE() 329 fmt.Fprintf(c32, "%#v", sys) 330 sysHash = c32.Sum32() 331 } 332 return statFingerprint(fmt.Sprintf("%dB/%dMOD/sys-%d", fi.Size(), fi.ModTime().UnixNano(), sysHash)) 333 }