github.com/peterbourgon/diskv@v2.0.1+incompatible/diskv.go (about) 1 // Diskv (disk-vee) is a simple, persistent, key-value store. 2 // It stores all data flatly on the filesystem. 3 4 package diskv 5 6 import ( 7 "bytes" 8 "errors" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "path/filepath" 14 "strings" 15 "sync" 16 "syscall" 17 ) 18 19 const ( 20 defaultBasePath = "diskv" 21 defaultFilePerm os.FileMode = 0666 22 defaultPathPerm os.FileMode = 0777 23 ) 24 25 var ( 26 defaultTransform = func(s string) []string { return []string{} } 27 errCanceled = errors.New("canceled") 28 errEmptyKey = errors.New("empty key") 29 errBadKey = errors.New("bad key") 30 errImportDirectory = errors.New("can't import a directory") 31 ) 32 33 // TransformFunction transforms a key into a slice of strings, with each 34 // element in the slice representing a directory in the file path where the 35 // key's entry will eventually be stored. 36 // 37 // For example, if TransformFunc transforms "abcdef" to ["ab", "cde", "f"], 38 // the final location of the data file will be <basedir>/ab/cde/f/abcdef 39 type TransformFunction func(s string) []string 40 41 // Options define a set of properties that dictate Diskv behavior. 42 // All values are optional. 43 type Options struct { 44 BasePath string 45 Transform TransformFunction 46 CacheSizeMax uint64 // bytes 47 PathPerm os.FileMode 48 FilePerm os.FileMode 49 // If TempDir is set, it will enable filesystem atomic writes by 50 // writing temporary files to that location before being moved 51 // to BasePath. 52 // Note that TempDir MUST be on the same device/partition as 53 // BasePath. 54 TempDir string 55 56 Index Index 57 IndexLess LessFunction 58 59 Compression Compression 60 } 61 62 // Diskv implements the Diskv interface. You shouldn't construct Diskv 63 // structures directly; instead, use the New constructor. 64 type Diskv struct { 65 Options 66 mu sync.RWMutex 67 cache map[string][]byte 68 cacheSize uint64 69 } 70 71 // New returns an initialized Diskv structure, ready to use. 72 // If the path identified by baseDir already contains data, 73 // it will be accessible, but not yet cached. 74 func New(o Options) *Diskv { 75 if o.BasePath == "" { 76 o.BasePath = defaultBasePath 77 } 78 if o.Transform == nil { 79 o.Transform = defaultTransform 80 } 81 if o.PathPerm == 0 { 82 o.PathPerm = defaultPathPerm 83 } 84 if o.FilePerm == 0 { 85 o.FilePerm = defaultFilePerm 86 } 87 88 d := &Diskv{ 89 Options: o, 90 cache: map[string][]byte{}, 91 cacheSize: 0, 92 } 93 94 if d.Index != nil && d.IndexLess != nil { 95 d.Index.Initialize(d.IndexLess, d.Keys(nil)) 96 } 97 98 return d 99 } 100 101 // Write synchronously writes the key-value pair to disk, making it immediately 102 // available for reads. Write relies on the filesystem to perform an eventual 103 // sync to physical media. If you need stronger guarantees, see WriteStream. 104 func (d *Diskv) Write(key string, val []byte) error { 105 return d.WriteStream(key, bytes.NewBuffer(val), false) 106 } 107 108 // WriteStream writes the data represented by the io.Reader to the disk, under 109 // the provided key. If sync is true, WriteStream performs an explicit sync on 110 // the file as soon as it's written. 111 // 112 // bytes.Buffer provides io.Reader semantics for basic data types. 113 func (d *Diskv) WriteStream(key string, r io.Reader, sync bool) error { 114 if len(key) <= 0 { 115 return errEmptyKey 116 } 117 118 d.mu.Lock() 119 defer d.mu.Unlock() 120 121 return d.writeStreamWithLock(key, r, sync) 122 } 123 124 // createKeyFileWithLock either creates the key file directly, or 125 // creates a temporary file in TempDir if it is set. 126 func (d *Diskv) createKeyFileWithLock(key string) (*os.File, error) { 127 if d.TempDir != "" { 128 if err := os.MkdirAll(d.TempDir, d.PathPerm); err != nil { 129 return nil, fmt.Errorf("temp mkdir: %s", err) 130 } 131 f, err := ioutil.TempFile(d.TempDir, "") 132 if err != nil { 133 return nil, fmt.Errorf("temp file: %s", err) 134 } 135 136 if err := f.Chmod(d.FilePerm); err != nil { 137 f.Close() // error deliberately ignored 138 os.Remove(f.Name()) // error deliberately ignored 139 return nil, fmt.Errorf("chmod: %s", err) 140 } 141 return f, nil 142 } 143 144 mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC // overwrite if exists 145 f, err := os.OpenFile(d.completeFilename(key), mode, d.FilePerm) 146 if err != nil { 147 return nil, fmt.Errorf("open file: %s", err) 148 } 149 return f, nil 150 } 151 152 // writeStream does no input validation checking. 153 func (d *Diskv) writeStreamWithLock(key string, r io.Reader, sync bool) error { 154 if err := d.ensurePathWithLock(key); err != nil { 155 return fmt.Errorf("ensure path: %s", err) 156 } 157 158 f, err := d.createKeyFileWithLock(key) 159 if err != nil { 160 return fmt.Errorf("create key file: %s", err) 161 } 162 163 wc := io.WriteCloser(&nopWriteCloser{f}) 164 if d.Compression != nil { 165 wc, err = d.Compression.Writer(f) 166 if err != nil { 167 f.Close() // error deliberately ignored 168 os.Remove(f.Name()) // error deliberately ignored 169 return fmt.Errorf("compression writer: %s", err) 170 } 171 } 172 173 if _, err := io.Copy(wc, r); err != nil { 174 f.Close() // error deliberately ignored 175 os.Remove(f.Name()) // error deliberately ignored 176 return fmt.Errorf("i/o copy: %s", err) 177 } 178 179 if err := wc.Close(); err != nil { 180 f.Close() // error deliberately ignored 181 os.Remove(f.Name()) // error deliberately ignored 182 return fmt.Errorf("compression close: %s", err) 183 } 184 185 if sync { 186 if err := f.Sync(); err != nil { 187 f.Close() // error deliberately ignored 188 os.Remove(f.Name()) // error deliberately ignored 189 return fmt.Errorf("file sync: %s", err) 190 } 191 } 192 193 if err := f.Close(); err != nil { 194 return fmt.Errorf("file close: %s", err) 195 } 196 197 if f.Name() != d.completeFilename(key) { 198 if err := os.Rename(f.Name(), d.completeFilename(key)); err != nil { 199 os.Remove(f.Name()) // error deliberately ignored 200 return fmt.Errorf("rename: %s", err) 201 } 202 } 203 204 if d.Index != nil { 205 d.Index.Insert(key) 206 } 207 208 d.bustCacheWithLock(key) // cache only on read 209 210 return nil 211 } 212 213 // Import imports the source file into diskv under the destination key. If the 214 // destination key already exists, it's overwritten. If move is true, the 215 // source file is removed after a successful import. 216 func (d *Diskv) Import(srcFilename, dstKey string, move bool) (err error) { 217 if dstKey == "" { 218 return errEmptyKey 219 } 220 221 if fi, err := os.Stat(srcFilename); err != nil { 222 return err 223 } else if fi.IsDir() { 224 return errImportDirectory 225 } 226 227 d.mu.Lock() 228 defer d.mu.Unlock() 229 230 if err := d.ensurePathWithLock(dstKey); err != nil { 231 return fmt.Errorf("ensure path: %s", err) 232 } 233 234 if move { 235 if err := syscall.Rename(srcFilename, d.completeFilename(dstKey)); err == nil { 236 d.bustCacheWithLock(dstKey) 237 return nil 238 } else if err != syscall.EXDEV { 239 // If it failed due to being on a different device, fall back to copying 240 return err 241 } 242 } 243 244 f, err := os.Open(srcFilename) 245 if err != nil { 246 return err 247 } 248 defer f.Close() 249 err = d.writeStreamWithLock(dstKey, f, false) 250 if err == nil && move { 251 err = os.Remove(srcFilename) 252 } 253 return err 254 } 255 256 // Read reads the key and returns the value. 257 // If the key is available in the cache, Read won't touch the disk. 258 // If the key is not in the cache, Read will have the side-effect of 259 // lazily caching the value. 260 func (d *Diskv) Read(key string) ([]byte, error) { 261 rc, err := d.ReadStream(key, false) 262 if err != nil { 263 return []byte{}, err 264 } 265 defer rc.Close() 266 return ioutil.ReadAll(rc) 267 } 268 269 // ReadStream reads the key and returns the value (data) as an io.ReadCloser. 270 // If the value is cached from a previous read, and direct is false, 271 // ReadStream will use the cached value. Otherwise, it will return a handle to 272 // the file on disk, and cache the data on read. 273 // 274 // If direct is true, ReadStream will lazily delete any cached value for the 275 // key, and return a direct handle to the file on disk. 276 // 277 // If compression is enabled, ReadStream taps into the io.Reader stream prior 278 // to decompression, and caches the compressed data. 279 func (d *Diskv) ReadStream(key string, direct bool) (io.ReadCloser, error) { 280 d.mu.RLock() 281 defer d.mu.RUnlock() 282 283 if val, ok := d.cache[key]; ok { 284 if !direct { 285 buf := bytes.NewBuffer(val) 286 if d.Compression != nil { 287 return d.Compression.Reader(buf) 288 } 289 return ioutil.NopCloser(buf), nil 290 } 291 292 go func() { 293 d.mu.Lock() 294 defer d.mu.Unlock() 295 d.uncacheWithLock(key, uint64(len(val))) 296 }() 297 } 298 299 return d.readWithRLock(key) 300 } 301 302 // read ignores the cache, and returns an io.ReadCloser representing the 303 // decompressed data for the given key, streamed from the disk. Clients should 304 // acquire a read lock on the Diskv and check the cache themselves before 305 // calling read. 306 func (d *Diskv) readWithRLock(key string) (io.ReadCloser, error) { 307 filename := d.completeFilename(key) 308 309 fi, err := os.Stat(filename) 310 if err != nil { 311 return nil, err 312 } 313 if fi.IsDir() { 314 return nil, os.ErrNotExist 315 } 316 317 f, err := os.Open(filename) 318 if err != nil { 319 return nil, err 320 } 321 322 var r io.Reader 323 if d.CacheSizeMax > 0 { 324 r = newSiphon(f, d, key) 325 } else { 326 r = &closingReader{f} 327 } 328 329 var rc = io.ReadCloser(ioutil.NopCloser(r)) 330 if d.Compression != nil { 331 rc, err = d.Compression.Reader(r) 332 if err != nil { 333 return nil, err 334 } 335 } 336 337 return rc, nil 338 } 339 340 // closingReader provides a Reader that automatically closes the 341 // embedded ReadCloser when it reaches EOF 342 type closingReader struct { 343 rc io.ReadCloser 344 } 345 346 func (cr closingReader) Read(p []byte) (int, error) { 347 n, err := cr.rc.Read(p) 348 if err == io.EOF { 349 if closeErr := cr.rc.Close(); closeErr != nil { 350 return n, closeErr // close must succeed for Read to succeed 351 } 352 } 353 return n, err 354 } 355 356 // siphon is like a TeeReader: it copies all data read through it to an 357 // internal buffer, and moves that buffer to the cache at EOF. 358 type siphon struct { 359 f *os.File 360 d *Diskv 361 key string 362 buf *bytes.Buffer 363 } 364 365 // newSiphon constructs a siphoning reader that represents the passed file. 366 // When a successful series of reads ends in an EOF, the siphon will write 367 // the buffered data to Diskv's cache under the given key. 368 func newSiphon(f *os.File, d *Diskv, key string) io.Reader { 369 return &siphon{ 370 f: f, 371 d: d, 372 key: key, 373 buf: &bytes.Buffer{}, 374 } 375 } 376 377 // Read implements the io.Reader interface for siphon. 378 func (s *siphon) Read(p []byte) (int, error) { 379 n, err := s.f.Read(p) 380 381 if err == nil { 382 return s.buf.Write(p[0:n]) // Write must succeed for Read to succeed 383 } 384 385 if err == io.EOF { 386 s.d.cacheWithoutLock(s.key, s.buf.Bytes()) // cache may fail 387 if closeErr := s.f.Close(); closeErr != nil { 388 return n, closeErr // close must succeed for Read to succeed 389 } 390 return n, err 391 } 392 393 return n, err 394 } 395 396 // Erase synchronously erases the given key from the disk and the cache. 397 func (d *Diskv) Erase(key string) error { 398 d.mu.Lock() 399 defer d.mu.Unlock() 400 401 d.bustCacheWithLock(key) 402 403 // erase from index 404 if d.Index != nil { 405 d.Index.Delete(key) 406 } 407 408 // erase from disk 409 filename := d.completeFilename(key) 410 if s, err := os.Stat(filename); err == nil { 411 if s.IsDir() { 412 return errBadKey 413 } 414 if err = os.Remove(filename); err != nil { 415 return err 416 } 417 } else { 418 // Return err as-is so caller can do os.IsNotExist(err). 419 return err 420 } 421 422 // clean up and return 423 d.pruneDirsWithLock(key) 424 return nil 425 } 426 427 // EraseAll will delete all of the data from the store, both in the cache and on 428 // the disk. Note that EraseAll doesn't distinguish diskv-related data from non- 429 // diskv-related data. Care should be taken to always specify a diskv base 430 // directory that is exclusively for diskv data. 431 func (d *Diskv) EraseAll() error { 432 d.mu.Lock() 433 defer d.mu.Unlock() 434 d.cache = make(map[string][]byte) 435 d.cacheSize = 0 436 if d.TempDir != "" { 437 os.RemoveAll(d.TempDir) // errors ignored 438 } 439 return os.RemoveAll(d.BasePath) 440 } 441 442 // Has returns true if the given key exists. 443 func (d *Diskv) Has(key string) bool { 444 d.mu.Lock() 445 defer d.mu.Unlock() 446 447 if _, ok := d.cache[key]; ok { 448 return true 449 } 450 451 filename := d.completeFilename(key) 452 s, err := os.Stat(filename) 453 if err != nil { 454 return false 455 } 456 if s.IsDir() { 457 return false 458 } 459 460 return true 461 } 462 463 // Keys returns a channel that will yield every key accessible by the store, 464 // in undefined order. If a cancel channel is provided, closing it will 465 // terminate and close the keys channel. 466 func (d *Diskv) Keys(cancel <-chan struct{}) <-chan string { 467 return d.KeysPrefix("", cancel) 468 } 469 470 // KeysPrefix returns a channel that will yield every key accessible by the 471 // store with the given prefix, in undefined order. If a cancel channel is 472 // provided, closing it will terminate and close the keys channel. If the 473 // provided prefix is the empty string, all keys will be yielded. 474 func (d *Diskv) KeysPrefix(prefix string, cancel <-chan struct{}) <-chan string { 475 var prepath string 476 if prefix == "" { 477 prepath = d.BasePath 478 } else { 479 prepath = d.pathFor(prefix) 480 } 481 c := make(chan string) 482 go func() { 483 filepath.Walk(prepath, walker(c, prefix, cancel)) 484 close(c) 485 }() 486 return c 487 } 488 489 // walker returns a function which satisfies the filepath.WalkFunc interface. 490 // It sends every non-directory file entry down the channel c. 491 func walker(c chan<- string, prefix string, cancel <-chan struct{}) filepath.WalkFunc { 492 return func(path string, info os.FileInfo, err error) error { 493 if err != nil { 494 return err 495 } 496 497 if info.IsDir() || !strings.HasPrefix(info.Name(), prefix) { 498 return nil // "pass" 499 } 500 501 select { 502 case c <- info.Name(): 503 case <-cancel: 504 return errCanceled 505 } 506 507 return nil 508 } 509 } 510 511 // pathFor returns the absolute path for location on the filesystem where the 512 // data for the given key will be stored. 513 func (d *Diskv) pathFor(key string) string { 514 return filepath.Join(d.BasePath, filepath.Join(d.Transform(key)...)) 515 } 516 517 // ensurePathWithLock is a helper function that generates all necessary 518 // directories on the filesystem for the given key. 519 func (d *Diskv) ensurePathWithLock(key string) error { 520 return os.MkdirAll(d.pathFor(key), d.PathPerm) 521 } 522 523 // completeFilename returns the absolute path to the file for the given key. 524 func (d *Diskv) completeFilename(key string) string { 525 return filepath.Join(d.pathFor(key), key) 526 } 527 528 // cacheWithLock attempts to cache the given key-value pair in the store's 529 // cache. It can fail if the value is larger than the cache's maximum size. 530 func (d *Diskv) cacheWithLock(key string, val []byte) error { 531 valueSize := uint64(len(val)) 532 if err := d.ensureCacheSpaceWithLock(valueSize); err != nil { 533 return fmt.Errorf("%s; not caching", err) 534 } 535 536 // be very strict about memory guarantees 537 if (d.cacheSize + valueSize) > d.CacheSizeMax { 538 panic(fmt.Sprintf("failed to make room for value (%d/%d)", valueSize, d.CacheSizeMax)) 539 } 540 541 d.cache[key] = val 542 d.cacheSize += valueSize 543 return nil 544 } 545 546 // cacheWithoutLock acquires the store's (write) mutex and calls cacheWithLock. 547 func (d *Diskv) cacheWithoutLock(key string, val []byte) error { 548 d.mu.Lock() 549 defer d.mu.Unlock() 550 return d.cacheWithLock(key, val) 551 } 552 553 func (d *Diskv) bustCacheWithLock(key string) { 554 if val, ok := d.cache[key]; ok { 555 d.uncacheWithLock(key, uint64(len(val))) 556 } 557 } 558 559 func (d *Diskv) uncacheWithLock(key string, sz uint64) { 560 d.cacheSize -= sz 561 delete(d.cache, key) 562 } 563 564 // pruneDirsWithLock deletes empty directories in the path walk leading to the 565 // key k. Typically this function is called after an Erase is made. 566 func (d *Diskv) pruneDirsWithLock(key string) error { 567 pathlist := d.Transform(key) 568 for i := range pathlist { 569 dir := filepath.Join(d.BasePath, filepath.Join(pathlist[:len(pathlist)-i]...)) 570 571 // thanks to Steven Blenkinsop for this snippet 572 switch fi, err := os.Stat(dir); true { 573 case err != nil: 574 return err 575 case !fi.IsDir(): 576 panic(fmt.Sprintf("corrupt dirstate at %s", dir)) 577 } 578 579 nlinks, err := filepath.Glob(filepath.Join(dir, "*")) 580 if err != nil { 581 return err 582 } else if len(nlinks) > 0 { 583 return nil // has subdirs -- do not prune 584 } 585 if err = os.Remove(dir); err != nil { 586 return err 587 } 588 } 589 590 return nil 591 } 592 593 // ensureCacheSpaceWithLock deletes entries from the cache in arbitrary order 594 // until the cache has at least valueSize bytes available. 595 func (d *Diskv) ensureCacheSpaceWithLock(valueSize uint64) error { 596 if valueSize > d.CacheSizeMax { 597 return fmt.Errorf("value size (%d bytes) too large for cache (%d bytes)", valueSize, d.CacheSizeMax) 598 } 599 600 safe := func() bool { return (d.cacheSize + valueSize) <= d.CacheSizeMax } 601 602 for key, val := range d.cache { 603 if safe() { 604 break 605 } 606 607 d.uncacheWithLock(key, uint64(len(val))) 608 } 609 610 if !safe() { 611 panic(fmt.Sprintf("%d bytes still won't fit in the cache! (max %d bytes)", valueSize, d.CacheSizeMax)) 612 } 613 614 return nil 615 } 616 617 // nopWriteCloser wraps an io.Writer and provides a no-op Close method to 618 // satisfy the io.WriteCloser interface. 619 type nopWriteCloser struct { 620 io.Writer 621 } 622 623 func (wc *nopWriteCloser) Write(p []byte) (int, error) { return wc.Writer.Write(p) } 624 func (wc *nopWriteCloser) Close() error { return nil }