github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/diskpacked/diskpacked.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Package diskpacked registers the "diskpacked" blobserver storage type, 19 storing blobs in sequence of monolithic data files indexed by a kvfile index. 20 21 Example low-level config: 22 23 "/storage/": { 24 "handler": "storage-diskpacked", 25 "handlerArgs": { 26 "path": "/var/camlistore/blobs" 27 } 28 }, 29 30 */ 31 package diskpacked 32 33 import ( 34 "bytes" 35 "errors" 36 "expvar" 37 "fmt" 38 "io" 39 "log" 40 "os" 41 "path/filepath" 42 "strings" 43 "sync" 44 45 "camlistore.org/pkg/blob" 46 "camlistore.org/pkg/blobserver" 47 "camlistore.org/pkg/blobserver/local" 48 "camlistore.org/pkg/context" 49 "camlistore.org/pkg/jsonconfig" 50 "camlistore.org/pkg/sorted" 51 "camlistore.org/pkg/sorted/kvfile" 52 "camlistore.org/pkg/syncutil" 53 "camlistore.org/pkg/types" 54 "camlistore.org/third_party/github.com/camlistore/lock" 55 ) 56 57 // TODO(wathiede): replace with glog.V(2) when we decide our logging story. 58 type debugT bool 59 60 var debug = debugT(false) 61 62 func (d debugT) Printf(format string, args ...interface{}) { 63 if bool(d) { 64 log.Printf(format, args...) 65 } 66 } 67 68 func (d debugT) Println(args ...interface{}) { 69 if bool(d) { 70 log.Println(args...) 71 } 72 } 73 74 const defaultMaxFileSize = 512 << 20 // 512MB 75 76 type storage struct { 77 root string 78 index sorted.KeyValue 79 maxFileSize int64 80 81 writeLock io.Closer // Provided by lock.Lock, and guards other processes from accesing the file open for writes. 82 83 mu sync.Mutex // Guards all I/O state. 84 closed bool 85 writer *os.File 86 fds []*os.File 87 size int64 88 89 *local.Generationer 90 } 91 92 func (s *storage) String() string { 93 return fmt.Sprintf("\"diskpacked\" blob packs at %s", s.root) 94 } 95 96 var ( 97 readVar = expvar.NewMap("diskpacked-read-bytes") 98 readTotVar = expvar.NewMap("diskpacked-total-read-bytes") 99 openFdsVar = expvar.NewMap("diskpacked-open-fds") 100 writeVar = expvar.NewMap("diskpacked-write-bytes") 101 writeTotVar = expvar.NewMap("diskpacked-total-write-bytes") 102 ) 103 104 const indexKV = "index.kv" 105 106 // IsDir reports whether dir is a diskpacked directory. 107 func IsDir(dir string) (bool, error) { 108 _, err := os.Stat(filepath.Join(dir, indexKV)) 109 if os.IsNotExist(err) { 110 return false, nil 111 } 112 return err == nil, err 113 } 114 115 // New returns a diskpacked storage implementation, adding blobs to 116 // the provided directory. It doesn't delete any existing blob pack 117 // files. 118 func New(dir string) (blobserver.Storage, error) { 119 var maxSize int64 120 if ok, _ := IsDir(dir); ok { 121 // TODO: detect existing max size from size of files, if obvious, 122 // and set maxSize to that? 123 } 124 return newStorage(dir, maxSize, nil) 125 } 126 127 // newStorage returns a new storage in path root with the given maxFileSize, 128 // or defaultMaxFileSize (512MB) if <= 0 129 func newStorage(root string, maxFileSize int64, indexConf jsonconfig.Obj) (s *storage, err error) { 130 fi, err := os.Stat(root) 131 if os.IsNotExist(err) { 132 return nil, fmt.Errorf("storage root %q doesn't exist", root) 133 } 134 if err != nil { 135 return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err) 136 } 137 if !fi.IsDir() { 138 return nil, fmt.Errorf("storage root %q exists but is not a directory.", root) 139 } 140 var index sorted.KeyValue 141 if len(indexConf) > 0 { 142 index, err = sorted.NewKeyValue(indexConf) 143 } else { 144 index, err = kvfile.NewStorage(filepath.Join(root, indexKV)) 145 } 146 if err != nil { 147 return nil, err 148 } 149 defer func() { 150 if err != nil { 151 index.Close() 152 } 153 }() 154 if maxFileSize <= 0 { 155 maxFileSize = defaultMaxFileSize 156 } 157 // Be consistent with trailing slashes. Makes expvar stats for total 158 // reads/writes consistent across diskpacked targets, regardless of what 159 // people put in their low level config. 160 root = strings.TrimRight(root, `\/`) 161 s = &storage{ 162 root: root, 163 index: index, 164 maxFileSize: maxFileSize, 165 Generationer: local.NewGenerationer(root), 166 } 167 s.mu.Lock() 168 defer s.mu.Unlock() 169 if err := s.openAllPacks(); err != nil { 170 return nil, err 171 } 172 if _, _, err := s.StorageGeneration(); err != nil { 173 return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err) 174 } 175 return s, nil 176 } 177 178 func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) { 179 var ( 180 path = config.RequiredString("path") 181 maxFileSize = config.OptionalInt("maxFileSize", 0) 182 indexConf = config.OptionalObject("metaIndex") 183 ) 184 if err := config.Validate(); err != nil { 185 return nil, err 186 } 187 return newStorage(path, int64(maxFileSize), indexConf) 188 } 189 190 func init() { 191 blobserver.RegisterStorageConstructor("diskpacked", blobserver.StorageConstructor(newFromConfig)) 192 } 193 194 // openForRead will open pack file n for read and keep a handle to it in 195 // s.fds. os.IsNotExist returned if n >= the number of pack files in s.root. 196 // This function is not thread safe, s.mu should be locked by the caller. 197 func (s *storage) openForRead(n int) error { 198 if n > len(s.fds) { 199 panic(fmt.Sprintf("openForRead called out of order got %d, expected %d", n, len(s.fds))) 200 } 201 202 fn := s.filename(n) 203 f, err := os.Open(fn) 204 if err != nil { 205 return err 206 } 207 openFdsVar.Add(s.root, 1) 208 debug.Printf("diskpacked: opened for read %q", fn) 209 s.fds = append(s.fds, f) 210 return nil 211 } 212 213 // openForWrite will create or open pack file n for writes, create a lock 214 // visible external to the process and seek to the end of the file ready for 215 // appending new data. 216 // This function is not thread safe, s.mu should be locked by the caller. 217 func (s *storage) openForWrite(n int) error { 218 fn := s.filename(n) 219 l, err := lock.Lock(fn + ".lock") 220 if err != nil { 221 return err 222 } 223 f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE, 0666) 224 if err != nil { 225 l.Close() 226 return err 227 } 228 openFdsVar.Add(s.root, 1) 229 debug.Printf("diskpacked: opened for write %q", fn) 230 231 s.size, err = f.Seek(0, os.SEEK_END) 232 if err != nil { 233 return err 234 } 235 236 s.writer = f 237 s.writeLock = l 238 return nil 239 } 240 241 // nextPack will close the current writer and release its lock if open, 242 // open the next pack file in sequence for writing, grab its lock, set it 243 // to the currently active writer, and open another copy for read-only use. 244 // This function is not thread safe, s.mu should be locked by the caller. 245 func (s *storage) nextPack() error { 246 debug.Println("diskpacked: nextPack") 247 s.size = 0 248 if s.writeLock != nil { 249 err := s.writeLock.Close() 250 if err != nil { 251 return err 252 } 253 s.writeLock = nil 254 } 255 if s.writer != nil { 256 if err := s.writer.Close(); err != nil { 257 return err 258 } 259 openFdsVar.Add(s.root, -1) 260 } 261 262 n := len(s.fds) 263 if err := s.openForWrite(n); err != nil { 264 return err 265 } 266 return s.openForRead(n) 267 } 268 269 // openAllPacks opens read-only each pack file in s.root, populating s.fds. 270 // The latest pack file will also have a writable handle opened. 271 // This function is not thread safe, s.mu should be locked by the caller. 272 func (s *storage) openAllPacks() error { 273 debug.Println("diskpacked: openAllPacks") 274 n := 0 275 for { 276 err := s.openForRead(n) 277 if os.IsNotExist(err) { 278 break 279 } 280 if err != nil { 281 s.Close() 282 return err 283 } 284 n++ 285 } 286 287 if n == 0 { 288 // If no pack files are found, we create one open for read and write. 289 return s.nextPack() 290 } 291 292 // If 1 or more pack files are found, open the last one read and write. 293 return s.openForWrite(n - 1) 294 } 295 296 func (s *storage) Close() error { 297 s.mu.Lock() 298 defer s.mu.Unlock() 299 var closeErr error 300 if !s.closed { 301 s.closed = true 302 if err := s.index.Close(); err != nil { 303 log.Println("diskpacked: closing index:", err) 304 } 305 for _, f := range s.fds { 306 if err := f.Close(); err != nil { 307 closeErr = err 308 } 309 openFdsVar.Add(s.root, -1) 310 } 311 s.writer = nil 312 if l := s.writeLock; l != nil { 313 err := l.Close() 314 if closeErr == nil { 315 closeErr = err 316 } 317 s.writeLock = nil 318 } 319 } 320 return closeErr 321 } 322 323 func (s *storage) Fetch(br blob.Ref) (io.ReadCloser, uint32, error) { 324 meta, err := s.meta(br) 325 if err != nil { 326 return nil, 0, err 327 } 328 329 if meta.file >= len(s.fds) { 330 return nil, 0, fmt.Errorf("diskpacked: attempt to fetch blob from out of range pack file %d > %d", meta.file, len(s.fds)) 331 } 332 rac := s.fds[meta.file] 333 var rs io.ReadSeeker = io.NewSectionReader(rac, meta.offset, int64(meta.size)) 334 fn := rac.Name() 335 // Ensure entry is in map. 336 readVar.Add(fn, 0) 337 if v, ok := readVar.Get(fn).(*expvar.Int); ok { 338 rs = types.NewStatsReadSeeker(v, rs) 339 } 340 readTotVar.Add(s.root, 0) 341 if v, ok := readTotVar.Get(s.root).(*expvar.Int); ok { 342 rs = types.NewStatsReadSeeker(v, rs) 343 } 344 rsc := struct { 345 io.ReadSeeker 346 io.Closer 347 }{ 348 rs, 349 types.NopCloser, 350 } 351 return rsc, meta.size, nil 352 } 353 354 func (s *storage) filename(file int) string { 355 return filepath.Join(s.root, fmt.Sprintf("pack-%05d.blobs", file)) 356 } 357 358 var removeGate = syncutil.NewGate(20) // arbitrary 359 360 // RemoveBlobs removes the blobs from index and pads data with zero bytes 361 func (s *storage) RemoveBlobs(blobs []blob.Ref) error { 362 batch := s.index.BeginBatch() 363 var wg syncutil.Group 364 for _, br := range blobs { 365 br := br 366 removeGate.Start() 367 batch.Delete(br.String()) 368 wg.Go(func() error { 369 defer removeGate.Done() 370 if err := s.delete(br); err != nil { 371 return err 372 } 373 return nil 374 }) 375 } 376 err1 := wg.Err() 377 err2 := s.index.CommitBatch(batch) 378 if err1 != nil { 379 return err1 380 } 381 return err2 382 } 383 384 var statGate = syncutil.NewGate(20) // arbitrary 385 386 func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) (err error) { 387 var wg syncutil.Group 388 389 for _, br := range blobs { 390 br := br 391 statGate.Start() 392 wg.Go(func() error { 393 defer statGate.Done() 394 395 m, err := s.meta(br) 396 if err == nil { 397 dest <- m.SizedRef(br) 398 return nil 399 } 400 if err == os.ErrNotExist { 401 return nil 402 } 403 return err 404 }) 405 } 406 return wg.Err() 407 } 408 409 func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) { 410 defer close(dest) 411 412 t := s.index.Find(after, "") 413 defer func() { 414 closeErr := t.Close() 415 if err == nil { 416 err = closeErr 417 } 418 }() 419 for i := 0; i < limit && t.Next(); { 420 key := t.Key() 421 if key <= after { 422 // EnumerateBlobs' semantics are '>', but sorted.KeyValue.Find is '>='. 423 continue 424 } 425 br, ok := blob.Parse(key) 426 if !ok { 427 return fmt.Errorf("diskpacked: couldn't parse index key %q", key) 428 } 429 m, ok := parseBlobMeta(t.Value()) 430 if !ok { 431 return fmt.Errorf("diskpacked: couldn't parse index value %q: %q", key, t.Value()) 432 } 433 select { 434 case dest <- m.SizedRef(br): 435 case <-ctx.Done(): 436 return context.ErrCanceled 437 } 438 i++ 439 } 440 return nil 441 } 442 443 func (s *storage) ReceiveBlob(br blob.Ref, source io.Reader) (sbr blob.SizedRef, err error) { 444 var b bytes.Buffer 445 n, err := b.ReadFrom(source) 446 if err != nil { 447 return 448 } 449 450 sbr = blob.SizedRef{Ref: br, Size: uint32(n)} 451 452 // Check if it's a dup. Still accept it if the pack file on disk seems to be corrupt 453 // or truncated. 454 if m, err := s.meta(br); err == nil { 455 fi, err := os.Stat(s.filename(m.file)) 456 if err == nil && fi.Size() >= m.offset+int64(m.size) { 457 return sbr, nil 458 } 459 } 460 461 err = s.append(sbr, &b) 462 return 463 } 464 465 // append writes the provided blob to the current data file. 466 func (s *storage) append(br blob.SizedRef, r io.Reader) error { 467 s.mu.Lock() 468 defer s.mu.Unlock() 469 if s.closed { 470 return errors.New("diskpacked: write to closed storage") 471 } 472 473 fn := s.writer.Name() 474 n, err := fmt.Fprintf(s.writer, "[%v %v]", br.Ref.String(), br.Size) 475 s.size += int64(n) 476 writeVar.Add(fn, int64(n)) 477 writeTotVar.Add(s.root, int64(n)) 478 if err != nil { 479 return err 480 } 481 482 // TODO(adg): remove this seek and the offset check once confident 483 offset, err := s.writer.Seek(0, os.SEEK_CUR) 484 if err != nil { 485 return err 486 } 487 if offset != s.size { 488 return fmt.Errorf("diskpacked: seek says offset = %d, we think %d", 489 offset, s.size) 490 } 491 offset = s.size // make this a declaration once the above is removed 492 493 n2, err := io.Copy(s.writer, r) 494 s.size += n2 495 writeVar.Add(fn, int64(n)) 496 writeTotVar.Add(s.root, int64(n)) 497 if err != nil { 498 return err 499 } 500 if n2 != int64(br.Size) { 501 return fmt.Errorf("diskpacked: written blob size %d didn't match size %d", n, br.Size) 502 } 503 if err = s.writer.Sync(); err != nil { 504 return err 505 } 506 507 packIdx := len(s.fds) - 1 508 if s.size > s.maxFileSize { 509 if err := s.nextPack(); err != nil { 510 return err 511 } 512 } 513 return s.index.Set(br.Ref.String(), blobMeta{packIdx, offset, br.Size}.String()) 514 } 515 516 // meta fetches the metadata for the specified blob from the index. 517 func (s *storage) meta(br blob.Ref) (m blobMeta, err error) { 518 ms, err := s.index.Get(br.String()) 519 if err != nil { 520 if err == sorted.ErrNotFound { 521 err = os.ErrNotExist 522 } 523 return 524 } 525 m, ok := parseBlobMeta(ms) 526 if !ok { 527 err = fmt.Errorf("diskpacked: bad blob metadata: %q", ms) 528 } 529 return 530 } 531 532 // blobMeta is the blob metadata stored in the index. 533 type blobMeta struct { 534 file int 535 offset int64 536 size uint32 537 } 538 539 func parseBlobMeta(s string) (m blobMeta, ok bool) { 540 n, err := fmt.Sscan(s, &m.file, &m.offset, &m.size) 541 return m, n == 3 && err == nil 542 } 543 544 func (m blobMeta) String() string { 545 return fmt.Sprintf("%v %v %v", m.file, m.offset, m.size) 546 } 547 548 func (m blobMeta) SizedRef(br blob.Ref) blob.SizedRef { 549 return blob.SizedRef{Ref: br, Size: m.size} 550 }