github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blobserver/diskpacked/diskpacked.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Package diskpacked registers the "diskpacked" blobserver storage type, 19 storing blobs in sequence of monolithic data files indexed by a kvfile index. 20 21 Example low-level config: 22 23 "/storage/": { 24 "handler": "storage-diskpacked", 25 "handlerArgs": { 26 "path": "/var/camlistore/blobs" 27 } 28 }, 29 30 */ 31 package diskpacked 32 33 import ( 34 "bytes" 35 "errors" 36 "expvar" 37 "fmt" 38 "io" 39 "log" 40 "os" 41 "path/filepath" 42 "strings" 43 "sync" 44 45 "camlistore.org/pkg/blob" 46 "camlistore.org/pkg/blobserver" 47 "camlistore.org/pkg/blobserver/local" 48 "camlistore.org/pkg/context" 49 "camlistore.org/pkg/jsonconfig" 50 "camlistore.org/pkg/sorted" 51 "camlistore.org/pkg/sorted/kvfile" 52 "camlistore.org/pkg/syncutil" 53 "camlistore.org/pkg/types" 54 "camlistore.org/third_party/github.com/camlistore/lock" 55 ) 56 57 // TODO(wathiede): replace with glog.V(2) when we decide our logging story. 58 type debugT bool 59 60 var debug = debugT(false) 61 62 func (d debugT) Printf(format string, args ...interface{}) { 63 if bool(d) { 64 log.Printf(format, args...) 65 } 66 } 67 68 func (d debugT) Println(args ...interface{}) { 69 if bool(d) { 70 log.Println(args...) 71 } 72 } 73 74 const defaultMaxFileSize = 512 << 20 // 512MB 75 76 type storage struct { 77 root string 78 index sorted.KeyValue 79 maxFileSize int64 80 81 writeLock io.Closer // Provided by lock.Lock, and guards other processes from accesing the file open for writes. 82 83 mu sync.Mutex // Guards all I/O state. 84 closed bool 85 writer *os.File 86 fds []*os.File 87 size int64 88 89 *local.Generationer 90 } 91 92 var ( 93 readVar = expvar.NewMap("diskpacked-read-bytes") 94 readTotVar = expvar.NewMap("diskpacked-total-read-bytes") 95 openFdsVar = expvar.NewMap("diskpacked-open-fds") 96 writeVar = expvar.NewMap("diskpacked-write-bytes") 97 writeTotVar = expvar.NewMap("diskpacked-total-write-bytes") 98 ) 99 100 // newStorage returns a new storage in path root with the given maxFileSize, 101 // or defaultMaxFileSize (512MB) if <= 0 102 func newStorage(root string, maxFileSize int64) (s *storage, err error) { 103 fi, err := os.Stat(root) 104 if os.IsNotExist(err) { 105 return nil, fmt.Errorf("storage root %q doesn't exist", root) 106 } 107 if err != nil { 108 return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err) 109 } 110 if !fi.IsDir() { 111 return nil, fmt.Errorf("storage root %q exists but is not a directory.", root) 112 } 113 index, err := kvfile.NewStorage(filepath.Join(root, "index.kv")) 114 if err != nil { 115 return nil, err 116 } 117 defer func() { 118 if err != nil { 119 index.Close() 120 } 121 }() 122 if maxFileSize <= 0 { 123 maxFileSize = defaultMaxFileSize 124 } 125 // Be consistent with trailing slashes. Makes expvar stats for total 126 // reads/writes consistent across diskpacked targets, regardless of what 127 // people put in their low level config. 128 root = strings.TrimRight(root, `\/`) 129 s = &storage{ 130 root: root, 131 index: index, 132 maxFileSize: maxFileSize, 133 Generationer: local.NewGenerationer(root), 134 } 135 s.mu.Lock() 136 defer s.mu.Unlock() 137 if err := s.openAllPacks(); err != nil { 138 return nil, err 139 } 140 if _, _, err := s.StorageGeneration(); err != nil { 141 return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err) 142 } 143 return s, nil 144 } 145 146 func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) { 147 path := config.RequiredString("path") 148 maxFileSize := config.OptionalInt("maxFileSize", 0) 149 if err := config.Validate(); err != nil { 150 return nil, err 151 } 152 return newStorage(path, int64(maxFileSize)) 153 } 154 155 func init() { 156 blobserver.RegisterStorageConstructor("diskpacked", blobserver.StorageConstructor(newFromConfig)) 157 } 158 159 // openForRead will open pack file n for read and keep a handle to it in 160 // s.fds. os.IsNotExist returned if n >= the number of pack files in s.root. 161 // This function is not thread safe, s.mu should be locked by the caller. 162 func (s *storage) openForRead(n int) error { 163 if n > len(s.fds) { 164 panic(fmt.Sprintf("openForRead called out of order got %d, expected %d", n, len(s.fds))) 165 } 166 167 fn := s.filename(n) 168 f, err := os.Open(fn) 169 if err != nil { 170 return err 171 } 172 openFdsVar.Add(s.root, 1) 173 debug.Printf("diskpacked: opened for read %q", fn) 174 s.fds = append(s.fds, f) 175 return nil 176 } 177 178 // openForWrite will create or open pack file n for writes, create a lock 179 // visible external to the process and seek to the end of the file ready for 180 // appending new data. 181 // This function is not thread safe, s.mu should be locked by the caller. 182 func (s *storage) openForWrite(n int) error { 183 fn := s.filename(n) 184 l, err := lock.Lock(fn + ".lock") 185 if err != nil { 186 return err 187 } 188 f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE, 0666) 189 if err != nil { 190 l.Close() 191 return err 192 } 193 openFdsVar.Add(s.root, 1) 194 debug.Printf("diskpacked: opened for write %q", fn) 195 196 s.size, err = f.Seek(0, os.SEEK_END) 197 if err != nil { 198 return err 199 } 200 201 s.writer = f 202 s.writeLock = l 203 return nil 204 } 205 206 // nextPack will close the current writer and release its lock if open, 207 // open the next pack file in sequence for writing, grab its lock, set it 208 // to the currently active writer, and open another copy for read-only use. 209 // This function is not thread safe, s.mu should be locked by the caller. 210 func (s *storage) nextPack() error { 211 debug.Println("diskpacked: nextPack") 212 s.size = 0 213 if s.writeLock != nil { 214 err := s.writeLock.Close() 215 if err != nil { 216 return err 217 } 218 s.writeLock = nil 219 } 220 if s.writer != nil { 221 if err := s.writer.Close(); err != nil { 222 return err 223 } 224 openFdsVar.Add(s.root, -1) 225 } 226 227 n := len(s.fds) 228 if err := s.openForWrite(n); err != nil { 229 return err 230 } 231 return s.openForRead(n) 232 } 233 234 // openAllPacks opens read-only each pack file in s.root, populating s.fds. 235 // The latest pack file will also have a writable handle opened. 236 // This function is not thread safe, s.mu should be locked by the caller. 237 func (s *storage) openAllPacks() error { 238 debug.Println("diskpacked: openAllPacks") 239 n := 0 240 for { 241 err := s.openForRead(n) 242 if os.IsNotExist(err) { 243 break 244 } 245 if err != nil { 246 s.Close() 247 return err 248 } 249 n++ 250 } 251 252 if n == 0 { 253 // If no pack files are found, we create one open for read and write. 254 return s.nextPack() 255 } 256 257 // If 1 or more pack files are found, open the last one read and write. 258 return s.openForWrite(n - 1) 259 } 260 261 func (s *storage) Close() error { 262 s.mu.Lock() 263 defer s.mu.Unlock() 264 var closeErr error 265 if !s.closed { 266 s.closed = true 267 if err := s.index.Close(); err != nil { 268 log.Println("diskpacked: closing index:", err) 269 } 270 for _, f := range s.fds { 271 if err := f.Close(); err != nil { 272 closeErr = err 273 } 274 openFdsVar.Add(s.root, -1) 275 } 276 s.writer = nil 277 if l := s.writeLock; l != nil { 278 err := l.Close() 279 if closeErr == nil { 280 closeErr = err 281 } 282 s.writeLock = nil 283 } 284 } 285 return closeErr 286 } 287 288 func (s *storage) FetchStreaming(br blob.Ref) (io.ReadCloser, int64, error) { 289 return s.Fetch(br) 290 } 291 292 func (s *storage) Fetch(br blob.Ref) (types.ReadSeekCloser, int64, error) { 293 meta, err := s.meta(br) 294 if err != nil { 295 return nil, 0, err 296 } 297 298 if meta.file >= len(s.fds) { 299 return nil, 0, fmt.Errorf("diskpacked: attempt to fetch blob from out of range pack file %d > %d", meta.file, len(s.fds)) 300 } 301 rac := s.fds[meta.file] 302 var rs io.ReadSeeker = io.NewSectionReader(rac, meta.offset, meta.size) 303 fn := rac.Name() 304 // Ensure entry is in map. 305 readVar.Add(fn, 0) 306 if v, ok := readVar.Get(fn).(*expvar.Int); ok { 307 rs = types.NewStatsReadSeeker(v, rs) 308 } 309 readTotVar.Add(s.root, 0) 310 if v, ok := readTotVar.Get(s.root).(*expvar.Int); ok { 311 rs = types.NewStatsReadSeeker(v, rs) 312 } 313 rsc := struct { 314 io.ReadSeeker 315 io.Closer 316 }{ 317 rs, 318 types.NopCloser, 319 } 320 return rsc, meta.size, nil 321 } 322 323 func (s *storage) filename(file int) string { 324 return filepath.Join(s.root, fmt.Sprintf("pack-%05d.blobs", file)) 325 } 326 327 func (s *storage) RemoveBlobs(blobs []blob.Ref) error { 328 // TODO(adg): remove blob from index and pad data with spaces 329 return blobserver.ErrNotImplemented 330 } 331 332 var statGate = syncutil.NewGate(20) // arbitrary 333 334 func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) (err error) { 335 var wg syncutil.Group 336 337 for _, br := range blobs { 338 br := br 339 statGate.Start() 340 wg.Go(func() error { 341 defer statGate.Done() 342 343 m, err := s.meta(br) 344 if err == nil { 345 dest <- m.SizedRef(br) 346 return nil 347 } 348 if err == os.ErrNotExist { 349 return nil 350 } 351 return err 352 }) 353 } 354 return wg.Err() 355 } 356 357 func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) { 358 defer close(dest) 359 360 t := s.index.Find(after, "") 361 defer func() { 362 closeErr := t.Close() 363 if err == nil { 364 err = closeErr 365 } 366 }() 367 for i := 0; i < limit && t.Next(); { 368 key := t.Key() 369 if key <= after { 370 // EnumerateBlobs' semantics are '>', but sorted.KeyValue.Find is '>='. 371 continue 372 } 373 br, ok := blob.Parse(key) 374 if !ok { 375 return fmt.Errorf("diskpacked: couldn't parse index key %q", key) 376 } 377 m, ok := parseBlobMeta(t.Value()) 378 if !ok { 379 return fmt.Errorf("diskpacked: couldn't parse index value %q: %q", key, t.Value()) 380 } 381 select { 382 case dest <- m.SizedRef(br): 383 case <-ctx.Done(): 384 return context.ErrCanceled 385 } 386 i++ 387 } 388 return nil 389 } 390 391 func (s *storage) ReceiveBlob(br blob.Ref, source io.Reader) (sbr blob.SizedRef, err error) { 392 var b bytes.Buffer 393 n, err := b.ReadFrom(source) 394 if err != nil { 395 return 396 } 397 sbr = blob.SizedRef{Ref: br, Size: n} 398 err = s.append(sbr, &b) 399 return 400 } 401 402 // append writes the provided blob to the current data file. 403 func (s *storage) append(br blob.SizedRef, r io.Reader) error { 404 s.mu.Lock() 405 defer s.mu.Unlock() 406 if s.closed { 407 return errors.New("diskpacked: write to closed storage") 408 } 409 410 fn := s.writer.Name() 411 n, err := fmt.Fprintf(s.writer, "[%v %v]", br.Ref.String(), br.Size) 412 s.size += int64(n) 413 writeVar.Add(fn, int64(n)) 414 writeTotVar.Add(s.root, int64(n)) 415 if err != nil { 416 return err 417 } 418 419 // TODO(adg): remove this seek and the offset check once confident 420 offset, err := s.writer.Seek(0, os.SEEK_CUR) 421 if err != nil { 422 return err 423 } 424 if offset != s.size { 425 return fmt.Errorf("diskpacked: seek says offset = %d, we think %d", 426 offset, s.size) 427 } 428 offset = s.size // make this a declaration once the above is removed 429 430 n2, err := io.Copy(s.writer, r) 431 s.size += n2 432 writeVar.Add(fn, int64(n)) 433 writeTotVar.Add(s.root, int64(n)) 434 if err != nil { 435 return err 436 } 437 if n2 != br.Size { 438 return fmt.Errorf("diskpacked: written blob size %d didn't match size %d", n, br.Size) 439 } 440 if err = s.writer.Sync(); err != nil { 441 return err 442 } 443 444 packIdx := len(s.fds) - 1 445 if s.size > s.maxFileSize { 446 if err := s.nextPack(); err != nil { 447 return err 448 } 449 } 450 return s.index.Set(br.Ref.String(), blobMeta{packIdx, offset, br.Size}.String()) 451 } 452 453 // meta fetches the metadata for the specified blob from the index. 454 func (s *storage) meta(br blob.Ref) (m blobMeta, err error) { 455 ms, err := s.index.Get(br.String()) 456 if err != nil { 457 if err == sorted.ErrNotFound { 458 err = os.ErrNotExist 459 } 460 return 461 } 462 m, ok := parseBlobMeta(ms) 463 if !ok { 464 err = fmt.Errorf("diskpacked: bad blob metadata: %q", ms) 465 } 466 return 467 } 468 469 // blobMeta is the blob metadata stored in the index. 470 type blobMeta struct { 471 file int 472 offset, size int64 473 } 474 475 func parseBlobMeta(s string) (m blobMeta, ok bool) { 476 n, err := fmt.Sscan(s, &m.file, &m.offset, &m.size) 477 return m, n == 3 && err == nil 478 } 479 480 func (m blobMeta) String() string { 481 return fmt.Sprintf("%v %v %v", m.file, m.offset, m.size) 482 } 483 484 func (m blobMeta) SizedRef(br blob.Ref) blob.SizedRef { 485 return blob.SizedRef{Ref: br, Size: m.size} 486 }