// mgo - MongoDB driver for Go
//
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package mgo

import (
	"camlistore.org/third_party/labix.org/v2/mgo/bson"
	"crypto/md5"
	"encoding/hex"
	"errors"
	"hash"
	"io"
	"os"
	"sync"
	"time"
)

type GridFS struct {
	Files  *Collection
	Chunks *Collection
}

type gfsFileMode int

const (
	gfsClosed  gfsFileMode = 0
	gfsReading gfsFileMode = 1
	gfsWriting gfsFileMode = 2
)

type GridFile struct {
	m    sync.Mutex
	c    sync.Cond
	gfs  *GridFS
	mode gfsFileMode
	err  error

	chunk  int
	offset int64

	wpending int
	wbuf     []byte
	wsum     hash.Hash

	rbuf   []byte
	rcache *gfsCachedChunk

	doc gfsFile
}

type gfsFile struct {
	Id          interface{} "_id"
	ChunkSize   int         "chunkSize"
	UploadDate  time.Time   "uploadDate"
	Length      int64       ",minsize"
	MD5         string
	Filename    string    ",omitempty"
	ContentType string    "contentType,omitempty"
	Metadata    *bson.Raw ",omitempty"
}

type gfsChunk struct {
	Id      interface{} "_id"
	FilesId interface{} "files_id"
	N       int
	Data    []byte
}

type gfsCachedChunk struct {
	wait sync.Mutex
	n    int
	data []byte
	err  error
}

func newGridFS(db *Database, prefix string) *GridFS {
	return &GridFS{db.C(prefix + ".files"), db.C(prefix + ".chunks")}
}

func (gfs *GridFS) newFile() *GridFile {
	file := &GridFile{gfs: gfs}
	file.c.L = &file.m
	//runtime.SetFinalizer(file, finalizeFile)
	return file
}

func finalizeFile(file *GridFile) {
	file.Close()
}
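
// The gfsFile and gfsChunk structs above mirror the two standard GridFS
// collections. As a rough, illustrative sketch (field names come from the
// bson tags above; the values shown are made up), one stored file becomes
// a single "fs.files" document:
//
//     {"_id": ObjectId(...), "chunkSize": 262144, "uploadDate": ISODate(...),
//      "length": 12, "md5": "...", "filename": "myfile.txt"}
//
// plus one "fs.chunks" document per chunk, ordered by "n":
//
//     {"_id": ObjectId(...), "files_id": <the "fs.files" _id>, "n": 0,
//      "data": BinData(0, ...)}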

// Create creates a new file with the provided name in the GridFS. If the file
// name already exists, a new version will be inserted with an up-to-date
// uploadDate that will cause it to be atomically visible to the Open and
// OpenId methods. If the file name is not important, an empty name may be
// provided and the file Id used instead.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// A simple example inserting a new file:
//
//     func check(err error) {
//         if err != nil {
//             panic(err)
//         }
//     }
//     file, err := db.GridFS("fs").Create("myfile.txt")
//     check(err)
//     n, err := file.Write([]byte("Hello world!"))
//     check(err)
//     err = file.Close()
//     check(err)
//     fmt.Printf("%d bytes written\n", n)
//
// The io.Writer interface is implemented by *GridFile and may be used to
// help on the file creation. For example:
//
//     file, err := db.GridFS("fs").Create("myfile.txt")
//     check(err)
//     messages, err := os.Open("/var/log/messages")
//     check(err)
//     defer messages.Close()
//     _, err = io.Copy(file, messages)
//     check(err)
//     err = file.Close()
//     check(err)
//
func (gfs *GridFS) Create(name string) (file *GridFile, err error) {
	file = gfs.newFile()
	file.mode = gfsWriting
	file.wsum = md5.New()
	file.doc = gfsFile{Id: bson.NewObjectId(), ChunkSize: 256 * 1024, Filename: name}
	return
}

// OpenId returns the file with the provided id, for reading.
// If the file isn't found, err will be set to mgo.ErrNotFound.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// The following example will print the first 8192 bytes from the file:
//
//     func check(err error) {
//         if err != nil {
//             panic(err)
//         }
//     }
//     file, err := db.GridFS("fs").OpenId(objid)
//     check(err)
//     b := make([]byte, 8192)
//     n, err := file.Read(b)
//     check(err)
//     fmt.Println(string(b[:n]))
//     err = file.Close()
//     check(err)
//     fmt.Printf("%d bytes read\n", n)
//
// The io.Reader interface is implemented by *GridFile and may be used to
// deal with it. As an example, the following snippet will dump the whole
// file into the standard output:
//
//     file, err := db.GridFS("fs").OpenId(objid)
//     check(err)
//     _, err = io.Copy(os.Stdout, file)
//     check(err)
//     err = file.Close()
//     check(err)
//
func (gfs *GridFS) OpenId(id interface{}) (file *GridFile, err error) {
	var doc gfsFile
	err = gfs.Files.Find(bson.M{"_id": id}).One(&doc)
	if err != nil {
		return
	}
	file = gfs.newFile()
	file.mode = gfsReading
	file.doc = doc
	return
}
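
// A minimal write-then-read round trip, assuming an established *mgo.Session
// in sess and a database named "test" (both illustrative), using the check
// helper from the examples above:
//
//     gfs := sess.DB("test").GridFS("fs")
//     file, err := gfs.Create("greeting.txt")
//     check(err)
//     _, err = file.Write([]byte("hello"))
//     check(err)
//     id := file.Id() // remember the Id before closing
//     err = file.Close()
//     check(err)
//     file, err = gfs.OpenId(id)
//     check(err)
//     _, err = io.Copy(os.Stdout, file)
//     check(err)
//     err = file.Close()
//     check(err)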

// Open returns the most recently uploaded file with the provided
// name, for reading. If the file isn't found, err will be set
// to mgo.ErrNotFound.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// The following example will print the first 8192 bytes from the file:
//
//     file, err := db.GridFS("fs").Open("myfile.txt")
//     check(err)
//     b := make([]byte, 8192)
//     n, err := file.Read(b)
//     check(err)
//     fmt.Println(string(b[:n]))
//     err = file.Close()
//     check(err)
//     fmt.Printf("%d bytes read\n", n)
//
// The io.Reader interface is implemented by *GridFile and may be used to
// deal with it. As an example, the following snippet will dump the whole
// file into the standard output:
//
//     file, err := db.GridFS("fs").Open("myfile.txt")
//     check(err)
//     _, err = io.Copy(os.Stdout, file)
//     check(err)
//     err = file.Close()
//     check(err)
//
func (gfs *GridFS) Open(name string) (file *GridFile, err error) {
	var doc gfsFile
	err = gfs.Files.Find(bson.M{"filename": name}).Sort("-uploadDate").One(&doc)
	if err != nil {
		return
	}
	file = gfs.newFile()
	file.mode = gfsReading
	file.doc = doc
	return
}

// OpenNext opens the next file from iter for reading, sets *file to it,
// and returns true on the success case. If no more documents are available
// on iter or an error occurred, *file is set to nil and the result is false.
// Errors will be available via iter.Err().
//
// The iter parameter must be an iterator on the GridFS files collection.
// Using the GridFS.Find method is an easy way to obtain such an iterator,
// but any iterator on the collection will work.
//
// If the provided *file is non-nil, OpenNext will close it before attempting
// to iterate to the next element. This means that in a loop one only
// has to worry about closing files when breaking out of the loop early
// (break, return, or panic).
//
// For example:
//
//     gfs := db.GridFS("fs")
//     query := gfs.Find(nil).Sort("filename")
//     iter := query.Iter()
//     var f *mgo.GridFile
//     for gfs.OpenNext(iter, &f) {
//         fmt.Printf("Filename: %s\n", f.Name())
//     }
//     if err := iter.Close(); err != nil {
//         panic(err)
//     }
//
func (gfs *GridFS) OpenNext(iter *Iter, file **GridFile) bool {
	if *file != nil {
		// Ignoring the error here shouldn't be a big deal
		// as we're reading the file and the loop iteration
		// for this file is finished.
		_ = (*file).Close()
	}
	var doc gfsFile
	if !iter.Next(&doc) {
		*file = nil
		return false
	}
	f := gfs.newFile()
	f.mode = gfsReading
	f.doc = doc
	*file = f
	return true
}

// Find runs query on GridFS's files collection and returns
// the resulting Query.
//
// This logic:
//
//     gfs := db.GridFS("fs")
//     iter := gfs.Find(nil).Iter()
//
// Is equivalent to:
//
//     files := db.C("fs" + ".files")
//     iter := files.Find(nil).Iter()
//
func (gfs *GridFS) Find(query interface{}) *Query {
	return gfs.Files.Find(query)
}

// RemoveId deletes the file with the provided id from the GridFS.
func (gfs *GridFS) RemoveId(id interface{}) error {
	err := gfs.Files.Remove(bson.M{"_id": id})
	if err != nil {
		return err
	}
	_, err = gfs.Chunks.RemoveAll(bson.M{"files_id": id})
	return err
}

type gfsDocId struct {
	Id interface{} "_id"
}
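
// An illustrative deletion sketch: open the latest version of a named file
// to learn its id, then delete that one version only (Remove, below, would
// drop every version sharing the name):
//
//     file, err := db.GridFS("fs").Open("myfile.txt")
//     check(err)
//     id := file.Id()
//     err = file.Close()
//     check(err)
//     err = db.GridFS("fs").RemoveId(id)
//     check(err)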

// Remove deletes all files with the provided name from the GridFS.
func (gfs *GridFS) Remove(name string) (err error) {
	iter := gfs.Files.Find(bson.M{"filename": name}).Select(bson.M{"_id": 1}).Iter()
	var doc gfsDocId
	for iter.Next(&doc) {
		if e := gfs.RemoveId(doc.Id); e != nil {
			err = e
		}
	}
	if err == nil {
		err = iter.Close()
	}
	return err
}

func (file *GridFile) assertMode(mode gfsFileMode) {
	switch file.mode {
	case mode:
		return
	case gfsWriting:
		panic("GridFile is open for writing")
	case gfsReading:
		panic("GridFile is open for reading")
	case gfsClosed:
		panic("GridFile is closed")
	default:
		panic("Internal error: missing GridFile mode")
	}
}

// SetChunkSize sets the size of saved chunks. Once the file is written to, it
// will be split in blocks of that size and each block saved into an
// independent chunk document. The default chunk size is 256kb.
//
// It is a runtime error to call this function once the file has started
// being written to.
func (file *GridFile) SetChunkSize(bytes int) {
	file.assertMode(gfsWriting)
	debugf("GridFile %p: setting chunk size to %d", file, bytes)
	file.m.Lock()
	file.doc.ChunkSize = bytes
	file.m.Unlock()
}

// Id returns the current file Id.
func (file *GridFile) Id() interface{} {
	return file.doc.Id
}

// SetId changes the current file Id.
//
// It is a runtime error to call this function once the file has started
// being written to, or when the file is not open for writing.
func (file *GridFile) SetId(id interface{}) {
	file.assertMode(gfsWriting)
	file.m.Lock()
	file.doc.Id = id
	file.m.Unlock()
}

// Name returns the optional file name. An empty string will be returned
// in case it is unset.
func (file *GridFile) Name() string {
	return file.doc.Filename
}

// SetName changes the optional file name. An empty string may be used to
// unset it.
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetName(name string) {
	file.assertMode(gfsWriting)
	file.m.Lock()
	file.doc.Filename = name
	file.m.Unlock()
}

// ContentType returns the optional file content type. An empty string will be
// returned in case it is unset.
func (file *GridFile) ContentType() string {
	return file.doc.ContentType
}

// SetContentType changes the optional file content type. An empty string may be
// used to unset it.
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetContentType(ctype string) {
	file.assertMode(gfsWriting)
	file.m.Lock()
	file.doc.ContentType = ctype
	file.m.Unlock()
}
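
// The setters above must run before the first Write. A minimal sketch of
// configuring a new file (SetMeta is defined further below; the name and
// sizes are illustrative):
//
//     file, err := db.GridFS("fs").Create("report.pdf")
//     check(err)
//     file.SetChunkSize(1024 * 1024) // 1MB chunks instead of the 256kb default
//     file.SetContentType("application/pdf")
//     // ... calls to file.Write, then file.Close, as usual.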

// GetMeta unmarshals the optional "metadata" field associated with the
// file into the result parameter. The meaning of keys under that field
// is user-defined. For example:
//
//     result := struct{ INode int }{}
//     err = file.GetMeta(&result)
//     if err != nil {
//         panic(err)
//     }
//     fmt.Printf("inode: %d\n", result.INode)
//
func (file *GridFile) GetMeta(result interface{}) (err error) {
	file.m.Lock()
	if file.doc.Metadata != nil {
		err = bson.Unmarshal(file.doc.Metadata.Data, result)
	}
	file.m.Unlock()
	return
}

// SetMeta changes the optional "metadata" field associated with the
// file. The meaning of keys under that field is user-defined.
// For example:
//
//     file.SetMeta(bson.M{"inode": inode})
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetMeta(metadata interface{}) {
	file.assertMode(gfsWriting)
	data, err := bson.Marshal(metadata)
	file.m.Lock()
	if err != nil && file.err == nil {
		file.err = err
	} else {
		file.doc.Metadata = &bson.Raw{Data: data}
	}
	file.m.Unlock()
}

// Size returns the file size in bytes.
func (file *GridFile) Size() (bytes int64) {
	file.m.Lock()
	bytes = file.doc.Length
	file.m.Unlock()
	return
}

// MD5 returns the file MD5 as a hex-encoded string.
func (file *GridFile) MD5() (md5 string) {
	return file.doc.MD5
}

// UploadDate returns the file upload time.
func (file *GridFile) UploadDate() time.Time {
	return file.doc.UploadDate
}

// Close flushes any pending changes in case the file is being written
// to, waits for any background operations to finish, and closes the file.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
func (file *GridFile) Close() (err error) {
	file.m.Lock()
	defer file.m.Unlock()
	if file.mode == gfsWriting {
		if len(file.wbuf) > 0 {
			file.insertChunk(file.wbuf)
			file.wbuf = file.wbuf[0:0]
		}
		file.insertFile()
	} else if file.mode == gfsReading && file.rcache != nil {
		file.rcache.wait.Lock()
		file.rcache = nil
	}
	file.mode = gfsClosed
	debugf("GridFile %p: closed", file)
	return file.err
}
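
// Because chunk inserts run in the background, a write error may only
// surface at Close time. A defensive pattern for callers (illustrative;
// not part of this package's API):
//
//     file, err := gfs.Create("data.bin")
//     check(err)
//     _, err = io.Copy(file, src)
//     if closeErr := file.Close(); err == nil {
//         err = closeErr // don't let a deferred chunk-insert failure go unseen
//     }
//     check(err)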

// Write writes the provided data to the file and returns the
// number of bytes written and an error in case something
// wrong happened.
//
// The file will internally cache the data so that all but the last
// chunk sent to the database have the size defined by SetChunkSize.
// This also means that errors may be deferred until a future call
// to Write or Close.
//
// The parameters and behavior of this function turn the file
// into an io.Writer.
func (file *GridFile) Write(data []byte) (n int, err error) {
	file.assertMode(gfsWriting)
	file.m.Lock()
	debugf("GridFile %p: writing %d bytes", file, len(data))
	defer file.m.Unlock()

	if file.err != nil {
		return 0, file.err
	}

	n = len(data)
	file.doc.Length += int64(n)
	chunkSize := file.doc.ChunkSize

	if len(file.wbuf)+len(data) < chunkSize {
		file.wbuf = append(file.wbuf, data...)
		return
	}

	// First, flush file.wbuf complementing with data.
	if len(file.wbuf) > 0 {
		missing := chunkSize - len(file.wbuf)
		if missing > len(data) {
			missing = len(data)
		}
		file.wbuf = append(file.wbuf, data[:missing]...)
		data = data[missing:]
		file.insertChunk(file.wbuf)
		file.wbuf = file.wbuf[0:0]
	}

	// Then, flush all chunks from data without copying.
	for len(data) > chunkSize {
		size := chunkSize
		if size > len(data) {
			size = len(data)
		}
		file.insertChunk(data[:size])
		data = data[size:]
	}

	// And append the rest for a future call.
	file.wbuf = append(file.wbuf, data...)

	return n, file.err
}

func (file *GridFile) insertChunk(data []byte) {
	n := file.chunk
	file.chunk++
	debugf("GridFile %p: adding to checksum: %q", file, string(data))
	file.wsum.Write(data)

	for file.doc.ChunkSize*file.wpending >= 1024*1024 {
		// Hold on.. we've got a MB pending.
		file.c.Wait()
		if file.err != nil {
			return
		}
	}

	file.wpending++

	debugf("GridFile %p: inserting chunk %d with %d bytes", file, n, len(data))

	// We may not own the memory of data, so rather than
	// simply copying it, we'll marshal the document ahead of time.
	data, err := bson.Marshal(gfsChunk{bson.NewObjectId(), file.doc.Id, n, data})
	if err != nil {
		file.err = err
		return
	}

	go func() {
		err := file.gfs.Chunks.Insert(bson.Raw{Data: data})
		file.m.Lock()
		file.wpending--
		if err != nil && file.err == nil {
			file.err = err
		}
		file.c.Broadcast()
		file.m.Unlock()
	}()
}

func (file *GridFile) insertFile() {
	hexsum := hex.EncodeToString(file.wsum.Sum(nil))
	for file.wpending > 0 {
		debugf("GridFile %p: waiting for %d pending chunks to insert file", file, file.wpending)
		file.c.Wait()
	}
	if file.err == nil {
		file.doc.UploadDate = bson.Now()
		file.doc.MD5 = hexsum
		file.err = file.gfs.Files.Insert(file.doc)
		file.gfs.Chunks.EnsureIndexKey("files_id", "n")
	}
}

// Seek sets the offset for the next Read or Write on file to
// offset, interpreted according to whence: 0 means relative to
// the origin of the file, 1 means relative to the current offset,
// and 2 means relative to the end. It returns the new offset and
// an error, if any.
func (file *GridFile) Seek(offset int64, whence int) (pos int64, err error) {
	file.m.Lock()
	debugf("GridFile %p: seeking to %d (whence=%d)", file, offset, whence)
	defer file.m.Unlock()
	switch whence {
	case os.SEEK_SET:
	case os.SEEK_CUR:
		offset += file.offset
	case os.SEEK_END:
		offset += file.doc.Length
	default:
		panic("Unsupported whence value")
	}
	if offset > file.doc.Length {
		return file.offset, errors.New("Seek past end of file")
	}
	chunk := int(offset / int64(file.doc.ChunkSize))
	if chunk+1 == file.chunk && offset >= file.offset {
		file.rbuf = file.rbuf[int(offset-file.offset):]
		file.offset = offset
		return file.offset, nil
	}
	file.offset = offset
	file.chunk = chunk
	file.rbuf = nil
	file.rbuf, err = file.getChunk()
	if err == nil {
		file.rbuf = file.rbuf[int(file.offset-int64(chunk)*int64(file.doc.ChunkSize)):]
	}
	return file.offset, err
}
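
// A short random-access sketch using Seek plus Read (buffer size and file
// name are illustrative; the file is assumed to hold at least 16 bytes):
//
//     file, err := db.GridFS("fs").Open("myfile.txt")
//     check(err)
//     _, err = file.Seek(-16, os.SEEK_END) // position 16 bytes before the end
//     check(err)
//     tail := make([]byte, 16)
//     _, err = file.Read(tail)
//     check(err)
//     err = file.Close()
//     check(err)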

// Read reads into b the next available data from the file and
// returns the number of bytes read and an error in case
// something wrong happened. At the end of the file, n will
// be zero and err will be set to io.EOF.
//
// The parameters and behavior of this function turn the file
// into an io.Reader.
func (file *GridFile) Read(b []byte) (n int, err error) {
	file.assertMode(gfsReading)
	file.m.Lock()
	debugf("GridFile %p: reading at offset %d into buffer of length %d", file, file.offset, len(b))
	defer file.m.Unlock()
	if file.offset == file.doc.Length {
		return 0, io.EOF
	}
	for err == nil {
		i := copy(b, file.rbuf)
		n += i
		file.offset += int64(i)
		file.rbuf = file.rbuf[i:]
		if i == len(b) || file.offset == file.doc.Length {
			break
		}
		b = b[i:]
		file.rbuf, err = file.getChunk()
	}
	return n, err
}

func (file *GridFile) getChunk() (data []byte, err error) {
	cache := file.rcache
	file.rcache = nil
	if cache != nil && cache.n == file.chunk {
		debugf("GridFile %p: Getting chunk %d from cache", file, file.chunk)
		cache.wait.Lock()
		data, err = cache.data, cache.err
	} else {
		debugf("GridFile %p: Fetching chunk %d", file, file.chunk)
		var doc gfsChunk
		err = file.gfs.Chunks.Find(bson.D{{"files_id", file.doc.Id}, {"n", file.chunk}}).One(&doc)
		data = doc.Data
	}
	file.chunk++
	if int64(file.chunk)*int64(file.doc.ChunkSize) < file.doc.Length {
		// Read the next chunk in the background.
		cache = &gfsCachedChunk{n: file.chunk}
		cache.wait.Lock()
		debugf("GridFile %p: Scheduling chunk %d for background caching", file, file.chunk)
		// Clone the session to avoid having it closed in between.
		chunks := file.gfs.Chunks
		session := chunks.Database.Session.Clone()
		go func(id interface{}, n int) {
			defer session.Close()
			chunks = chunks.With(session)
			var doc gfsChunk
			cache.err = chunks.Find(bson.D{{"files_id", id}, {"n", n}}).One(&doc)
			cache.data = doc.Data
			cache.wait.Unlock()
		}(file.doc.Id, file.chunk)
		file.rcache = cache
	}
	debugf("Returning err: %#v", err)
	return
}
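
// Since *GridFile implements both io.Reader and io.Seeker, it satisfies
// io.ReadSeeker and can feed APIs such as http.ServeContent directly. A
// hedged, illustrative handler (the database plumbing is assumed to exist
// elsewhere):
//
//     func serveGridFile(w http.ResponseWriter, r *http.Request, db *mgo.Database, name string) {
//         file, err := db.GridFS("fs").Open(name)
//         if err != nil {
//             http.NotFound(w, r)
//             return
//         }
//         defer file.Close()
//         // ServeContent uses Seek to find the size and to honor Range requests.
//         http.ServeContent(w, r, file.Name(), file.UploadDate(), file)
//     }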