github.com/atlassian/git-lob@v0.0.0-20150806085256-2386a5ed291a/core/storage.go (about) 1 package core 2 3 import ( 4 "bytes" 5 "crypto/sha1" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "hash" 10 "io" 11 "io/ioutil" 12 "os" 13 "path/filepath" 14 15 "github.com/atlassian/git-lob/Godeps/_workspace/src/github.com/cloudflare/bm" 16 "github.com/atlassian/git-lob/util" 17 ) 18 19 const BUFSIZE = 131072 20 21 // Chunk size that we split stored data into so it's easier to resume uploads/downloads 22 // This used to be configurable, but it caused too many issues if different people had different 23 // settings in a shared repository 24 // This is only 'var' rather than 'const' to allow tests to modify 25 var ChunkSize = int64(32 * 1024 * 1024) 26 27 const ApproximateMetadataSize = 75 28 29 // Information about a LOB 30 type LOBInfo struct { 31 // SHA of the LOB 32 SHA string 33 // Total size of the LOB (all chunks) 34 Size int64 35 // Number of chunks that make up the whole LOB (integrity check) 36 NumChunks int 37 } 38 39 // Gets the root directory for local LOB files & creates if necessary 40 func GetLocalLOBRoot() string { 41 ret := filepath.Join(util.GetGitDir(), "git-lob", "content") 42 err := os.MkdirAll(ret, 0755) 43 if err != nil { 44 util.LogErrorf("Unable to create LOB root folder at %v: %v", ret, err) 45 panic(err) 46 } 47 return ret 48 } 49 50 // Gets the root directory for shared LOB files & creates if necessary 51 func GetSharedLOBRoot() string { 52 // We create shared store when loading config if specified 53 return util.GlobalOptions.SharedStore 54 } 55 56 // Get relative directory for some base dir for a given sha 57 func getLOBRelativeDir(sha string) string { 58 return filepath.Join(sha[:3], sha[3:6]) 59 } 60 61 // Get a relative file name for a meta file (no dirs created as not rooted) 62 func GetLOBMetaRelativePath(sha string) string { 63 return filepath.Join(getLOBRelativeDir(sha), getLOBMetaFilename(sha)) 64 } 65 66 // Get a relative file name for a meta file (no dirs created as not rooted) 67 func GetLOBChunkRelativePath(sha string, chunkIdx int) string { 68 return filepath.Join(getLOBRelativeDir(sha), getLOBChunkFilename(sha, chunkIdx)) 69 } 70 71 // Get absolute directory for a sha & creates it 72 func getLOBSubDir(base, sha string) string { 73 ret := filepath.Join(base, getLOBRelativeDir(sha)) 74 err := os.MkdirAll(ret, 0755) 75 if err != nil { 76 util.LogErrorf("Unable to create LOB 2nd-level folder at %v: %v", ret, err) 77 panic(err) 78 } 79 return ret 80 81 } 82 83 // Gets the containing local folder for a given LOB SHA & creates if necessary 84 // LOBs are 'splayed' 2-levels deep based on first 6 chars of SHA (3 for each dir) 85 // We splay by 2 levels and by 3 each (4096 dirs) because we don't pack like git 86 // so need to ensure directory contents remain practical at high numbers of files 87 func GetLocalLOBDir(sha string) string { 88 if len(sha) != 40 { 89 util.LogErrorf("Invalid SHA format: %v\n", sha) 90 return "" 91 } 92 return getLOBSubDir(GetLocalLOBRoot(), sha) 93 } 94 95 // Gets the containing shared folder for a given LOB SHA & creates if necessary 96 // LOBs are 'splayed' 2-levels deep based on first 6 chars of SHA (3 for each dir) 97 // We splay by 2 levels and by 3 each (4096 dirs) because we don't pack like git 98 // so need to ensure directory contents remain practical at high numbers of files 99 func GetSharedLOBDir(sha string) string { 100 if len(sha) != 40 { 101 util.LogErrorf("Invalid SHA format: %v\n", sha) 102 return "" 103 } 104 return getLOBSubDir(GetSharedLOBRoot(), sha) 105 } 106 107 // get the filename for a meta file (no dir) 108 func getLOBMetaFilename(sha string) string { 109 return sha + "_meta" 110 } 111 112 // get the filename for a chunk file (no dir) 113 func getLOBChunkFilename(sha string, chunkIdx int) string { 114 return fmt.Sprintf("%v_%d", sha, chunkIdx) 115 } 116 117 // Gets the absolute path to the meta file for a LOB from a base dir 118 func GetLOBMetaPathInBaseDir(basedir, sha string) string { 119 fld := getLOBSubDir(basedir, sha) 120 return filepath.Join(fld, getLOBMetaFilename(sha)) 121 } 122 123 // Gets the absolute path to the chunk file for a LOB from a base dir 124 func GetLOBChunkPathInBaseDir(basedir, sha string, chunkIdx int) string { 125 fld := getLOBSubDir(basedir, sha) 126 return filepath.Join(fld, getLOBChunkFilename(sha, chunkIdx)) 127 } 128 129 // Gets the absolute path to the meta file for a LOB in local store 130 func GetLocalLOBMetaPath(sha string) string { 131 return GetLOBMetaPathInBaseDir(GetLocalLOBRoot(), sha) 132 } 133 134 // Gets the absolute path to the chunk file for a LOB in local store 135 func GetLocalLOBChunkPath(sha string, chunkIdx int) string { 136 return GetLOBChunkPathInBaseDir(GetLocalLOBRoot(), sha, chunkIdx) 137 } 138 139 // Gets the absolute path to the meta file for a LOB in shared store 140 func getSharedLOBMetaPath(sha string) string { 141 return GetLOBMetaPathInBaseDir(GetSharedLOBRoot(), sha) 142 } 143 144 // Gets the absolute path to the chunk file for a LOB in local store 145 func GetSharedLOBChunkPath(sha string, chunkIdx int) string { 146 return GetLOBChunkPathInBaseDir(GetSharedLOBRoot(), sha, chunkIdx) 147 } 148 149 // Retrieve information about an existing stored LOB, from a base dir 150 func getLOBInfoInBaseDir(sha, basedir string) (*LOBInfo, error) { 151 file := GetLOBMetaPathInBaseDir(basedir, sha) 152 _, err := os.Stat(file) 153 if err != nil { 154 if os.IsNotExist(err) { 155 return nil, NewNotFoundError(err.Error(), file) 156 } 157 return nil, err 158 } 159 160 info, err := parseLOBInfoFromFile(file) 161 if err != nil { 162 return nil, NewIntegrityErrorWithAdditionalMessage([]string{sha}, err.Error()) 163 } 164 return info, nil 165 } 166 167 // Retrieve information about an existing stored LOB (local) 168 func GetLOBInfo(sha string) (*LOBInfo, error) { 169 info, err := getLOBInfoInBaseDir(sha, GetLocalLOBRoot()) 170 if err != nil { 171 if IsNotFoundError(err) { 172 // Try to recover from shared 173 if recoverLocalLOBFilesFromSharedStore(sha) { 174 info, err = getLOBInfoInBaseDir(sha, GetLocalLOBRoot()) 175 if err != nil { 176 // Dang 177 return nil, err 178 } 179 // otherwise we recovered! 180 } else { 181 return nil, err 182 } 183 } else { 184 return nil, err 185 } 186 } 187 188 return info, nil 189 } 190 191 // Parse a LOB meta file 192 func parseLOBInfoFromFile(file string) (*LOBInfo, error) { 193 infobytes, err := ioutil.ReadFile(file) 194 195 if err != nil { 196 return nil, err 197 } 198 // Read JSON metadata 199 info := &LOBInfo{} 200 err = json.Unmarshal(infobytes, info) 201 if err != nil { 202 // Fatal, corruption 203 return nil, errors.New(fmt.Sprintf("Unable to interpret meta file %v: %v", file, err)) 204 } 205 206 return info, nil 207 208 } 209 210 // If files are missing in the local repo but available in the shared 211 // store, returns true after re-establishing the link 212 // Note: this doesn't validate sizes of any files because it's assumed 213 // because of hardlinking the files are either missing entirely or the 214 // same as the shared store 215 func recoverLocalLOBFilesFromSharedStore(sha string) bool { 216 if !IsUsingSharedStorage() { 217 return false 218 } 219 220 metalocal := GetLocalLOBMetaPath(sha) 221 if !util.FileExists(metalocal) { 222 metashared := getSharedLOBMetaPath(sha) 223 if util.FileExists(metashared) { 224 err := linkSharedLOBFilename(metashared) 225 if err != nil { 226 util.LogErrorf("Failed to link shared file %v into local repo: %v\n", metashared, err.Error()) 227 return false 228 } 229 } else { 230 return false 231 } 232 } 233 // Meta should be complete & local now 234 info, err := GetLOBInfo(sha) 235 if err != nil { 236 return false 237 } 238 for i := 0; i < info.NumChunks; i++ { 239 local := GetLocalLOBChunkPath(sha, i) 240 expectedSize := getLOBExpectedChunkSize(info, i) 241 if !util.FileExistsAndIsOfSize(local, expectedSize) { 242 shared := GetSharedLOBChunkPath(sha, i) 243 if util.FileExistsAndIsOfSize(shared, expectedSize) { 244 err := linkSharedLOBFilename(shared) 245 if err != nil { 246 util.LogErrorf("Failed to link shared file %v into local repo: %v\n", shared, err.Error()) 247 return false 248 } 249 } else { 250 return false 251 } 252 } 253 } 254 255 return true 256 } 257 258 // Retrieve LOB from storage 259 func RetrieveLOB(sha string, out io.Writer) (info *LOBInfo, err error) { 260 info, err = GetLOBInfo(sha) 261 262 if err != nil { 263 if IsNotFoundError(err) && util.GlobalOptions.AutoFetchEnabled { 264 err = AutoFetch(sha, true) 265 if err == nil { 266 info, err = GetLOBInfo(sha) 267 } 268 } 269 if err != nil { 270 if IsNotFoundError(err) { 271 // Still not found after possible recovery? 272 return nil, err 273 } else { 274 // Some other issue 275 return nil, errors.New(fmt.Sprintf("Unable to retrieve LOB with SHA %v: %v", sha, err.Error())) 276 } 277 } 278 } 279 280 var totalBytesRead = int64(0) 281 fileSize := info.Size 282 // Pre-validate all the files BEFORE we start streaming data to out 283 // if we fail part way through we don't want to have written partial 284 // data, should be all or nothing 285 lastChunkSize := fileSize - (int64(info.NumChunks-1) * ChunkSize) 286 // Check all files 287 for i := 0; i < info.NumChunks; i++ { 288 chunkFilename := GetLocalLOBChunkPath(sha, i) 289 var expectedSize int64 290 if i+1 < info.NumChunks { 291 expectedSize = ChunkSize 292 } else { 293 if info.NumChunks == 1 { 294 expectedSize = fileSize 295 } else { 296 expectedSize = lastChunkSize 297 } 298 } 299 if !util.FileExistsAndIsOfSize(chunkFilename, expectedSize) { 300 // Try to recover from shared store 301 recoveredFromShared := false 302 if recoverLocalLOBFilesFromSharedStore(sha) { 303 recoveredFromShared = util.FileExistsAndIsOfSize(chunkFilename, expectedSize) 304 } 305 306 if !recoveredFromShared { 307 if util.GlobalOptions.AutoFetchEnabled { 308 err = AutoFetch(sha, true) 309 if err != nil { 310 if IsNotFoundError(err) { 311 return info, NewNotFoundError(fmt.Sprintf("Missing chunk %d for %v & not on remote", i, sha), chunkFilename) 312 } else { 313 return info, errors.New(fmt.Sprintf("Missing chunk %d for %v & failed fetch: %v", i, sha, err.Error())) 314 } 315 } 316 } else { 317 return info, NewNotFoundError(fmt.Sprintf("Missing chunk %d for %v", i, sha), chunkFilename) 318 } 319 } 320 } 321 } 322 // If all was well, start reading & streaming content 323 for i := 0; i < info.NumChunks; i++ { 324 // Check each chunk file exists 325 chunkFilename := GetLocalLOBChunkPath(info.SHA, i) 326 in, err := os.OpenFile(chunkFilename, os.O_RDONLY, 0644) 327 if err != nil { 328 return info, errors.New(fmt.Sprintf("Error reading LOB file %v: %v", chunkFilename, err)) 329 } 330 c, err := io.Copy(out, in) 331 if err != nil { 332 return info, errors.New(fmt.Sprintf("I/O error while copying LOB file %v, check working copy state", chunkFilename)) 333 } 334 totalBytesRead += c 335 } 336 337 // Final check 338 if totalBytesRead != fileSize { 339 err = errors.New(fmt.Sprintf("Error, file length does not match expected in LOB %v, expected %d, total size %d", sha, fileSize, totalBytesRead)) 340 return info, err 341 } 342 343 util.LogDebugf("Successfully retrieved LOB %v from %d chunks, total size %v\n", sha, info.NumChunks, util.FormatSize(totalBytesRead)) 344 345 return info, nil 346 347 } 348 349 // Link a file from shared storage into the local repo 350 // The hard link means we only ever have one copy of the data 351 // but it appears under each repo's git-lob folder 352 // destFile should be a full path of shared file location 353 func linkSharedLOBFilename(destSharedFile string) error { 354 // Get path relative to shared store root, then translate it to local path 355 relPath, err := filepath.Rel(util.GlobalOptions.SharedStore, destSharedFile) 356 if err != nil { 357 return err 358 } 359 linkPath := filepath.Join(GetLocalLOBRoot(), relPath) 360 361 // Make sure path exists since we're not using utility method to link 362 os.MkdirAll(filepath.Dir(linkPath), 0755) 363 364 os.Remove(linkPath) 365 err = CreateHardLink(destSharedFile, linkPath) 366 if err != nil { 367 return errors.New(fmt.Sprintf("Error creating hard link from %v to %v: %v", linkPath, destSharedFile, err)) 368 } 369 return nil 370 } 371 372 // Store the metadata for a given sha 373 // If it already exists and is of the right size, will do nothing 374 func StoreLOBInfo(info *LOBInfo) error { 375 var root string 376 if IsUsingSharedStorage() { 377 root = GetSharedLOBRoot() 378 } else { 379 root = GetLocalLOBRoot() 380 } 381 return StoreLOBInfoInBaseDir(root, info) 382 } 383 384 // Store the metadata for a given sha in a relative path 385 // If it already exists and is of the right size, will do nothing 386 func StoreLOBInfoInBaseDir(basedir string, info *LOBInfo) error { 387 infoBytes, err := json.Marshal(info) 388 if err != nil { 389 return errors.New(fmt.Sprintf("Unable to convert LOB info to JSON: %v", err)) 390 } 391 infoFilename := GetLOBMetaPathInBaseDir(basedir, info.SHA) 392 if !util.FileExistsAndIsOfSize(infoFilename, int64(len(infoBytes))) { 393 // Since all the details are derived from the SHA the only variant is chunking or incomplete writes so 394 // we don't need to worry about needing to update the content (it must be correct) 395 util.LogDebugf("Writing LOB metadata file: %v\n", infoFilename) 396 err = ioutil.WriteFile(infoFilename, infoBytes, 0644) 397 if err != nil { 398 return err 399 } 400 } else { 401 util.LogDebugf("LOB metadata file already exists & is valid: %v\n", infoFilename) 402 } 403 404 // This may have stored in shared storage, so link if required 405 if IsUsingSharedStorage() && basedir == GetSharedLOBRoot() { 406 return linkSharedLOBFilename(infoFilename) 407 } else { 408 return nil 409 } 410 411 } 412 413 func IsUsingSharedStorage() bool { 414 if util.GlobalOptions.SharedStore != "" { 415 // We create the folder on loading config 416 return util.DirExists(util.GlobalOptions.SharedStore) 417 } 418 return false 419 } 420 421 // Write the contents of fromFile to final storage with sha, checking the size 422 // If file already exists and is of the right size, will do nothing 423 // fromChunkFile will be moved into its final location or deleted if the data is already valid, 424 // so the file will not exist after this call (renamed to final location or deleted), unless error 425 func StoreLOBChunk(sha string, chunkNo int, fromChunkFile string, sz int64) error { 426 var root string 427 if IsUsingSharedStorage() { 428 root = GetSharedLOBRoot() 429 } else { 430 root = GetLocalLOBRoot() 431 } 432 return StoreLOBChunkInBaseDir(root, sha, chunkNo, fromChunkFile, sz) 433 } 434 435 // Write the contents of fromFile to final storage with sha, checking the size, to a relative root 436 // If file already exists and is of the right size, will do nothing 437 // fromChunkFile will be moved into its final location or deleted if the data is already valid, 438 // so the file will not exist after this call (renamed to final location or deleted), unless error 439 func StoreLOBChunkInBaseDir(basedir, sha string, chunkNo int, fromChunkFile string, sz int64) error { 440 destFile := GetLOBChunkPathInBaseDir(basedir, sha, chunkNo) 441 442 if !util.FileExistsAndIsOfSize(destFile, int64(sz)) { 443 util.LogDebugf("Saving final LOB metadata file: %v\n", destFile) 444 // delete any existing (incorrectly sized) file since will probably not be allowed to rename over it 445 // ignore any errors 446 os.Remove(destFile) 447 err := os.Rename(fromChunkFile, destFile) 448 if err != nil { 449 return err 450 } 451 } else { 452 util.LogDebugf("LOB chunk file already exists & is valid: %v\n", destFile) 453 // Remove file that would have been moved 454 os.Remove(fromChunkFile) 455 } 456 457 // This may have stored in shared storage, so link if required 458 if IsUsingSharedStorage() && basedir == GetSharedLOBRoot() { 459 return linkSharedLOBFilename(destFile) 460 } 461 return nil 462 463 } 464 465 // Read from a stream and calculate SHA, while also writing content to chunked content 466 // leader is a slice of bytes that has already been read (probe for SHA) 467 func StoreLOB(in io.Reader, leader []byte) (*LOBInfo, error) { 468 var root string 469 if IsUsingSharedStorage() { 470 root = GetSharedLOBRoot() 471 } else { 472 root = GetLocalLOBRoot() 473 } 474 return StoreLOBInBaseDir(root, in, leader) 475 } 476 477 // Read from a stream and calculate SHA, while also writing content to chunked content 478 // leader is a slice of bytes that has already been read (probe for SHA) 479 // Store underneath a specified LOB root 480 func StoreLOBInBaseDir(basedir string, in io.Reader, leader []byte) (*LOBInfo, error) { 481 sha := sha1.New() 482 // Write chunks to temporary files, then move based on SHA filename once calculated 483 chunkFilenames := make([]string, 0, 5) 484 485 var outf *os.File 486 var err error 487 writeLeader := true 488 buf := make([]byte, BUFSIZE) 489 var fatalError error 490 var currentChunkSize int64 = 0 491 var totalSize int64 = 0 492 493 for { 494 var dataToWrite []byte 495 496 if writeLeader && len(leader) > 0 { 497 dataToWrite = leader 498 writeLeader = false 499 } else { 500 var bytesToRead int64 = BUFSIZE 501 if BUFSIZE+currentChunkSize > ChunkSize { 502 // Read less than BUFSIZE so we stick to CHUNKLIMIT 503 bytesToRead = ChunkSize - currentChunkSize 504 } 505 c, err := in.Read(buf[:bytesToRead]) 506 // Write any data to SHA & output 507 if c > 0 { 508 dataToWrite = buf[:c] 509 } else if err != nil { 510 if err == io.EOF { 511 // End of input 512 outf.Close() 513 break 514 } else { 515 outf.Close() 516 fatalError = errors.New(fmt.Sprintf("I/O error reading chunk %d: %v", len(chunkFilenames), err)) 517 break 518 } 519 } 520 521 } 522 523 // Write data 524 if len(dataToWrite) > 0 { 525 // New chunk file? 526 if outf == nil { 527 outf, err = ioutil.TempFile("", "tempchunk") 528 if err != nil { 529 fatalError = errors.New(fmt.Sprintf("Unable to create chunk %d: %v", len(chunkFilenames), err)) 530 break 531 } 532 chunkFilenames = append(chunkFilenames, outf.Name()) 533 currentChunkSize = 0 534 } 535 sha.Write(dataToWrite) 536 c, err := outf.Write(dataToWrite) 537 if err != nil { 538 fatalError = errors.New(fmt.Sprintf("I/O error writing chunk: %v wrote %d bytes of %d", err, c, len(dataToWrite))) 539 break 540 } 541 currentChunkSize += int64(c) 542 totalSize += int64(c) 543 544 // Read from incoming 545 // Deal with chunk limit 546 if currentChunkSize >= ChunkSize { 547 // Close this output, next iteration will create the next file 548 outf.Close() 549 outf = nil 550 currentChunkSize = 0 551 } 552 } else { 553 // No data to write 554 outf.Close() 555 break 556 } 557 } 558 if outf != nil { 559 // Close any dangling chunk 560 outf.Close() 561 } 562 defer func() { 563 // Clean up any temporaries on error or not used 564 for _, f := range chunkFilenames { 565 os.Remove(f) 566 } 567 }() 568 569 if fatalError != nil { 570 return nil, fatalError 571 } 572 573 shaStr := fmt.Sprintf("%x", string(sha.Sum(nil))) 574 575 // We *may* now move the data to LOB dir 576 // We won't if it already exists & is the correct size 577 // Construct LOBInfo & write to final location 578 info := &LOBInfo{SHA: shaStr, Size: totalSize, NumChunks: len(chunkFilenames)} 579 err = StoreLOBInfoInBaseDir(basedir, info) 580 if err != nil { 581 return nil, err 582 } 583 584 // Check each chunk file 585 for i, f := range chunkFilenames { 586 sz := ChunkSize 587 if i+1 == len(chunkFilenames) { 588 // Last chunk, get size 589 sz = currentChunkSize 590 } 591 err = StoreLOBChunkInBaseDir(basedir, shaStr, i, f, sz) 592 if err != nil { 593 return nil, err 594 } 595 } 596 597 return info, nil 598 599 } 600 601 // Delete all files associated with a given LOB SHA from the local store 602 func DeleteLOB(sha string) error { 603 // Delete from local always (either only copy, or hard link) 604 return DeleteLOBInBaseDir(sha, GetLocalLOBRoot()) 605 } 606 607 // Delete all files associated with a given LOB SHA from a specified root dir 608 func DeleteLOBInBaseDir(sha, basedir string) error { 609 610 dir := getLOBSubDir(basedir, sha) 611 names, err := filepath.Glob(filepath.Join(dir, fmt.Sprintf("%v*", sha))) 612 if err != nil { 613 return errors.New(fmt.Sprintf("Unable to glob local files for %v: %v", sha, err)) 614 } 615 for _, n := range names { 616 err = os.Remove(n) 617 if err != nil { 618 return errors.New(fmt.Sprintf("Unable to delete file %v: %v", n, err)) 619 } 620 } 621 622 if IsUsingSharedStorage() && basedir != GetSharedLOBRoot() { 623 // If we're using shared storage, then also check the number of links in 624 // shared storage for this SHA. See PruneSharedStore for a more general 625 // sweep for files that don't go through DeleteLOB (e.g. repo deleted manually) 626 shareddir := GetSharedLOBDir(sha) 627 names, err := filepath.Glob(filepath.Join(shareddir, fmt.Sprintf("%v*", sha))) 628 if err != nil { 629 return errors.New(fmt.Sprintf("Unable to glob shared files for %v: %v", sha, err)) 630 } 631 for _, n := range names { 632 links, err := GetHardLinkCount(n) 633 if err == nil && links == 1 { 634 // only 1 hard link means no other repo refers to this shared LOB 635 // so it's safe to delete it 636 err = os.Remove(n) 637 if err != nil { 638 return errors.New(fmt.Sprintf("Unable to delete file %v: %v", n, err)) 639 } 640 } 641 642 } 643 644 } 645 646 return nil 647 648 } 649 650 // Get the local/shared storage of a LOB with a given SHA 651 // Returns the list of files (relative to basedir) & checks for 652 // integrity if check = true 653 // If check = true and checkHash = true, reads all the data in the files and re-calculates 654 // the SHA for a deep validation of content 655 // If check = true and checkHash = false, just checks the presence & size of all files 656 // If there are any errors the returned list may not be correct 657 // In the rare case that a break has occurred between shared storage 658 // and the local hardlink, this method will re-link if the shared 659 // store has it 660 func GetLOBFilesForSHA(sha, basedir string, check bool, checkHash bool) (files []string, size int64, _err error) { 661 var ret []string 662 info, err := getLOBInfoInBaseDir(sha, basedir) 663 if err != nil { 664 return []string{}, 0, err 665 } 666 // add meta file (relative) - already checked by GetLOBInfo above 667 relmeta := GetLOBMetaRelativePath(sha) 668 ret = append(ret, relmeta) 669 670 var shaRecalc hash.Hash 671 if checkHash { 672 shaRecalc = sha1.New() 673 } 674 lastChunkSize := info.Size - (int64(info.NumChunks-1) * ChunkSize) 675 for i := 0; i < info.NumChunks; i++ { 676 relchunk := GetLOBChunkRelativePath(sha, i) 677 ret = append(ret, relchunk) 678 if check { 679 abschunk := filepath.Join(basedir, relchunk) 680 // Check size first 681 var expectedSize int64 682 if i+1 < info.NumChunks { 683 expectedSize = ChunkSize 684 } else { 685 if info.NumChunks == 1 { 686 expectedSize = info.Size 687 } else { 688 expectedSize = lastChunkSize 689 } 690 } 691 if !util.FileExistsAndIsOfSize(abschunk, expectedSize) { 692 // Try to recover from shared store 693 recoveredFromShared := false 694 if recoverLocalLOBFilesFromSharedStore(sha) { 695 recoveredFromShared = util.FileExistsAndIsOfSize(abschunk, expectedSize) 696 } 697 698 if !recoveredFromShared { 699 msg := fmt.Sprintf("LOB file not found or wrong size: %v expected to be %d bytes", abschunk, expectedSize) 700 wrongSize := util.FileExists(abschunk) 701 var err error 702 if wrongSize { 703 err = NewWrongSizeError(msg, abschunk) 704 } else { 705 err = NewNotFoundError(msg, abschunk) 706 } 707 return ret, info.Size, err 708 } 709 } 710 711 // Check SHA content? 712 if checkHash { 713 f, err := os.OpenFile(abschunk, os.O_RDONLY, 0644) 714 if err != nil { 715 msg := fmt.Sprintf("Error opening LOB file %v to check SHA: %v", abschunk, err) 716 return ret, info.Size, errors.New(msg) 717 } 718 _, err = io.Copy(shaRecalc, f) 719 if err != nil { 720 msg := fmt.Sprintf("Error copying LOB file %v into SHA calculator: %v", abschunk, err) 721 return ret, info.Size, errors.New(msg) 722 } 723 f.Close() 724 } 725 726 } 727 } 728 729 if check && checkHash { 730 shaRecalcStr := fmt.Sprintf("%x", string(shaRecalc.Sum(nil))) 731 if sha != shaRecalcStr { 732 return ret, info.Size, NewIntegrityError([]string{sha}) 733 } 734 } 735 736 return ret, info.Size, nil 737 738 } 739 740 // Check the integrity of the files for a given sha in the attached basedir 741 // If checkHash = true, reads all the data in the files and re-calculates 742 // the SHA for a deep validation of content (slower but complete) 743 // If checkHash = false, just checks the presence & size of all files (quick & most likely correct) 744 // Note that if basedir is the local root, will try to recover missing files from shared store 745 func CheckLOBFilesForSHA(sha, basedir string, checkHash bool) error { 746 _, _, err := GetLOBFilesForSHA(sha, basedir, true, checkHash) 747 return err 748 } 749 750 // Check the presence & integrity of the files for a given list of shas in this repo 751 // and return a list of those which failed the check 752 // If checkHash = true, reads all the data in the files and re-calculates 753 // the SHA for a deep validation of content (slower but complete) 754 // If checkHash = false, just checks the presence & size of all files (quick & most likely correct) 755 func GetMissingLOBs(lobshas []string, checkHash bool) []string { 756 localroot := GetLocalLOBRoot() 757 var missing []string 758 for _, sha := range lobshas { 759 err := CheckLOBFilesForSHA(sha, localroot, checkHash) 760 if err != nil { 761 // Recover from shared storage if possible 762 if IsUsingSharedStorage() && recoverLocalLOBFilesFromSharedStore(sha) { 763 // then we're OK 764 } else { 765 missing = append(missing, sha) 766 } 767 } 768 } 769 return missing 770 } 771 772 // Return whether a single LOB is missing 773 func IsLOBMissing(sha string, checkHash bool) bool { 774 localroot := GetLocalLOBRoot() 775 err := CheckLOBFilesForSHA(sha, localroot, checkHash) 776 if err != nil { 777 // Recover from shared storage if possible 778 if IsUsingSharedStorage() && recoverLocalLOBFilesFromSharedStore(sha) { 779 // then we're OK 780 } else { 781 return true 782 } 783 } 784 785 return false 786 } 787 788 // Get the correct size of a given chunk 789 func getLOBExpectedChunkSize(info *LOBInfo, chunkIdx int) int64 { 790 if chunkIdx+1 < info.NumChunks { 791 return ChunkSize 792 } else { 793 if info.NumChunks == 1 { 794 return info.Size 795 } else { 796 return info.Size - (int64(info.NumChunks-1) * ChunkSize) 797 } 798 } 799 800 } 801 802 // returns whether the local store has any binaries in it 803 func IsLocalLOBStoreEmpty() bool { 804 root := GetLocalLOBRoot() 805 rootf, err := os.Open(root) 806 if err != nil { 807 return true 808 } 809 defer rootf.Close() 810 // Max 3 entries 811 dirs, err := rootf.Readdirnames(3) 812 if err != nil { 813 return true 814 } 815 // Will be no entries if this is new 816 return len(dirs) == 0 817 } 818 819 // Generates a diff between the contents of 2 LOBs 820 // Automatically copes with chunking, the diff is one file across the entire content 821 // Returns the size of the compressed delta 822 func GenerateLOBDelta(basesha, targetsha string, out io.Writer) (int64, error) { 823 return GenerateLOBDeltaInBaseDir(GetLocalLOBRoot(), basesha, targetsha, out) 824 } 825 826 // Applies a diff to basesha and generates a LOB which should have targetsha (will be checked, error returned if disagrees) 827 func ApplyLOBDelta(basesha, targetsha string, delta io.Reader) error { 828 var root string 829 if IsUsingSharedStorage() { 830 root = GetSharedLOBRoot() 831 } else { 832 root = GetLocalLOBRoot() 833 } 834 err := ApplyLOBDeltaInBaseDir(root, basesha, targetsha, delta) 835 if err != nil { 836 // This may have stored in shared storage, so link if required 837 if IsUsingSharedStorage() { 838 recoverLocalLOBFilesFromSharedStore(targetsha) 839 } 840 } 841 return err 842 } 843 844 // Retrieve the entire content for all chunks of a LOB and write to 'out' 845 func GetLOBCompleteContent(sha string, out io.Writer) error { 846 return GetLOBCompleteContentInBaseDir(GetLocalLOBRoot(), sha, out) 847 } 848 849 // Retrieve the entire content for all chunks of a LOB within a base root, and write to 'out' 850 func GetLOBCompleteContentInBaseDir(basedir, sha string, out io.Writer) error { 851 info, err := getLOBInfoInBaseDir(sha, basedir) 852 if err != nil { 853 return err 854 } 855 var bytesread int64 856 for i := 0; i < info.NumChunks; i++ { 857 chunkfile := filepath.Join(basedir, GetLOBChunkRelativePath(sha, i)) 858 cf, err := os.OpenFile(chunkfile, os.O_RDONLY, 0644) 859 if err != nil { 860 return err 861 } 862 defer cf.Close() 863 n, err := io.Copy(out, cf) 864 if err != nil { 865 return fmt.Errorf("Error while copying data from content: %v", err.Error()) 866 } 867 bytesread += n 868 } 869 if bytesread != info.Size { 870 return fmt.Errorf("Incorrect number of bytes read for LOB - expected %d actual %d", info.Size, bytesread) 871 } 872 return nil 873 } 874 875 // Generates a diff between the contents of 2 LOBs, with a specified root storage 876 // Automatically copes with chunking, the diff is one file across the entire content 877 // Returns the size of the compressed delta 878 func GenerateLOBDeltaInBaseDir(basedir, basesha, targetsha string, out io.Writer) (int64, error) { 879 // Read all of base file into memory to use as dictionary (pre-size from info) 880 baseinfo, err := getLOBInfoInBaseDir(basesha, basedir) 881 basebuf := bytes.NewBuffer(make([]byte, 0, baseinfo.Size)) 882 if err != nil { 883 return 0, err 884 } 885 err = GetLOBCompleteContentInBaseDir(basedir, basesha, basebuf) 886 if err != nil { 887 return 0, fmt.Errorf("Error getting base file content for delta: %v", err.Error()) 888 } 889 comp := bm.NewCompressor() 890 baseDict := &bm.Dictionary{Dict: basebuf.Bytes()} 891 // Use SetDictionary to set on compressor, this computes the hashes 892 comp.SetDictionary(baseDict) 893 // Set the delta buffer as the output 894 comp.SetWriter(out) 895 896 // Now we read all the targetsha's content and copy it into the compressor 897 targetinfo, err := getLOBInfoInBaseDir(targetsha, basedir) 898 if err != nil { 899 return 0, err 900 } 901 var targetbytesread int64 902 for i := 0; i < targetinfo.NumChunks; i++ { 903 chunkfile := filepath.Join(basedir, GetLOBChunkRelativePath(targetsha, i)) 904 cf, err := os.OpenFile(chunkfile, os.O_RDONLY, 0644) 905 if err != nil { 906 return 0, err 907 } 908 defer cf.Close() 909 n, err := io.Copy(comp, cf) 910 if err != nil { 911 return 0, fmt.Errorf("Error while copying data from target into compressor: %v", err.Error()) 912 } 913 targetbytesread += n 914 } 915 if targetbytesread != targetinfo.Size { 916 return 0, fmt.Errorf("Incorrect number of bytes read for target file - expected %d actual %d", targetinfo.Size, targetbytesread) 917 } 918 919 // Now do the actual compression 920 // Maybe we can improve bm later so that it does it on the fly (less memory) 921 err = comp.Close() 922 if err != nil { 923 return 0, fmt.Errorf("Error during compression of delta: %v", err.Error()) 924 } 925 // This has been written to out now so we're done 926 927 return int64(comp.CompressedSize()), nil 928 } 929 930 // Applies a diff to basesha and generates a LOB, with a specified root storage, 931 // which should have targetsha (will be checked, error returned if disagrees) 932 func ApplyLOBDeltaInBaseDir(basedir, basesha, targetsha string, delta io.Reader) error { 933 // Read all of base file into memory to use as dictionary (pre-size from info) 934 baseinfo, err := getLOBInfoInBaseDir(basesha, basedir) 935 basebuf := bytes.NewBuffer(make([]byte, 0, baseinfo.Size)) 936 if err != nil { 937 return err 938 } 939 err = GetLOBCompleteContentInBaseDir(basedir, basesha, basebuf) 940 if err != nil { 941 return fmt.Errorf("Error getting base file content for delta: %v", err.Error()) 942 } 943 944 exp := bm.NewExpander(delta, basebuf.Bytes()) 945 946 // output result to temp file 947 outf, err := ioutil.TempFile("", fmt.Sprintf("tempdelta%v_%v", basesha, targetsha)) 948 if err != nil { 949 return fmt.Errorf("Error opening temp file for writing: %v\n", err) 950 } 951 defer outf.Close() 952 defer os.Remove(outf.Name()) // always remove temp file if not moved 953 954 // bm.Expander claims to support io.Reader but it doesn't 955 // so can't use the same io.Copy approach we use for Compressor 956 // Probably because it doesn't currently implement the buffering required to support arbitrary Read() calls 957 // Do it all in memory right now - we should probably enhance bm to make this more efficient 958 // Reading the code, the input to Expand is the current slice if you want it 959 // good to pre-allocate some space here, estimate same size as base 960 outbytes, err := exp.Expand(make([]byte, 0, basebuf.Len())) 961 if err != nil { 962 return fmt.Errorf("Error applying LOB delta: %v", err) 963 } 964 // Check the SHA 965 shacalc := sha1.New() 966 shacalc.Write(outbytes) 967 testsha := fmt.Sprintf("%x", string(shacalc.Sum(nil))) 968 if testsha != targetsha { 969 return fmt.Errorf("Integrity error applying delta, SHA does not agree (expected: %v actual %v)", targetsha, testsha) 970 } 971 // Otherwise, we're good. Store this data 972 targetinfo, err := StoreLOBInBaseDir(basedir, bytes.NewReader(outbytes), nil) 973 if err != nil { 974 return fmt.Errorf("Error storing target LOB %v: %v", targetsha, err.Error()) 975 } else if targetinfo.SHA != targetsha { 976 return fmt.Errorf("Integrity error saving applied delta, SHA does not agree (expected: %v actual %v)", targetsha, targetinfo) 977 } 978 979 return nil 980 } 981 982 // Record of a LOB delta (calculated but still to be done) 983 type LOBDelta struct { 984 BaseSHA, TargetSHA string 985 DeltaSize int64 986 // Optional already present delta filename, can be blank 987 DeltaFilename string 988 }