github.com/atlassian/git-lob@v0.0.0-20150806085256-2386a5ed291a/core/storage.go (about)

     1  package core
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/sha1"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"hash"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  
    15  	"github.com/atlassian/git-lob/Godeps/_workspace/src/github.com/cloudflare/bm"
    16  	"github.com/atlassian/git-lob/util"
    17  )
    18  
// BUFSIZE is the size in bytes (128KiB) of the buffer used when reading
// incoming content to be written into chunk files.
const BUFSIZE = 131072

// Chunk size that we split stored data into so it's easier to resume uploads/downloads
// This used to be configurable, but it caused too many issues if different people had different
// settings in a shared repository
// This is only 'var' rather than 'const' to allow tests to modify
var ChunkSize = int64(32 * 1024 * 1024)

// ApproximateMetadataSize is a size estimate in bytes.
// NOTE(review): presumably the typical size of a serialized LOB meta file;
// it is not referenced in this part of the file - confirm against its users.
const ApproximateMetadataSize = 75
    28  
// LOBInfo holds the metadata describing a stored LOB. It is serialized to and
// parsed from the LOB's "_meta" file as JSON.
type LOBInfo struct {
	// SHA of the LOB
	SHA string
	// Total size of the LOB (all chunks)
	Size int64
	// Number of chunks that make up the whole LOB (integrity check)
	NumChunks int
}
    38  
    39  // Gets the root directory for local LOB files & creates if necessary
    40  func GetLocalLOBRoot() string {
    41  	ret := filepath.Join(util.GetGitDir(), "git-lob", "content")
    42  	err := os.MkdirAll(ret, 0755)
    43  	if err != nil {
    44  		util.LogErrorf("Unable to create LOB root folder at %v: %v", ret, err)
    45  		panic(err)
    46  	}
    47  	return ret
    48  }
    49  
    50  // Gets the root directory for shared LOB files & creates if necessary
    51  func GetSharedLOBRoot() string {
    52  	// We create shared store when loading config if specified
    53  	return util.GlobalOptions.SharedStore
    54  }
    55  
    56  // Get relative directory for some base dir for a given sha
    57  func getLOBRelativeDir(sha string) string {
    58  	return filepath.Join(sha[:3], sha[3:6])
    59  }
    60  
    61  // Get a relative file name for a meta file (no dirs created as not rooted)
    62  func GetLOBMetaRelativePath(sha string) string {
    63  	return filepath.Join(getLOBRelativeDir(sha), getLOBMetaFilename(sha))
    64  }
    65  
    66  // Get a relative file name for a meta file (no dirs created as not rooted)
    67  func GetLOBChunkRelativePath(sha string, chunkIdx int) string {
    68  	return filepath.Join(getLOBRelativeDir(sha), getLOBChunkFilename(sha, chunkIdx))
    69  }
    70  
    71  // Get absolute directory for a sha & creates it
    72  func getLOBSubDir(base, sha string) string {
    73  	ret := filepath.Join(base, getLOBRelativeDir(sha))
    74  	err := os.MkdirAll(ret, 0755)
    75  	if err != nil {
    76  		util.LogErrorf("Unable to create LOB 2nd-level folder at %v: %v", ret, err)
    77  		panic(err)
    78  	}
    79  	return ret
    80  
    81  }
    82  
    83  // Gets the containing local folder for a given LOB SHA & creates if necessary
    84  // LOBs are 'splayed' 2-levels deep based on first 6 chars of SHA (3 for each dir)
    85  // We splay by 2 levels and by 3 each (4096 dirs) because we don't pack like git
    86  // so need to ensure directory contents remain practical at high numbers of files
    87  func GetLocalLOBDir(sha string) string {
    88  	if len(sha) != 40 {
    89  		util.LogErrorf("Invalid SHA format: %v\n", sha)
    90  		return ""
    91  	}
    92  	return getLOBSubDir(GetLocalLOBRoot(), sha)
    93  }
    94  
    95  // Gets the containing shared folder for a given LOB SHA & creates if necessary
    96  // LOBs are 'splayed' 2-levels deep based on first 6 chars of SHA (3 for each dir)
    97  // We splay by 2 levels and by 3 each (4096 dirs) because we don't pack like git
    98  // so need to ensure directory contents remain practical at high numbers of files
    99  func GetSharedLOBDir(sha string) string {
   100  	if len(sha) != 40 {
   101  		util.LogErrorf("Invalid SHA format: %v\n", sha)
   102  		return ""
   103  	}
   104  	return getLOBSubDir(GetSharedLOBRoot(), sha)
   105  }
   106  
   107  // get the filename for a meta file (no dir)
   108  func getLOBMetaFilename(sha string) string {
   109  	return sha + "_meta"
   110  }
   111  
   112  // get the filename for a chunk file (no dir)
   113  func getLOBChunkFilename(sha string, chunkIdx int) string {
   114  	return fmt.Sprintf("%v_%d", sha, chunkIdx)
   115  }
   116  
   117  // Gets the absolute path to the meta file for a LOB from a base dir
   118  func GetLOBMetaPathInBaseDir(basedir, sha string) string {
   119  	fld := getLOBSubDir(basedir, sha)
   120  	return filepath.Join(fld, getLOBMetaFilename(sha))
   121  }
   122  
   123  // Gets the absolute path to the chunk file for a LOB from a base dir
   124  func GetLOBChunkPathInBaseDir(basedir, sha string, chunkIdx int) string {
   125  	fld := getLOBSubDir(basedir, sha)
   126  	return filepath.Join(fld, getLOBChunkFilename(sha, chunkIdx))
   127  }
   128  
   129  // Gets the absolute path to the meta file for a LOB in local store
   130  func GetLocalLOBMetaPath(sha string) string {
   131  	return GetLOBMetaPathInBaseDir(GetLocalLOBRoot(), sha)
   132  }
   133  
   134  // Gets the absolute path to the chunk file for a LOB in local store
   135  func GetLocalLOBChunkPath(sha string, chunkIdx int) string {
   136  	return GetLOBChunkPathInBaseDir(GetLocalLOBRoot(), sha, chunkIdx)
   137  }
   138  
   139  // Gets the absolute path to the meta file for a LOB in shared store
   140  func getSharedLOBMetaPath(sha string) string {
   141  	return GetLOBMetaPathInBaseDir(GetSharedLOBRoot(), sha)
   142  }
   143  
   144  // Gets the absolute path to the chunk file for a LOB in local store
   145  func GetSharedLOBChunkPath(sha string, chunkIdx int) string {
   146  	return GetLOBChunkPathInBaseDir(GetSharedLOBRoot(), sha, chunkIdx)
   147  }
   148  
   149  // Retrieve information about an existing stored LOB, from a base dir
   150  func getLOBInfoInBaseDir(sha, basedir string) (*LOBInfo, error) {
   151  	file := GetLOBMetaPathInBaseDir(basedir, sha)
   152  	_, err := os.Stat(file)
   153  	if err != nil {
   154  		if os.IsNotExist(err) {
   155  			return nil, NewNotFoundError(err.Error(), file)
   156  		}
   157  		return nil, err
   158  	}
   159  
   160  	info, err := parseLOBInfoFromFile(file)
   161  	if err != nil {
   162  		return nil, NewIntegrityErrorWithAdditionalMessage([]string{sha}, err.Error())
   163  	}
   164  	return info, nil
   165  }
   166  
   167  // Retrieve information about an existing stored LOB (local)
   168  func GetLOBInfo(sha string) (*LOBInfo, error) {
   169  	info, err := getLOBInfoInBaseDir(sha, GetLocalLOBRoot())
   170  	if err != nil {
   171  		if IsNotFoundError(err) {
   172  			// Try to recover from shared
   173  			if recoverLocalLOBFilesFromSharedStore(sha) {
   174  				info, err = getLOBInfoInBaseDir(sha, GetLocalLOBRoot())
   175  				if err != nil {
   176  					// Dang
   177  					return nil, err
   178  				}
   179  				// otherwise we recovered!
   180  			} else {
   181  				return nil, err
   182  			}
   183  		} else {
   184  			return nil, err
   185  		}
   186  	}
   187  
   188  	return info, nil
   189  }
   190  
   191  // Parse a LOB meta file
   192  func parseLOBInfoFromFile(file string) (*LOBInfo, error) {
   193  	infobytes, err := ioutil.ReadFile(file)
   194  
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  	// Read JSON metadata
   199  	info := &LOBInfo{}
   200  	err = json.Unmarshal(infobytes, info)
   201  	if err != nil {
   202  		// Fatal, corruption
   203  		return nil, errors.New(fmt.Sprintf("Unable to interpret meta file %v: %v", file, err))
   204  	}
   205  
   206  	return info, nil
   207  
   208  }
   209  
   210  // If files are missing in the local repo but available in the shared
   211  // store, returns true after re-establishing the link
   212  // Note: this doesn't validate sizes of any files because it's assumed
   213  // because of hardlinking the files are either missing entirely or the
   214  // same as the shared store
   215  func recoverLocalLOBFilesFromSharedStore(sha string) bool {
   216  	if !IsUsingSharedStorage() {
   217  		return false
   218  	}
   219  
   220  	metalocal := GetLocalLOBMetaPath(sha)
   221  	if !util.FileExists(metalocal) {
   222  		metashared := getSharedLOBMetaPath(sha)
   223  		if util.FileExists(metashared) {
   224  			err := linkSharedLOBFilename(metashared)
   225  			if err != nil {
   226  				util.LogErrorf("Failed to link shared file %v into local repo: %v\n", metashared, err.Error())
   227  				return false
   228  			}
   229  		} else {
   230  			return false
   231  		}
   232  	}
   233  	// Meta should be complete & local now
   234  	info, err := GetLOBInfo(sha)
   235  	if err != nil {
   236  		return false
   237  	}
   238  	for i := 0; i < info.NumChunks; i++ {
   239  		local := GetLocalLOBChunkPath(sha, i)
   240  		expectedSize := getLOBExpectedChunkSize(info, i)
   241  		if !util.FileExistsAndIsOfSize(local, expectedSize) {
   242  			shared := GetSharedLOBChunkPath(sha, i)
   243  			if util.FileExistsAndIsOfSize(shared, expectedSize) {
   244  				err := linkSharedLOBFilename(shared)
   245  				if err != nil {
   246  					util.LogErrorf("Failed to link shared file %v into local repo: %v\n", shared, err.Error())
   247  					return false
   248  				}
   249  			} else {
   250  				return false
   251  			}
   252  		}
   253  	}
   254  
   255  	return true
   256  }
   257  
   258  // Retrieve LOB from storage
   259  func RetrieveLOB(sha string, out io.Writer) (info *LOBInfo, err error) {
   260  	info, err = GetLOBInfo(sha)
   261  
   262  	if err != nil {
   263  		if IsNotFoundError(err) && util.GlobalOptions.AutoFetchEnabled {
   264  			err = AutoFetch(sha, true)
   265  			if err == nil {
   266  				info, err = GetLOBInfo(sha)
   267  			}
   268  		}
   269  		if err != nil {
   270  			if IsNotFoundError(err) {
   271  				// Still not found after possible recovery?
   272  				return nil, err
   273  			} else {
   274  				// Some other issue
   275  				return nil, errors.New(fmt.Sprintf("Unable to retrieve LOB with SHA %v: %v", sha, err.Error()))
   276  			}
   277  		}
   278  	}
   279  
   280  	var totalBytesRead = int64(0)
   281  	fileSize := info.Size
   282  	// Pre-validate all the files BEFORE we start streaming data to out
   283  	// if we fail part way through we don't want to have written partial
   284  	// data, should be all or nothing
   285  	lastChunkSize := fileSize - (int64(info.NumChunks-1) * ChunkSize)
   286  	// Check all files
   287  	for i := 0; i < info.NumChunks; i++ {
   288  		chunkFilename := GetLocalLOBChunkPath(sha, i)
   289  		var expectedSize int64
   290  		if i+1 < info.NumChunks {
   291  			expectedSize = ChunkSize
   292  		} else {
   293  			if info.NumChunks == 1 {
   294  				expectedSize = fileSize
   295  			} else {
   296  				expectedSize = lastChunkSize
   297  			}
   298  		}
   299  		if !util.FileExistsAndIsOfSize(chunkFilename, expectedSize) {
   300  			// Try to recover from shared store
   301  			recoveredFromShared := false
   302  			if recoverLocalLOBFilesFromSharedStore(sha) {
   303  				recoveredFromShared = util.FileExistsAndIsOfSize(chunkFilename, expectedSize)
   304  			}
   305  
   306  			if !recoveredFromShared {
   307  				if util.GlobalOptions.AutoFetchEnabled {
   308  					err = AutoFetch(sha, true)
   309  					if err != nil {
   310  						if IsNotFoundError(err) {
   311  							return info, NewNotFoundError(fmt.Sprintf("Missing chunk %d for %v & not on remote", i, sha), chunkFilename)
   312  						} else {
   313  							return info, errors.New(fmt.Sprintf("Missing chunk %d for %v & failed fetch: %v", i, sha, err.Error()))
   314  						}
   315  					}
   316  				} else {
   317  					return info, NewNotFoundError(fmt.Sprintf("Missing chunk %d for %v", i, sha), chunkFilename)
   318  				}
   319  			}
   320  		}
   321  	}
   322  	// If all was well, start reading & streaming content
   323  	for i := 0; i < info.NumChunks; i++ {
   324  		// Check each chunk file exists
   325  		chunkFilename := GetLocalLOBChunkPath(info.SHA, i)
   326  		in, err := os.OpenFile(chunkFilename, os.O_RDONLY, 0644)
   327  		if err != nil {
   328  			return info, errors.New(fmt.Sprintf("Error reading LOB file %v: %v", chunkFilename, err))
   329  		}
   330  		c, err := io.Copy(out, in)
   331  		if err != nil {
   332  			return info, errors.New(fmt.Sprintf("I/O error while copying LOB file %v, check working copy state", chunkFilename))
   333  		}
   334  		totalBytesRead += c
   335  	}
   336  
   337  	// Final check
   338  	if totalBytesRead != fileSize {
   339  		err = errors.New(fmt.Sprintf("Error, file length does not match expected in LOB %v, expected %d, total size %d", sha, fileSize, totalBytesRead))
   340  		return info, err
   341  	}
   342  
   343  	util.LogDebugf("Successfully retrieved LOB %v from %d chunks, total size %v\n", sha, info.NumChunks, util.FormatSize(totalBytesRead))
   344  
   345  	return info, nil
   346  
   347  }
   348  
   349  // Link a file from shared storage into the local repo
   350  // The hard link means we only ever have one copy of the data
   351  // but it appears under each repo's git-lob folder
   352  // destFile should be a full path of shared file location
   353  func linkSharedLOBFilename(destSharedFile string) error {
   354  	// Get path relative to shared store root, then translate it to local path
   355  	relPath, err := filepath.Rel(util.GlobalOptions.SharedStore, destSharedFile)
   356  	if err != nil {
   357  		return err
   358  	}
   359  	linkPath := filepath.Join(GetLocalLOBRoot(), relPath)
   360  
   361  	// Make sure path exists since we're not using utility method to link
   362  	os.MkdirAll(filepath.Dir(linkPath), 0755)
   363  
   364  	os.Remove(linkPath)
   365  	err = CreateHardLink(destSharedFile, linkPath)
   366  	if err != nil {
   367  		return errors.New(fmt.Sprintf("Error creating hard link from %v to %v: %v", linkPath, destSharedFile, err))
   368  	}
   369  	return nil
   370  }
   371  
   372  // Store the metadata for a given sha
   373  // If it already exists and is of the right size, will do nothing
   374  func StoreLOBInfo(info *LOBInfo) error {
   375  	var root string
   376  	if IsUsingSharedStorage() {
   377  		root = GetSharedLOBRoot()
   378  	} else {
   379  		root = GetLocalLOBRoot()
   380  	}
   381  	return StoreLOBInfoInBaseDir(root, info)
   382  }
   383  
   384  // Store the metadata for a given sha in a relative path
   385  // If it already exists and is of the right size, will do nothing
   386  func StoreLOBInfoInBaseDir(basedir string, info *LOBInfo) error {
   387  	infoBytes, err := json.Marshal(info)
   388  	if err != nil {
   389  		return errors.New(fmt.Sprintf("Unable to convert LOB info to JSON: %v", err))
   390  	}
   391  	infoFilename := GetLOBMetaPathInBaseDir(basedir, info.SHA)
   392  	if !util.FileExistsAndIsOfSize(infoFilename, int64(len(infoBytes))) {
   393  		// Since all the details are derived from the SHA the only variant is chunking or incomplete writes so
   394  		// we don't need to worry about needing to update the content (it must be correct)
   395  		util.LogDebugf("Writing LOB metadata file: %v\n", infoFilename)
   396  		err = ioutil.WriteFile(infoFilename, infoBytes, 0644)
   397  		if err != nil {
   398  			return err
   399  		}
   400  	} else {
   401  		util.LogDebugf("LOB metadata file already exists & is valid: %v\n", infoFilename)
   402  	}
   403  
   404  	// This may have stored in shared storage, so link if required
   405  	if IsUsingSharedStorage() && basedir == GetSharedLOBRoot() {
   406  		return linkSharedLOBFilename(infoFilename)
   407  	} else {
   408  		return nil
   409  	}
   410  
   411  }
   412  
   413  func IsUsingSharedStorage() bool {
   414  	if util.GlobalOptions.SharedStore != "" {
   415  		// We create the folder on loading config
   416  		return util.DirExists(util.GlobalOptions.SharedStore)
   417  	}
   418  	return false
   419  }
   420  
   421  // Write the contents of fromFile to final storage with sha, checking the size
   422  // If file already exists and is of the right size, will do nothing
   423  // fromChunkFile will be moved into its final location or deleted if the data is already valid,
   424  // so the file will not exist after this call (renamed to final location or deleted), unless error
   425  func StoreLOBChunk(sha string, chunkNo int, fromChunkFile string, sz int64) error {
   426  	var root string
   427  	if IsUsingSharedStorage() {
   428  		root = GetSharedLOBRoot()
   429  	} else {
   430  		root = GetLocalLOBRoot()
   431  	}
   432  	return StoreLOBChunkInBaseDir(root, sha, chunkNo, fromChunkFile, sz)
   433  }
   434  
   435  // Write the contents of fromFile to final storage with sha, checking the size, to a relative root
   436  // If file already exists and is of the right size, will do nothing
   437  // fromChunkFile will be moved into its final location or deleted if the data is already valid,
   438  // so the file will not exist after this call (renamed to final location or deleted), unless error
   439  func StoreLOBChunkInBaseDir(basedir, sha string, chunkNo int, fromChunkFile string, sz int64) error {
   440  	destFile := GetLOBChunkPathInBaseDir(basedir, sha, chunkNo)
   441  
   442  	if !util.FileExistsAndIsOfSize(destFile, int64(sz)) {
   443  		util.LogDebugf("Saving final LOB metadata file: %v\n", destFile)
   444  		// delete any existing (incorrectly sized) file since will probably not be allowed to rename over it
   445  		// ignore any errors
   446  		os.Remove(destFile)
   447  		err := os.Rename(fromChunkFile, destFile)
   448  		if err != nil {
   449  			return err
   450  		}
   451  	} else {
   452  		util.LogDebugf("LOB chunk file already exists & is valid: %v\n", destFile)
   453  		// Remove file that would have been moved
   454  		os.Remove(fromChunkFile)
   455  	}
   456  
   457  	// This may have stored in shared storage, so link if required
   458  	if IsUsingSharedStorage() && basedir == GetSharedLOBRoot() {
   459  		return linkSharedLOBFilename(destFile)
   460  	}
   461  	return nil
   462  
   463  }
   464  
   465  // Read from a stream and calculate SHA, while also writing content to chunked content
   466  // leader is a slice of bytes that has already been read (probe for SHA)
   467  func StoreLOB(in io.Reader, leader []byte) (*LOBInfo, error) {
   468  	var root string
   469  	if IsUsingSharedStorage() {
   470  		root = GetSharedLOBRoot()
   471  	} else {
   472  		root = GetLocalLOBRoot()
   473  	}
   474  	return StoreLOBInBaseDir(root, in, leader)
   475  }
   476  
   477  // Read from a stream and calculate SHA, while also writing content to chunked content
   478  // leader is a slice of bytes that has already been read (probe for SHA)
   479  // Store underneath a specified LOB root
   480  func StoreLOBInBaseDir(basedir string, in io.Reader, leader []byte) (*LOBInfo, error) {
   481  	sha := sha1.New()
   482  	// Write chunks to temporary files, then move based on SHA filename once calculated
   483  	chunkFilenames := make([]string, 0, 5)
   484  
   485  	var outf *os.File
   486  	var err error
   487  	writeLeader := true
   488  	buf := make([]byte, BUFSIZE)
   489  	var fatalError error
   490  	var currentChunkSize int64 = 0
   491  	var totalSize int64 = 0
   492  
   493  	for {
   494  		var dataToWrite []byte
   495  
   496  		if writeLeader && len(leader) > 0 {
   497  			dataToWrite = leader
   498  			writeLeader = false
   499  		} else {
   500  			var bytesToRead int64 = BUFSIZE
   501  			if BUFSIZE+currentChunkSize > ChunkSize {
   502  				// Read less than BUFSIZE so we stick to CHUNKLIMIT
   503  				bytesToRead = ChunkSize - currentChunkSize
   504  			}
   505  			c, err := in.Read(buf[:bytesToRead])
   506  			// Write any data to SHA & output
   507  			if c > 0 {
   508  				dataToWrite = buf[:c]
   509  			} else if err != nil {
   510  				if err == io.EOF {
   511  					// End of input
   512  					outf.Close()
   513  					break
   514  				} else {
   515  					outf.Close()
   516  					fatalError = errors.New(fmt.Sprintf("I/O error reading chunk %d: %v", len(chunkFilenames), err))
   517  					break
   518  				}
   519  			}
   520  
   521  		}
   522  
   523  		// Write data
   524  		if len(dataToWrite) > 0 {
   525  			// New chunk file?
   526  			if outf == nil {
   527  				outf, err = ioutil.TempFile("", "tempchunk")
   528  				if err != nil {
   529  					fatalError = errors.New(fmt.Sprintf("Unable to create chunk %d: %v", len(chunkFilenames), err))
   530  					break
   531  				}
   532  				chunkFilenames = append(chunkFilenames, outf.Name())
   533  				currentChunkSize = 0
   534  			}
   535  			sha.Write(dataToWrite)
   536  			c, err := outf.Write(dataToWrite)
   537  			if err != nil {
   538  				fatalError = errors.New(fmt.Sprintf("I/O error writing chunk: %v wrote %d bytes of %d", err, c, len(dataToWrite)))
   539  				break
   540  			}
   541  			currentChunkSize += int64(c)
   542  			totalSize += int64(c)
   543  
   544  			// Read from incoming
   545  			// Deal with chunk limit
   546  			if currentChunkSize >= ChunkSize {
   547  				// Close this output, next iteration will create the next file
   548  				outf.Close()
   549  				outf = nil
   550  				currentChunkSize = 0
   551  			}
   552  		} else {
   553  			// No data to write
   554  			outf.Close()
   555  			break
   556  		}
   557  	}
   558  	if outf != nil {
   559  		// Close any dangling chunk
   560  		outf.Close()
   561  	}
   562  	defer func() {
   563  		// Clean up any temporaries on error or not used
   564  		for _, f := range chunkFilenames {
   565  			os.Remove(f)
   566  		}
   567  	}()
   568  
   569  	if fatalError != nil {
   570  		return nil, fatalError
   571  	}
   572  
   573  	shaStr := fmt.Sprintf("%x", string(sha.Sum(nil)))
   574  
   575  	// We *may* now move the data to LOB dir
   576  	// We won't if it already exists & is the correct size
   577  	// Construct LOBInfo & write to final location
   578  	info := &LOBInfo{SHA: shaStr, Size: totalSize, NumChunks: len(chunkFilenames)}
   579  	err = StoreLOBInfoInBaseDir(basedir, info)
   580  	if err != nil {
   581  		return nil, err
   582  	}
   583  
   584  	// Check each chunk file
   585  	for i, f := range chunkFilenames {
   586  		sz := ChunkSize
   587  		if i+1 == len(chunkFilenames) {
   588  			// Last chunk, get size
   589  			sz = currentChunkSize
   590  		}
   591  		err = StoreLOBChunkInBaseDir(basedir, shaStr, i, f, sz)
   592  		if err != nil {
   593  			return nil, err
   594  		}
   595  	}
   596  
   597  	return info, nil
   598  
   599  }
   600  
   601  // Delete all files associated with a given LOB SHA from the local store
   602  func DeleteLOB(sha string) error {
   603  	// Delete from local always (either only copy, or hard link)
   604  	return DeleteLOBInBaseDir(sha, GetLocalLOBRoot())
   605  }
   606  
   607  // Delete all files associated with a given LOB SHA from a specified root dir
   608  func DeleteLOBInBaseDir(sha, basedir string) error {
   609  
   610  	dir := getLOBSubDir(basedir, sha)
   611  	names, err := filepath.Glob(filepath.Join(dir, fmt.Sprintf("%v*", sha)))
   612  	if err != nil {
   613  		return errors.New(fmt.Sprintf("Unable to glob local files for %v: %v", sha, err))
   614  	}
   615  	for _, n := range names {
   616  		err = os.Remove(n)
   617  		if err != nil {
   618  			return errors.New(fmt.Sprintf("Unable to delete file %v: %v", n, err))
   619  		}
   620  	}
   621  
   622  	if IsUsingSharedStorage() && basedir != GetSharedLOBRoot() {
   623  		// If we're using shared storage, then also check the number of links in
   624  		// shared storage for this SHA. See PruneSharedStore for a more general
   625  		// sweep for files that don't go through DeleteLOB (e.g. repo deleted manually)
   626  		shareddir := GetSharedLOBDir(sha)
   627  		names, err := filepath.Glob(filepath.Join(shareddir, fmt.Sprintf("%v*", sha)))
   628  		if err != nil {
   629  			return errors.New(fmt.Sprintf("Unable to glob shared files for %v: %v", sha, err))
   630  		}
   631  		for _, n := range names {
   632  			links, err := GetHardLinkCount(n)
   633  			if err == nil && links == 1 {
   634  				// only 1 hard link means no other repo refers to this shared LOB
   635  				// so it's safe to delete it
   636  				err = os.Remove(n)
   637  				if err != nil {
   638  					return errors.New(fmt.Sprintf("Unable to delete file %v: %v", n, err))
   639  				}
   640  			}
   641  
   642  		}
   643  
   644  	}
   645  
   646  	return nil
   647  
   648  }
   649  
   650  // Get the local/shared storage of a LOB with a given SHA
   651  // Returns the list of files (relative to basedir) & checks for
   652  // integrity if check = true
   653  // If check = true and checkHash = true, reads all the data in the files and re-calculates
   654  // the SHA for a deep validation of content
   655  // If check = true and checkHash = false, just checks the presence & size of all files
   656  // If there are any errors the returned list may not be correct
   657  // In the rare case that a break has occurred between shared storage
   658  // and the local hardlink, this method will re-link if the shared
   659  // store has it
   660  func GetLOBFilesForSHA(sha, basedir string, check bool, checkHash bool) (files []string, size int64, _err error) {
   661  	var ret []string
   662  	info, err := getLOBInfoInBaseDir(sha, basedir)
   663  	if err != nil {
   664  		return []string{}, 0, err
   665  	}
   666  	// add meta file (relative) - already checked by GetLOBInfo above
   667  	relmeta := GetLOBMetaRelativePath(sha)
   668  	ret = append(ret, relmeta)
   669  
   670  	var shaRecalc hash.Hash
   671  	if checkHash {
   672  		shaRecalc = sha1.New()
   673  	}
   674  	lastChunkSize := info.Size - (int64(info.NumChunks-1) * ChunkSize)
   675  	for i := 0; i < info.NumChunks; i++ {
   676  		relchunk := GetLOBChunkRelativePath(sha, i)
   677  		ret = append(ret, relchunk)
   678  		if check {
   679  			abschunk := filepath.Join(basedir, relchunk)
   680  			// Check size first
   681  			var expectedSize int64
   682  			if i+1 < info.NumChunks {
   683  				expectedSize = ChunkSize
   684  			} else {
   685  				if info.NumChunks == 1 {
   686  					expectedSize = info.Size
   687  				} else {
   688  					expectedSize = lastChunkSize
   689  				}
   690  			}
   691  			if !util.FileExistsAndIsOfSize(abschunk, expectedSize) {
   692  				// Try to recover from shared store
   693  				recoveredFromShared := false
   694  				if recoverLocalLOBFilesFromSharedStore(sha) {
   695  					recoveredFromShared = util.FileExistsAndIsOfSize(abschunk, expectedSize)
   696  				}
   697  
   698  				if !recoveredFromShared {
   699  					msg := fmt.Sprintf("LOB file not found or wrong size: %v expected to be %d bytes", abschunk, expectedSize)
   700  					wrongSize := util.FileExists(abschunk)
   701  					var err error
   702  					if wrongSize {
   703  						err = NewWrongSizeError(msg, abschunk)
   704  					} else {
   705  						err = NewNotFoundError(msg, abschunk)
   706  					}
   707  					return ret, info.Size, err
   708  				}
   709  			}
   710  
   711  			// Check SHA content?
   712  			if checkHash {
   713  				f, err := os.OpenFile(abschunk, os.O_RDONLY, 0644)
   714  				if err != nil {
   715  					msg := fmt.Sprintf("Error opening LOB file %v to check SHA: %v", abschunk, err)
   716  					return ret, info.Size, errors.New(msg)
   717  				}
   718  				_, err = io.Copy(shaRecalc, f)
   719  				if err != nil {
   720  					msg := fmt.Sprintf("Error copying LOB file %v into SHA calculator: %v", abschunk, err)
   721  					return ret, info.Size, errors.New(msg)
   722  				}
   723  				f.Close()
   724  			}
   725  
   726  		}
   727  	}
   728  
   729  	if check && checkHash {
   730  		shaRecalcStr := fmt.Sprintf("%x", string(shaRecalc.Sum(nil)))
   731  		if sha != shaRecalcStr {
   732  			return ret, info.Size, NewIntegrityError([]string{sha})
   733  		}
   734  	}
   735  
   736  	return ret, info.Size, nil
   737  
   738  }
   739  
   740  // Check the integrity of the files for a given sha in the attached basedir
   741  // If checkHash = true, reads all the data in the files and re-calculates
   742  // the SHA for a deep validation of content (slower but complete)
   743  // If checkHash = false, just checks the presence & size of all files (quick & most likely correct)
   744  // Note that if basedir is the local root, will try to recover missing files from shared store
   745  func CheckLOBFilesForSHA(sha, basedir string, checkHash bool) error {
   746  	_, _, err := GetLOBFilesForSHA(sha, basedir, true, checkHash)
   747  	return err
   748  }
   749  
   750  // Check the presence & integrity of the files for a given list of shas in this repo
   751  // and return a list of those which failed the check
   752  // If checkHash = true, reads all the data in the files and re-calculates
   753  // the SHA for a deep validation of content (slower but complete)
   754  // If checkHash = false, just checks the presence & size of all files (quick & most likely correct)
   755  func GetMissingLOBs(lobshas []string, checkHash bool) []string {
   756  	localroot := GetLocalLOBRoot()
   757  	var missing []string
   758  	for _, sha := range lobshas {
   759  		err := CheckLOBFilesForSHA(sha, localroot, checkHash)
   760  		if err != nil {
   761  			// Recover from shared storage if possible
   762  			if IsUsingSharedStorage() && recoverLocalLOBFilesFromSharedStore(sha) {
   763  				// then we're OK
   764  			} else {
   765  				missing = append(missing, sha)
   766  			}
   767  		}
   768  	}
   769  	return missing
   770  }
   771  
   772  // Return whether a single LOB is missing
   773  func IsLOBMissing(sha string, checkHash bool) bool {
   774  	localroot := GetLocalLOBRoot()
   775  	err := CheckLOBFilesForSHA(sha, localroot, checkHash)
   776  	if err != nil {
   777  		// Recover from shared storage if possible
   778  		if IsUsingSharedStorage() && recoverLocalLOBFilesFromSharedStore(sha) {
   779  			// then we're OK
   780  		} else {
   781  			return true
   782  		}
   783  	}
   784  
   785  	return false
   786  }
   787  
   788  // Get the correct size of a given chunk
   789  func getLOBExpectedChunkSize(info *LOBInfo, chunkIdx int) int64 {
   790  	if chunkIdx+1 < info.NumChunks {
   791  		return ChunkSize
   792  	} else {
   793  		if info.NumChunks == 1 {
   794  			return info.Size
   795  		} else {
   796  			return info.Size - (int64(info.NumChunks-1) * ChunkSize)
   797  		}
   798  	}
   799  
   800  }
   801  
   802  // returns whether the local store has any binaries in it
   803  func IsLocalLOBStoreEmpty() bool {
   804  	root := GetLocalLOBRoot()
   805  	rootf, err := os.Open(root)
   806  	if err != nil {
   807  		return true
   808  	}
   809  	defer rootf.Close()
   810  	// Max 3 entries
   811  	dirs, err := rootf.Readdirnames(3)
   812  	if err != nil {
   813  		return true
   814  	}
   815  	// Will be no entries if this is new
   816  	return len(dirs) == 0
   817  }
   818  
   819  // Generates a diff between the contents of 2 LOBs
   820  // Automatically copes with chunking, the diff is one file across the entire content
   821  // Returns the size of the compressed delta
   822  func GenerateLOBDelta(basesha, targetsha string, out io.Writer) (int64, error) {
   823  	return GenerateLOBDeltaInBaseDir(GetLocalLOBRoot(), basesha, targetsha, out)
   824  }
   825  
   826  // Applies a diff to basesha and generates a LOB which should have targetsha (will be checked, error returned if disagrees)
   827  func ApplyLOBDelta(basesha, targetsha string, delta io.Reader) error {
   828  	var root string
   829  	if IsUsingSharedStorage() {
   830  		root = GetSharedLOBRoot()
   831  	} else {
   832  		root = GetLocalLOBRoot()
   833  	}
   834  	err := ApplyLOBDeltaInBaseDir(root, basesha, targetsha, delta)
   835  	if err != nil {
   836  		// This may have stored in shared storage, so link if required
   837  		if IsUsingSharedStorage() {
   838  			recoverLocalLOBFilesFromSharedStore(targetsha)
   839  		}
   840  	}
   841  	return err
   842  }
   843  
   844  // Retrieve the entire content for all chunks of a LOB and write to 'out'
   845  func GetLOBCompleteContent(sha string, out io.Writer) error {
   846  	return GetLOBCompleteContentInBaseDir(GetLocalLOBRoot(), sha, out)
   847  }
   848  
   849  // Retrieve the entire content for all chunks of a LOB within a base root, and write to 'out'
   850  func GetLOBCompleteContentInBaseDir(basedir, sha string, out io.Writer) error {
   851  	info, err := getLOBInfoInBaseDir(sha, basedir)
   852  	if err != nil {
   853  		return err
   854  	}
   855  	var bytesread int64
   856  	for i := 0; i < info.NumChunks; i++ {
   857  		chunkfile := filepath.Join(basedir, GetLOBChunkRelativePath(sha, i))
   858  		cf, err := os.OpenFile(chunkfile, os.O_RDONLY, 0644)
   859  		if err != nil {
   860  			return err
   861  		}
   862  		defer cf.Close()
   863  		n, err := io.Copy(out, cf)
   864  		if err != nil {
   865  			return fmt.Errorf("Error while copying data from content: %v", err.Error())
   866  		}
   867  		bytesread += n
   868  	}
   869  	if bytesread != info.Size {
   870  		return fmt.Errorf("Incorrect number of bytes read for LOB - expected %d actual %d", info.Size, bytesread)
   871  	}
   872  	return nil
   873  }
   874  
   875  // Generates a diff between the contents of 2 LOBs, with a specified root storage
   876  // Automatically copes with chunking, the diff is one file across the entire content
   877  // Returns the size of the compressed delta
   878  func GenerateLOBDeltaInBaseDir(basedir, basesha, targetsha string, out io.Writer) (int64, error) {
   879  	// Read all of base file into memory to use as dictionary (pre-size from info)
   880  	baseinfo, err := getLOBInfoInBaseDir(basesha, basedir)
   881  	basebuf := bytes.NewBuffer(make([]byte, 0, baseinfo.Size))
   882  	if err != nil {
   883  		return 0, err
   884  	}
   885  	err = GetLOBCompleteContentInBaseDir(basedir, basesha, basebuf)
   886  	if err != nil {
   887  		return 0, fmt.Errorf("Error getting base file content for delta: %v", err.Error())
   888  	}
   889  	comp := bm.NewCompressor()
   890  	baseDict := &bm.Dictionary{Dict: basebuf.Bytes()}
   891  	// Use SetDictionary to set on compressor, this computes the hashes
   892  	comp.SetDictionary(baseDict)
   893  	// Set the delta buffer as the output
   894  	comp.SetWriter(out)
   895  
   896  	// Now we read all the targetsha's content and copy it into the compressor
   897  	targetinfo, err := getLOBInfoInBaseDir(targetsha, basedir)
   898  	if err != nil {
   899  		return 0, err
   900  	}
   901  	var targetbytesread int64
   902  	for i := 0; i < targetinfo.NumChunks; i++ {
   903  		chunkfile := filepath.Join(basedir, GetLOBChunkRelativePath(targetsha, i))
   904  		cf, err := os.OpenFile(chunkfile, os.O_RDONLY, 0644)
   905  		if err != nil {
   906  			return 0, err
   907  		}
   908  		defer cf.Close()
   909  		n, err := io.Copy(comp, cf)
   910  		if err != nil {
   911  			return 0, fmt.Errorf("Error while copying data from target into compressor: %v", err.Error())
   912  		}
   913  		targetbytesread += n
   914  	}
   915  	if targetbytesread != targetinfo.Size {
   916  		return 0, fmt.Errorf("Incorrect number of bytes read for target file - expected %d actual %d", targetinfo.Size, targetbytesread)
   917  	}
   918  
   919  	// Now do the actual compression
   920  	// Maybe we can improve bm later so that it does it on the fly (less memory)
   921  	err = comp.Close()
   922  	if err != nil {
   923  		return 0, fmt.Errorf("Error during compression of delta: %v", err.Error())
   924  	}
   925  	// This has been written to out now so we're done
   926  
   927  	return int64(comp.CompressedSize()), nil
   928  }
   929  
   930  // Applies a diff to basesha and generates a LOB, with a specified root storage,
   931  // which should have targetsha (will be checked, error returned if disagrees)
   932  func ApplyLOBDeltaInBaseDir(basedir, basesha, targetsha string, delta io.Reader) error {
   933  	// Read all of base file into memory to use as dictionary (pre-size from info)
   934  	baseinfo, err := getLOBInfoInBaseDir(basesha, basedir)
   935  	basebuf := bytes.NewBuffer(make([]byte, 0, baseinfo.Size))
   936  	if err != nil {
   937  		return err
   938  	}
   939  	err = GetLOBCompleteContentInBaseDir(basedir, basesha, basebuf)
   940  	if err != nil {
   941  		return fmt.Errorf("Error getting base file content for delta: %v", err.Error())
   942  	}
   943  
   944  	exp := bm.NewExpander(delta, basebuf.Bytes())
   945  
   946  	// output result to temp file
   947  	outf, err := ioutil.TempFile("", fmt.Sprintf("tempdelta%v_%v", basesha, targetsha))
   948  	if err != nil {
   949  		return fmt.Errorf("Error opening temp file for writing: %v\n", err)
   950  	}
   951  	defer outf.Close()
   952  	defer os.Remove(outf.Name()) // always remove temp file if not moved
   953  
   954  	// bm.Expander claims to support io.Reader but it doesn't
   955  	// so can't use the same io.Copy approach we use for Compressor
   956  	// Probably because it doesn't currently implement the buffering required to support arbitrary Read() calls
   957  	// Do it all in memory right now - we should probably enhance bm to make this more efficient
   958  	// Reading the code, the input to Expand is the current slice if you want it
   959  	// good to pre-allocate some space here, estimate same size as base
   960  	outbytes, err := exp.Expand(make([]byte, 0, basebuf.Len()))
   961  	if err != nil {
   962  		return fmt.Errorf("Error applying LOB delta: %v", err)
   963  	}
   964  	// Check the SHA
   965  	shacalc := sha1.New()
   966  	shacalc.Write(outbytes)
   967  	testsha := fmt.Sprintf("%x", string(shacalc.Sum(nil)))
   968  	if testsha != targetsha {
   969  		return fmt.Errorf("Integrity error applying delta, SHA does not agree (expected: %v actual %v)", targetsha, testsha)
   970  	}
   971  	// Otherwise, we're good. Store this data
   972  	targetinfo, err := StoreLOBInBaseDir(basedir, bytes.NewReader(outbytes), nil)
   973  	if err != nil {
   974  		return fmt.Errorf("Error storing target LOB %v: %v", targetsha, err.Error())
   975  	} else if targetinfo.SHA != targetsha {
   976  		return fmt.Errorf("Integrity error saving applied delta, SHA does not agree (expected: %v actual %v)", targetsha, targetinfo)
   977  	}
   978  
   979  	return nil
   980  }
   981  
   982  // Record of a LOB delta (calculated but still to be done)
   983  type LOBDelta struct {
   984  	BaseSHA, TargetSHA string
   985  	DeltaSize          int64
   986  	// Optional already present delta filename, can be blank
   987  	DeltaFilename string
   988  }