github.com/atlassian/git-lob@v0.0.0-20150806085256-2386a5ed291a/providers/smart/smart.go (about)

     1  package smart
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"net/url"
     9  	"os"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/atlassian/git-lob/providers"
    15  	"github.com/atlassian/git-lob/util"
    16  )
    17  
    18  // The smart sync provider implements everything the standard SyncProvider does, but in addition
    19  // provides methods to exchange binary deltas rather than entire files (as chunks).
    20  // It can operate in 2 modes; 'persistent' mode where a connection is re-used for many requests
    21  // (only possible with options like SSH), or 'transient' mode where all requests & responses are
    22  // separate round-trips (e.g. REST). The Transport interface provides the abstraction required
    23  // for that.
    24  type SmartSyncProviderImpl struct {
    25  	// The remote we're working with right now (for cached info)
    26  	remoteName string
    27  	// The parsed url we're using
    28  	serverUrl *url.URL
    29  
    30  	// The transport which is providing the underlying operations
    31  	transport Transport
    32  	// capabilities which the server has indicated it supports
    33  	serverCaps []string
    34  	// capabilities which are enabled
    35  	enabledCaps []string
    36  }
    37  
    38  // See doc/smart_protocol.md for protocol definition
    39  
    40  func (*SmartSyncProviderImpl) TypeID() string {
    41  	return "smart"
    42  }
    43  
    44  func (*SmartSyncProviderImpl) HelpTextSummary() string {
    45  	return `smart: communicates with a git-lob compatible server to exchange binaries`
    46  }
    47  
    48  func (*SmartSyncProviderImpl) HelpTextDetail() string {
    49  	return `The "smart" provider transfers files by talking to service hosted on
    50  the remote binary store which can communicate using a git-lob protocol. Many
    51  transports are supportable so long as client and server can establish comms. 
    52  The reference implementation git-lob-server supports communicating over SSH.
    53  
    54  The smart provider is capable of optimising uploads and downloads by exchanging
    55  binary deltas with the server. Smart servers can also implement other features
    56  like proxy caching.
    57  
    58  Required parameters in remote section of .gitconfig:
    59      git-lob-url    URL which can be used to establish a connection
    60                     (SSH URLs only for now - more options in future)
    61  
    62  Example configuration:
    63      [remote "origin"]
    64          url = git@blah.com/your/usual/git/repo
    65          git-lob-provider = smart
    66          git-lob-url = me@someserver.com/path/to/binary/store
    67  
    68  When uploading & downloading, to avoid partially written files when interrupted
    69  a temporary file is created first, then moved to the final location on 
    70  completion. While we clean up files on error and exit, if forcibly interrupted
    71  temporary files may remain; these are called 'tempupload*' and 'tempdownload*'
    72  in the target file structure and can be safely deleted if older than 24h.
    73  `
    74  }
    75  
    76  func (self *SmartSyncProviderImpl) ValidateConfig(remoteName string) error {
    77  	return self.retrieveUrl(remoteName)
    78  }
    79  
    80  func (self *SmartSyncProviderImpl) Release() {
    81  	if self.transport != nil {
    82  		self.transport.Release()
    83  		self.transport = nil
    84  	}
    85  	self.serverCaps = nil
    86  	self.serverUrl = nil
    87  	self.remoteName = ""
    88  }
    89  
    90  func (self *SmartSyncProviderImpl) retrieveUrl(remoteName string) error {
    91  	urlsetting := fmt.Sprintf("remote.%v.git-lob-url", remoteName)
    92  	urlstr := util.GlobalOptions.GitConfig[urlsetting]
    93  	if urlstr == "" {
    94  		return fmt.Errorf("Configuration invalid for 'smart', missing setting %v", urlsetting)
    95  	}
    96  	// Check URL is valid
    97  	u, err := url.Parse(urlstr)
    98  	if err != nil {
    99  		return fmt.Errorf("Invalid git-lob-url setting '%v': %v", urlstr, err.Error())
   100  	}
   101  	self.serverUrl = u
   102  	return nil
   103  }
   104  
   105  // Internal method to make sure we've established a connection
   106  // we re-use connections where possible (TODO disconnection issues?)
   107  func (self *SmartSyncProviderImpl) connect(remoteName string) error {
   108  	if remoteName != self.remoteName || self.transport == nil {
   109  		if self.transport != nil {
   110  			self.transport.Release()
   111  			self.transport = nil
   112  		}
   113  		self.serverCaps = nil
   114  		self.enabledCaps = nil
   115  		if self.serverUrl == nil {
   116  			err := self.retrieveUrl(remoteName)
   117  			if err != nil {
   118  				return err
   119  			}
   120  		}
   121  		// use serverURL to establish transport
   122  		tf := GetTransportFactory(self.serverUrl)
   123  		if tf == nil {
   124  			return fmt.Errorf("Unsupported URL: %v", self.serverUrl)
   125  		}
   126  		var err error
   127  		self.transport, err = tf.Connect(self.serverUrl)
   128  		if err != nil {
   129  			return err
   130  		}
   131  		self.remoteName = remoteName
   132  
   133  		err = self.determineCaps()
   134  		if err != nil {
   135  			return err
   136  		}
   137  	}
   138  	return nil
   139  }
   140  
   141  // Negotiate with the server to determine capabilities
   142  func (self *SmartSyncProviderImpl) determineCaps() error {
   143  	var err error
   144  	self.serverCaps, err = self.transport.QueryCaps()
   145  	if err != nil {
   146  		return err
   147  	}
   148  	// Always enable deltas if available
   149  	self.enabledCaps = nil
   150  	for _, c := range self.serverCaps {
   151  		if c == "binary_delta" {
   152  			self.enabledCaps = append(self.enabledCaps, c)
   153  		}
   154  		// nothing else for now
   155  	}
   156  	err = self.transport.SetEnabledCaps(self.enabledCaps)
   157  	if err != nil {
   158  		return err
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  // This is the file-based upload (i.e. a meta or a chunk) so no deltas here
   165  // Client will use delta alts if it wants
   166  func (self *SmartSyncProviderImpl) Upload(remoteName string, filenames []string, fromDir string,
   167  	force bool, callback providers.SyncProgressCallback) error {
   168  
   169  	err := self.connect(remoteName)
   170  	if err != nil {
   171  		return err
   172  	}
   173  
   174  	var errorList []string
   175  	for _, filename := range filenames {
   176  		// Allow aborting
   177  		newerrs, abort := self.uploadSingleFile(remoteName, filename, fromDir, force, callback)
   178  		errorList = append(errorList, newerrs...)
   179  		if abort {
   180  			break
   181  		}
   182  	}
   183  
   184  	if len(errorList) > 0 {
   185  		return errors.New(strings.Join(errorList, "\n"))
   186  	}
   187  
   188  	return nil
   189  }
   190  
   191  // This is the file-based download (i.e. a meta or a chunk) so no deltas here
   192  // Client will use delta alts if it wants
   193  func (self *SmartSyncProviderImpl) Download(remoteName string, filenames []string, toDir string,
   194  	force bool, callback providers.SyncProgressCallback) error {
   195  
   196  	err := self.connect(remoteName)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	var errorList []string
   202  	for _, filename := range filenames {
   203  		// Allow aborting
   204  		newerrs, abort := self.downloadSingleFile(remoteName, filename, toDir, force, callback)
   205  		errorList = append(errorList, newerrs...)
   206  		if abort {
   207  			break
   208  		}
   209  	}
   210  
   211  	if len(errorList) > 0 {
   212  		return errors.New(strings.Join(errorList, "\n"))
   213  	}
   214  
   215  	return nil
   216  }
   217  
   218  func (self *SmartSyncProviderImpl) parseFilename(filename string) (sha string, ischunk bool, chunk int) {
   219  	parts := strings.FieldsFunc(filename, func(r rune) bool {
   220  		switch r {
   221  		case '/', '_':
   222  			return true
   223  		}
   224  		return false
   225  	})
   226  	if len(parts) < 2 {
   227  		// Invalid
   228  		return "", false, 0
   229  	}
   230  	// last part will be 'meta' or a number
   231  	suffix := parts[len(parts)-1]
   232  	// second to last will be sha
   233  	thesha := parts[len(parts)-2]
   234  	if suffix == "meta" {
   235  		return thesha, false, 0
   236  	} else {
   237  		c, _ := strconv.ParseInt(suffix, 10, 32)
   238  		return thesha, true, int(c)
   239  	}
   240  }
   241  
   242  func (self *SmartSyncProviderImpl) FileExists(remoteName, filename string) bool {
   243  	err := self.connect(remoteName)
   244  	if err != nil {
   245  		return false
   246  	}
   247  
   248  	sha, ischunk, chunk := self.parseFilename(filename)
   249  	var exists bool
   250  	if ischunk {
   251  		exists, _, _ = self.transport.ChunkExists(sha, chunk)
   252  	} else {
   253  		exists, _, _ = self.transport.MetadataExists(sha)
   254  		return exists
   255  	}
   256  	return exists
   257  }
   258  func (self *SmartSyncProviderImpl) FileExistsAndIsOfSize(remoteName, filename string, sz int64) bool {
   259  	err := self.connect(remoteName)
   260  	if err != nil {
   261  		return false
   262  	}
   263  	sha, ischunk, chunk := self.parseFilename(filename)
   264  	var exists bool
   265  	if ischunk {
   266  		exists, _ = self.transport.ChunkExistsAndIsOfSize(sha, chunk, sz)
   267  	} else {
   268  		// Never check size for meta
   269  		exists, _, _ = self.transport.MetadataExists(sha)
   270  	}
   271  	return exists
   272  }
   273  
   274  func (self *SmartSyncProviderImpl) downloadSingleFile(remoteName, filename, toDir string,
   275  	force bool, callback providers.SyncProgressCallback) (errorList []string, abort bool) {
   276  
   277  	sha, ischunk, chunk := self.parseFilename(filename)
   278  	var exists bool
   279  	var sz int64
   280  	if ischunk {
   281  		exists, sz, _ = self.transport.ChunkExists(sha, chunk)
   282  	} else {
   283  		exists, sz, _ = self.transport.MetadataExists(sha)
   284  	}
   285  	if !exists {
   286  		if callback != nil {
   287  			if callback(filename, util.ProgressNotFound, 0, 0) {
   288  				return errorList, true
   289  			}
   290  		}
   291  		// Note how we don't add an error to the returned error list
   292  		// As per provider docs, we simply tell callback it happened & treat it
   293  		// as a skipped item otherwise, since caller can only request files & not know
   294  		// if they're on the remote or not
   295  		// Keep going with other files
   296  		return errorList, false
   297  	}
   298  
   299  	destfilename := filepath.Join(toDir, filename)
   300  	if !force {
   301  		// Check existence & size before downloading
   302  		if destfi, err := os.Stat(destfilename); err == nil {
   303  			// File exists locally, check the size
   304  			if destfi.Size() == sz {
   305  				// File already present and correct size, skip
   306  				if callback != nil {
   307  					if callback(filename, util.ProgressSkip, sz, sz) {
   308  						return errorList, true
   309  					}
   310  				}
   311  				return errorList, false
   312  			}
   313  		}
   314  	}
   315  
   316  	// Make sure dest dir exists
   317  	parentDir := filepath.Dir(destfilename)
   318  	err := os.MkdirAll(parentDir, 0755)
   319  	if err != nil {
   320  		msg := fmt.Sprintf("Unable to create dir %v: %v", parentDir, err)
   321  		errorList = append(errorList, msg)
   322  		return errorList, false
   323  	}
   324  	// Create a temporary file to copy, avoid issues with interruptions
   325  	// Note this isn't a valid thing to do in security conscious cases but this isn't one
   326  	// by opening the file we will get a unique temp file name (albeit a predictable one)
   327  	outf, err := ioutil.TempFile(parentDir, "tempdownload")
   328  	if err != nil {
   329  		msg := fmt.Sprintf("Unable to create temp file for download in %v: %v", parentDir, err)
   330  		errorList = append(errorList, msg)
   331  		return errorList, false
   332  	}
   333  	tmpfilename := outf.Name()
   334  	// This is safe to do even though we manually close & rename because both calls are no-ops if we succeed
   335  	defer func() {
   336  		outf.Close()
   337  		os.Remove(tmpfilename)
   338  	}()
   339  	var abortAfterThisFile bool
   340  	completecallbackdone := false
   341  	localcallback := func(bytesDone, totalBytes int64) {
   342  		if callback != nil {
   343  			if bytesDone == totalBytes {
   344  				completecallbackdone = true
   345  			}
   346  			if callback(filename, util.ProgressTransferBytes, bytesDone, totalBytes) {
   347  				// Can't abort in the middle of a transfer with smart protocol
   348  				abortAfterThisFile = true
   349  			}
   350  		}
   351  	}
   352  	// Initial callback
   353  	if callback != nil {
   354  		if callback(filename, util.ProgressTransferBytes, 0, sz) {
   355  			return errorList, true
   356  		}
   357  	}
   358  	if ischunk {
   359  		err = self.transport.DownloadChunk(sha, chunk, outf, localcallback)
   360  	} else {
   361  		err = self.transport.DownloadMetadata(sha, outf)
   362  	}
   363  	outf.Close()
   364  	if err != nil {
   365  		os.Remove(tmpfilename)
   366  		msg := fmt.Sprintf("Problem while downloading %v from %v: %v", filename, remoteName, err)
   367  		errorList = append(errorList, msg)
   368  		return errorList, abortAfterThisFile
   369  	}
   370  	// Make sure we do completion
   371  	if callback != nil && !completecallbackdone {
   372  		if callback(filename, util.ProgressTransferBytes, sz, sz) {
   373  			return errorList, true
   374  		}
   375  	}
   376  	// Move to correct location - remove before to deal with force or bad size cases
   377  	os.Remove(destfilename)
   378  	os.Rename(tmpfilename, destfilename)
   379  	return errorList, abortAfterThisFile
   380  }
   381  
   382  func (self *SmartSyncProviderImpl) uploadSingleFile(remoteName, filename, fromDir string,
   383  	force bool, callback providers.SyncProgressCallback) (errorList []string, abort bool) {
   384  
   385  	// Check to see if the file is already there, right size
   386  	srcfilename := filepath.Join(fromDir, filename)
   387  	srcfi, err := os.Stat(srcfilename)
   388  	if err != nil {
   389  		if callback != nil {
   390  			if callback(filename, util.ProgressNotFound, 0, 0) {
   391  				return errorList, true
   392  			}
   393  		}
   394  		msg := fmt.Sprintf("Unable to stat %v: %v", srcfilename, err)
   395  		errorList = append(errorList, msg)
   396  		// Keep going with other files
   397  		return errorList, false
   398  	}
   399  
   400  	if !force {
   401  		// Check existence & size before uploading
   402  		if self.FileExistsAndIsOfSize(remoteName, filename, srcfi.Size()) {
   403  			// File already present and correct size, skip
   404  			if callback != nil {
   405  				if callback(filename, util.ProgressSkip, srcfi.Size(), srcfi.Size()) {
   406  					return errorList, true
   407  				}
   408  			}
   409  			return errorList, false
   410  		}
   411  	}
   412  
   413  	sha, ischunk, chunk := self.parseFilename(filename)
   414  
   415  	// Initial callback
   416  	if callback != nil {
   417  		if callback(filename, util.ProgressTransferBytes, 0, srcfi.Size()) {
   418  			return errorList, true
   419  		}
   420  	}
   421  	var abortAfterThisFile bool
   422  	completecallbackdone := false
   423  	localcallback := func(bytesDone, totalBytes int64) {
   424  		if callback != nil {
   425  			if bytesDone == totalBytes {
   426  				completecallbackdone = true
   427  			}
   428  			if callback(filename, util.ProgressTransferBytes, bytesDone, totalBytes) {
   429  				// Can't abort in the middle of a transfer with smart protocol
   430  				abortAfterThisFile = true
   431  			}
   432  		}
   433  	}
   434  	inf, err := os.OpenFile(srcfilename, os.O_RDONLY, 0644)
   435  	if err != nil {
   436  		msg := fmt.Sprintf("Unable to read input file for upload %v: %v", srcfilename, err)
   437  		errorList = append(errorList, msg)
   438  		return errorList, abortAfterThisFile
   439  	}
   440  	defer inf.Close()
   441  	if ischunk {
   442  		err = self.transport.UploadChunk(sha, chunk, srcfi.Size(), inf, localcallback)
   443  	} else {
   444  		err = self.transport.UploadMetadata(sha, srcfi.Size(), inf)
   445  	}
   446  	if err != nil {
   447  		msg := fmt.Sprintf("Problem while uploading %v to %v: %v", srcfilename, remoteName, err)
   448  		errorList = append(errorList, msg)
   449  	}
   450  	// Make sure we do completion
   451  	if callback != nil && !completecallbackdone {
   452  		if callback(filename, util.ProgressTransferBytes, srcfi.Size(), srcfi.Size()) {
   453  			return errorList, true
   454  		}
   455  	}
   456  
   457  	return errorList, abortAfterThisFile
   458  
   459  }
   460  
   461  // Whether a LOB exists in full on the remote, and gets its size
   462  func (self *SmartSyncProviderImpl) LOBExists(remoteName, sha string) (ex bool, sz int64) {
   463  	err := self.connect(remoteName)
   464  	if err != nil {
   465  		return false, 0
   466  	}
   467  
   468  	exists, sz, _ := self.transport.LOBExists(sha)
   469  	return exists, sz
   470  }
   471  
   472  func (self *SmartSyncProviderImpl) PrepareDeltaForDownload(remoteName, sha string, candidateBaseSHAs []string) (size int64, base string, e error) {
   473  	err := self.connect(remoteName)
   474  	if err != nil {
   475  		return 0, "", err
   476  	}
   477  	baseSHA, err := self.transport.GetFirstCompleteLOBFromList(candidateBaseSHAs)
   478  	if err != nil {
   479  		return 0, "", err
   480  	}
   481  	if baseSHA == "" {
   482  		// no common base
   483  		return 0, "", nil
   484  	}
   485  	sz, err := self.transport.DownloadDeltaPrepare(baseSHA, sha)
   486  	if err != nil {
   487  		return 0, baseSHA, err
   488  	}
   489  	return sz, baseSHA, nil
   490  }
   491  
   492  // Download delta of LOB content (must be applied later)
   493  func (self *SmartSyncProviderImpl) DownloadDelta(remoteName, basesha, targetsha string, out io.Writer, callback providers.SyncProgressCallback) error {
   494  	err := self.connect(remoteName)
   495  	if err != nil {
   496  		return err
   497  	}
   498  	description := fmt.Sprintf("Delta %v..%v", basesha[:7], targetsha[:7])
   499  	localcallback := func(bytesDone, totalBytes int64) {
   500  		callback(description, util.ProgressTransferBytes, bytesDone, totalBytes)
   501  	}
   502  	ok, err := self.transport.DownloadDelta(basesha, targetsha, 1024*1024*1024, out, localcallback)
   503  	if !ok {
   504  		return fmt.Errorf("Server chose not to provide a delta for %v", targetsha)
   505  	}
   506  	return err
   507  }
   508  
   509  func (self *SmartSyncProviderImpl) GetFirstCompleteLOBFromList(remoteName string, candidateSHAs []string) (string, error) {
   510  	err := self.connect(remoteName)
   511  	if err != nil {
   512  		return "", err
   513  	}
   514  	return self.transport.GetFirstCompleteLOBFromList(candidateSHAs)
   515  }
   516  
   517  // Upload delta of LOB content (must be calculated first)
   518  func (self *SmartSyncProviderImpl) UploadDelta(remoteName, basesha, targetsha string, in io.Reader, size int64, callback providers.SyncProgressCallback) error {
   519  	err := self.connect(remoteName)
   520  	if err != nil {
   521  		return err
   522  	}
   523  	description := fmt.Sprintf("Delta %v..%v", basesha[:7], targetsha[:7])
   524  	localcallback := func(bytesDone, totalBytes int64) {
   525  		callback(description, util.ProgressTransferBytes, bytesDone, totalBytes)
   526  	}
   527  	ok, err := self.transport.UploadDelta(basesha, targetsha, size, in, localcallback)
   528  	if !ok {
   529  		return fmt.Errorf("Server chose not to accept a delta for %v", targetsha)
   530  	}
   531  	return err
   532  }
   533  
   534  // Init core smart providers
   535  func InitCoreProviders() {
   536  	// SSH transport
   537  	RegisterSshTransportFactory()
   538  	// Smart sync provider is a single instance which uses the transports to figure out concrete connection
   539  	// from a URL. Only implementation right now is persistent/SSH but can have different modes (e.g. transient)
   540  	// and different underlying network protocols (e.g. REST)
   541  	providers.RegisterSyncProvider(&SmartSyncProviderImpl{})
   542  }