github.com/atlassian/git-lob@v0.0.0-20150806085256-2386a5ed291a/providers/s3.go (about)

     1  package providers
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/atlassian/git-lob/Godeps/_workspace/src/github.com/mitchellh/go-homedir"
    13  	"github.com/atlassian/git-lob/Godeps/_workspace/src/github.com/mitchellh/goamz/aws"
    14  	"github.com/atlassian/git-lob/Godeps/_workspace/src/github.com/mitchellh/goamz/s3"
    15  	"github.com/atlassian/git-lob/util"
    16  )
    17  
// S3SyncProvider implements the basic SyncProvider interface for S3
type S3SyncProvider struct {
	// S3Connection is the lazily-created connection to S3; nil until
	// getS3Connection/initS3 runs, and reset to nil by Release.
	S3Connection *s3.S3
	// Buckets is intended to cache bucket names for this connection.
	// NOTE(review): nothing visible in this file assigns it (initS3 discards
	// the ListBuckets result) — confirm intended use.
	Buckets []string
}
    23  
// TypeID returns the identifier used to select this provider ("s3") in
// git config, e.g. remote.NAME.git-lob-provider = s3.
func (*S3SyncProvider) TypeID() string {
	return "s3"
}
    27  
// HelpTextSummary returns a one-line description of this provider for
// help listings.
func (*S3SyncProvider) HelpTextSummary() string {
	return `s3: transfers binaries to/from an S3 bucket`
}
    31  
// HelpTextDetail returns the full help text for the s3 provider,
// covering required/optional git config settings and AWS authentication.
func (*S3SyncProvider) HelpTextDetail() string {
	return `The "s3" provider synchronises files with a bucket on Amazon's S3 cloud storage

Required parameters in remote section of .gitconfig:
    git-lob-s3-bucket   The bucket to use as the root remote store. Must already exist.

Optional parameters in the remote section:
    git-lob-s3-region   The AWS region to use. If not specified will use region settings
                        from your ~/.aws/config. If no region is specified, uses US East.
    git-lob-s3-profile  The profile to use to authenticate for this remote. Can also 
                        be set in other ways, see global settings below.

Example configuration:
    [remote "origin"]
        url = git@blah.com/your/usual/git/repo
        git-lob-provider = s3
        git-lob-s3-bucket = my.binary.bucket

Global AWS settings:

  Authentication is performed using the same configuration you'd use with the
  command line AWS tools. Settings are read in this order:

  1. Environment variables i.e. AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
  2. Credentials file in ~/.aws/credentials or %USERPROFILE%\.aws\credentials

  If using the credentials file, the [default] profile is used unless
  you specify otherwise. You can specify what profile to use several ways:
    1. In .git/config, remote.REMOTE.git-lob-s3-profile 
    2. git-lob.s3-profile in repo or global gitconfig
    3. AWS_PROFILE in your environment.

  Region settings are also read from your config file in ~/.aws/config.
  See:
  http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html
  for more details on the configuration process.
`
}
    70  
// S3BufferSize is the chunk size in bytes used when streaming downloads
// from S3 (see the io.CopyN loop in downloadSingleFile).
const S3BufferSize = 131072
    72  
    73  // Configure the profile to use for a given remote. Preferences in order:
    74  // Git setting remote.REMOTENAME.git-lob-s3-profile
    75  // Git setting git-lob.s3-profile
    76  // AWS_PROFILE environment
    77  func (self *S3SyncProvider) configureProfile(remoteName string) {
    78  	// check whether git-lob-s3-profile has been specified; if so override local environment
    79  	// so s3 library will pick it up
    80  	// this allows per-repo credential profiles which is useful
    81  	profilesetting := fmt.Sprintf("remote.%v.git-lob-s3-profile", remoteName)
    82  	profile := strings.TrimSpace(util.GlobalOptions.GitConfig[profilesetting])
    83  	if profile == "" {
    84  		profilesetting = "git-lob.s3-profile"
    85  		profile = strings.TrimSpace(util.GlobalOptions.GitConfig[profilesetting])
    86  	}
    87  	if profile != "" {
    88  		// If we've retrieved the setting from our git config,
    89  		// set it in the environment so S3 lib will use it
    90  		os.Setenv("AWS_PROFILE", profile)
    91  	}
    92  }
    93  
    94  // get auth from the environment or config files
    95  func (self *S3SyncProvider) getAuth() (aws.Auth, error) {
    96  	auth, err := aws.EnvAuth()
    97  	if err != nil {
    98  		auth, err = aws.SharedAuth()
    99  		if err != nil {
   100  			return aws.Auth{}, errors.New("Unable to locate AWS authentication settings in environment or credentials file")
   101  		}
   102  	}
   103  	return auth, nil
   104  }
   105  
   106  // get region from the environment or config files
   107  func (self *S3SyncProvider) getRegion() (aws.Region, error) {
   108  	regstr := os.Getenv("AWS_DEFAULT_REGION")
   109  	if regstr == "" {
   110  		// Look for config file
   111  		profile := os.Getenv("AWS_PROFILE")
   112  		if profile == "" {
   113  			profile = "default"
   114  		}
   115  
   116  		cfgFile := os.Getenv("AWS_CONFIG_FILE")
   117  		if cfgFile == "" {
   118  			home, herr := homedir.Dir()
   119  			if herr == nil {
   120  				cfgFile = filepath.Join(home, ".aws", "config")
   121  			}
   122  		}
   123  		if cfgFile != "" {
   124  			configmap, err := util.ReadConfigFile(cfgFile)
   125  			if err == nil {
   126  				regstr = configmap[fmt.Sprintf("%v.region", profile)]
   127  			}
   128  		}
   129  	}
   130  	if regstr != "" {
   131  		reg, ok := aws.Regions[regstr]
   132  		if ok {
   133  			return reg, nil
   134  		}
   135  	}
   136  	// default
   137  	return aws.USEast, nil
   138  }
   139  func (self *S3SyncProvider) initS3() error {
   140  	// Get auth - try environment first
   141  	auth, err := self.getAuth()
   142  	if err != nil {
   143  		return err
   144  	}
   145  	region, err := self.getRegion()
   146  	if err != nil {
   147  		return err
   148  	}
   149  	self.S3Connection = s3.New(auth, region)
   150  
   151  	// Read bucket list right now since we have no way to probe whether a bucket exists
   152  	self.S3Connection.ListBuckets()
   153  
   154  	return nil
   155  }
   156  func (self *S3SyncProvider) getS3Connection() (*s3.S3, error) {
   157  	if self.S3Connection == nil {
   158  		err := self.initS3()
   159  		if err != nil {
   160  			return nil, err
   161  		}
   162  	}
   163  	return self.S3Connection, nil
   164  }
   165  
   166  func (self *S3SyncProvider) Release() {
   167  	self.S3Connection = nil
   168  	self.Buckets = nil
   169  }
   170  
   171  func (self *S3SyncProvider) getBucketName(remoteName string) (string, error) {
   172  	bucketsetting := fmt.Sprintf("remote.%v.git-lob-s3-bucket", remoteName)
   173  	bucket := strings.TrimSpace(util.GlobalOptions.GitConfig[bucketsetting])
   174  	if bucket == "" {
   175  		return "", fmt.Errorf("Configuration invalid for 'filesystem', missing setting %v", bucketsetting)
   176  	}
   177  	return bucket, nil
   178  }
   179  func (self *S3SyncProvider) getBucket(remoteName string) (*s3.Bucket, error) {
   180  	bucketname, err := self.getBucketName(remoteName)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  	conn, err := self.getS3Connection()
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  	// Make sure we configure the correct profile for access to bucket
   189  	self.configureProfile(remoteName)
   190  	return conn.Bucket(bucketname), nil
   191  }
   192  
   193  func (self *S3SyncProvider) ValidateConfig(remoteName string) error {
   194  	_, err := self.getBucketName(remoteName)
   195  	if err != nil {
   196  		return err
   197  	}
   198  	return nil
   199  }
   200  
   201  func (self *S3SyncProvider) FileExists(remoteName, filename string) bool {
   202  	bucket, err := self.getBucket(remoteName)
   203  	if err != nil {
   204  		return false
   205  	}
   206  	key, err := bucket.GetKey(filename)
   207  	return err == nil && key != nil
   208  }
   209  func (self *S3SyncProvider) FileExistsAndIsOfSize(remoteName, filename string, sz int64) bool {
   210  	bucket, err := self.getBucket(remoteName)
   211  	if err != nil {
   212  		return false
   213  	}
   214  	key, err := bucket.GetKey(filename)
   215  	return err == nil && key != nil && key.Size == sz
   216  }
   217  
// uploadSingleFile uploads one file from fromDir to destBucket.
// Unless force is set, the upload is skipped when the remote key already
// exists with the same size. callback (may be nil) is invoked for progress;
// a true return from callback aborts the whole sync (abort=true).
// Per-file failures are appended to errorList with abort=false so the
// caller can continue with other files.
func (*S3SyncProvider) uploadSingleFile(remoteName, filename, fromDir string, destBucket *s3.Bucket,
	force bool, callback SyncProgressCallback) (errorList []string, abort bool) {
	// Check to see if the file is already there, right size
	srcfilename := filepath.Join(fromDir, filename)
	srcfi, err := os.Stat(srcfilename)
	if err != nil {
		// Local file missing/unreadable: report via callback first (which may
		// abort), then record the error and move on.
		if callback != nil {
			if callback(filename, util.ProgressNotFound, 0, 0) {
				return errorList, true
			}
		}
		msg := fmt.Sprintf("Unable to stat %v: %v", srcfilename, err)
		errorList = append(errorList, msg)
		// Keep going with other files
		return errorList, false
	}

	if !force {
		// Check if already there before uploading
		if key, err := destBucket.GetKey(filename); key != nil && err == nil {
			// File exists on remote, check the size
			if key.Size == srcfi.Size() {
				// File already present and correct size, skip
				if callback != nil {
					if callback(filename, util.ProgressSkip, srcfi.Size(), srcfi.Size()) {
						return errorList, true
					}
				}
				return errorList, false
			}

		}
	}

	// We don't need to create a temporary file on S3 to deal with interrupted uploads, because
	// the file is not fully created in the bucket until fully uploaded
	inf, err := os.OpenFile(srcfilename, os.O_RDONLY, 0644)
	if err != nil {
		msg := fmt.Sprintf("Unable to read input file for upload %v: %v", srcfilename, err)
		errorList = append(errorList, msg)
		return errorList, false
	}
	defer inf.Close()

	// Initial callback
	if callback != nil {
		if callback(filename, util.ProgressTransferBytes, 0, srcfi.Size()) {
			return errorList, true
		}
	}

	// Create a Reader which reports progress as it is read from;
	// it also records whether the callback requested an abort mid-transfer.
	progressReader := NewSyncProgressReader(inf, filename, srcfi.Size(), callback)
	// Note default ACL
	err = destBucket.PutReader(filename, progressReader, srcfi.Size(), "binary/octet-stream", "")
	if err != nil {
		errorList = append(errorList, fmt.Sprintf("Problem while uploading %v to %v: %v", filename, remoteName, err))
	}

	// Propagate any abort requested through the progress reader's callback
	return errorList, progressReader.Aborted

}
   280  
   281  func (self *S3SyncProvider) Upload(remoteName string, filenames []string, fromDir string,
   282  	force bool, callback SyncProgressCallback) error {
   283  
   284  	bucket, err := self.getBucket(remoteName)
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	util.LogDebug("Uploading to S3 bucket", bucket.Name)
   290  
   291  	// Check bucket exists (via HEAD endpoint)
   292  	// This saves us failing on every file
   293  	_, err = bucket.Head("/")
   294  	if err != nil {
   295  		return fmt.Errorf("Unable to access S3 bucket '%v' for remote '%v': %v", bucket.Name, err.Error())
   296  	}
   297  
   298  	var errorList []string
   299  	for _, filename := range filenames {
   300  		// Allow aborting
   301  		newerrs, abort := self.uploadSingleFile(remoteName, filename, fromDir, bucket, force, callback)
   302  		errorList = append(errorList, newerrs...)
   303  		if abort {
   304  			break
   305  		}
   306  	}
   307  
   308  	if len(errorList) > 0 {
   309  		return errors.New(strings.Join(errorList, "\n"))
   310  	}
   311  
   312  	return nil
   313  }
   314  
// downloadSingleFile downloads one file from bucket into toDir, streaming
// via a temp file so an interrupted download never leaves a partial file
// at the destination. Unless force is set, the download is skipped when a
// local file of the correct size already exists. callback (may be nil)
// receives progress; a true return aborts the whole sync (abort=true).
// A file missing on the remote is reported via callback but is NOT added
// to errorList (treated as a skip — see comment below).
func (*S3SyncProvider) downloadSingleFile(remoteName, filename string, bucket *s3.Bucket, toDir string,
	force bool, callback SyncProgressCallback) (errorList []string, abort bool) {

	// Query for existence & size first; we need the size either way to report d/l progress
	key, err := bucket.GetKey(filename)
	if err != nil {
		// File missing on remote
		if callback != nil {
			if callback(filename, util.ProgressNotFound, 0, 0) {
				return errorList, true
			}
		}
		// Note how we don't add an error to the returned error list
		// As per provider docs, we simply tell callback it happened & treat it
		// as a skipped item otherwise, since caller can only request files & not know
		// if they're on the remote or not
		// Keep going with other files
		return errorList, false
	}

	// Check to see if the file is already there, right size
	destfilename := filepath.Join(toDir, filename)
	if !force {
		if destfi, err := os.Stat(destfilename); err == nil {
			// File exists locally, check the size
			if destfi.Size() == key.Size {
				// File already present and correct size, skip
				if callback != nil {
					if callback(filename, util.ProgressSkip, destfi.Size(), destfi.Size()) {
						return errorList, true
					}
				}
				return errorList, false
			}
		}
	}

	// Make sure dest dir exists
	parentDir := filepath.Dir(destfilename)
	err = os.MkdirAll(parentDir, 0755)
	if err != nil {
		msg := fmt.Sprintf("Unable to create dir %v: %v", parentDir, err)
		errorList = append(errorList, msg)
		return errorList, false
	}
	// Create a temporary file to download, avoid issues with interruptions
	// Note this isn't a valid thing to do in security conscious cases but this isn't one
	// by opening the file we will get a unique temp file name (albeit a predictable one)
	outf, err := ioutil.TempFile(parentDir, "tempdownload")
	if err != nil {
		msg := fmt.Sprintf("Unable to create temp file for download in %v: %v", parentDir, err)
		errorList = append(errorList, msg)
		return errorList, false
	}
	tmpfilename := outf.Name()
	// This is safe to do even though we manually close & rename because both calls are no-ops if we succeed
	defer func() {
		outf.Close()
		os.Remove(tmpfilename)
	}()

	inf, err := bucket.GetReader(filename)
	if err != nil {
		msg := fmt.Sprintf("Unable to read file %v from S3 bucket %v for download: %v", filename, bucket.Name, err)
		errorList = append(errorList, msg)
		return errorList, false
	}
	defer inf.Close()

	// Initial callback
	if callback != nil {
		if callback(filename, util.ProgressTransferBytes, 0, key.Size) {
			return errorList, true
		}
	}
	// Copy in S3BufferSize chunks so progress can be reported between chunks.
	// io.CopyN returns io.EOF at end of stream, which breaks the loop; the
	// byte-count check below distinguishes EOF from a genuine error.
	var copysize int64 = 0
	for {
		var n int64
		n, err = io.CopyN(outf, inf, S3BufferSize)
		copysize += n
		if n > 0 && callback != nil && key.Size > 0 {
			// An abort here leaves the temp file to be cleaned up by the defer
			if callback(filename, util.ProgressTransferBytes, copysize, key.Size) {
				return errorList, true
			}
		}
		if err != nil {
			break
		}
	}
	outf.Close()
	inf.Close()
	if copysize != key.Size {
		// Short or failed download: discard the temp file and report
		os.Remove(tmpfilename)
		var msg string
		if err != nil {
			msg = fmt.Sprintf("Problem while downloading %v from S3 bucket %v: %v", filename, bucket.Name, err)
		} else {
			msg = fmt.Sprintf("Download error: number of bytes read from S3 bucket %v in download of %v does not agree (%d/%d)",
				bucket.Name, filename, copysize, key.Size)
		}
		errorList = append(errorList, msg)
		return errorList, false
	}
	// Otherwise, file data is ok on remote
	// Move to correct location - remove before to deal with force or bad size cases
	// NOTE(review): errors from Remove/Rename are ignored here — a failed
	// rename would go unreported; consider checking the Rename error.
	os.Remove(destfilename)
	os.Rename(tmpfilename, destfilename)
	return errorList, false

}
   425  
   426  func (self *S3SyncProvider) Download(remoteName string, filenames []string, toDir string, force bool, callback SyncProgressCallback) error {
   427  
   428  	bucket, err := self.getBucket(remoteName)
   429  	if err != nil {
   430  		return err
   431  	}
   432  
   433  	util.LogDebug("Downloading from S3 bucket", bucket.Name)
   434  
   435  	// Check bucket exists (via HEAD endpoint)
   436  	// This saves us failing on every file
   437  	_, err = bucket.Head("/")
   438  	if err != nil {
   439  		return fmt.Errorf("Unable to access S3 bucket '%v' for remote '%v': %v", bucket.Name, err.Error())
   440  	}
   441  
   442  	var errorList []string
   443  	for _, filename := range filenames {
   444  		// Allow aborting
   445  		newerrs, abort := self.downloadSingleFile(remoteName, filename, bucket, toDir, force, callback)
   446  		errorList = append(errorList, newerrs...)
   447  		if abort {
   448  			break
   449  		}
   450  	}
   451  
   452  	if len(errorList) > 0 {
   453  		return errors.New(strings.Join(errorList, "\n"))
   454  	}
   455  
   456  	return nil
   457  }