github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/misc/amazon/s3/client.go (about)

     1  /*
     2  Copyright 2011 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package s3 implements a generic Amazon S3 client, not specific
    18  // to Camlistore.
    19  package s3
    20  
    21  import (
    22  	"bytes"
    23  	"encoding/base64"
    24  	"encoding/hex"
    25  	"encoding/xml"
    26  	"errors"
    27  	"fmt"
    28  	"hash"
    29  	"io"
    30  	"io/ioutil"
    31  	"log"
    32  	"net/http"
    33  	"net/url"
    34  	"os"
    35  	"strconv"
    36  	"strings"
    37  	"time"
    38  
    39  	"camlistore.org/pkg/httputil"
    40  )
    41  
// maxList is the largest max-keys value that ListBucket asks for in a
// single request; longer listings are assembled from several requests.
// NOTE(review): presumably this mirrors S3's own per-request cap — confirm.
const maxList = 1000
    43  
// Client is an Amazon S3 client.
//
// It embeds *Auth; the request methods in this file call the embedded
// Auth's SignRequest on every request before sending it.
type Client struct {
	*Auth
	// Transport performs the HTTP round trips. When nil,
	// http.DefaultTransport is used instead.
	Transport http.RoundTripper // or nil for the default
}
    49  
// Bucket is one entry of the bucket listing returned by Client.Buckets.
// Its fields are filled in by decoding the XML response.
type Bucket struct {
	Name         string // bucket name
	CreationDate string // 2006-02-03T16:45:09.000Z
}
    54  
    55  func (c *Client) transport() http.RoundTripper {
    56  	if c.Transport != nil {
    57  		return c.Transport
    58  	}
    59  	return http.DefaultTransport
    60  }
    61  
    62  // bucketURL returns the URL prefix of the bucket, with trailing slash
    63  func (c *Client) bucketURL(bucket string) string {
    64  	if IsValidBucket(bucket) && !strings.Contains(bucket, ".") {
    65  		return fmt.Sprintf("https://%s.%s/", bucket, c.hostname())
    66  	}
    67  	return fmt.Sprintf("https://%s/%s/", c.hostname(), bucket)
    68  }
    69  
    70  func (c *Client) keyURL(bucket, key string) string {
    71  	return c.bucketURL(bucket) + key
    72  }
    73  
    74  func newReq(url_ string) *http.Request {
    75  	req, err := http.NewRequest("GET", url_, nil)
    76  	if err != nil {
    77  		panic(fmt.Sprintf("s3 client; invalid URL: %v", err))
    78  	}
    79  	req.Header.Set("User-Agent", "go-camlistore-s3")
    80  	return req
    81  }
    82  
    83  func (c *Client) Buckets() ([]*Bucket, error) {
    84  	req := newReq("https://" + c.hostname() + "/")
    85  	c.Auth.SignRequest(req)
    86  	res, err := c.transport().RoundTrip(req)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	defer httputil.CloseBody(res.Body)
    91  	if res.StatusCode != http.StatusOK {
    92  		return nil, fmt.Errorf("s3: Unexpected status code %d fetching bucket list", res.StatusCode)
    93  	}
    94  	return parseListAllMyBuckets(res.Body)
    95  }
    96  
    97  func parseListAllMyBuckets(r io.Reader) ([]*Bucket, error) {
    98  	type allMyBuckets struct {
    99  		Buckets struct {
   100  			Bucket []*Bucket
   101  		}
   102  	}
   103  	var res allMyBuckets
   104  	if err := xml.NewDecoder(r).Decode(&res); err != nil {
   105  		return nil, err
   106  	}
   107  	return res.Buckets.Bucket, nil
   108  }
   109  
   110  // Returns 0, os.ErrNotExist if not on S3, otherwise reterr is real.
   111  func (c *Client) Stat(key, bucket string) (size int64, reterr error) {
   112  	req := newReq(c.keyURL(bucket, key))
   113  	req.Method = "HEAD"
   114  	c.Auth.SignRequest(req)
   115  	res, err := c.transport().RoundTrip(req)
   116  	if err != nil {
   117  		return 0, err
   118  	}
   119  	if res.Body != nil {
   120  		defer res.Body.Close()
   121  	}
   122  	switch res.StatusCode {
   123  	case http.StatusNotFound:
   124  		return 0, os.ErrNotExist
   125  	case http.StatusOK:
   126  		return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
   127  	}
   128  	return 0, fmt.Errorf("s3: Unexpected status code %d statting object %v", res.StatusCode, key)
   129  }
   130  
   131  func (c *Client) PutObject(key, bucket string, md5 hash.Hash, size int64, body io.Reader) error {
   132  	req := newReq(c.keyURL(bucket, key))
   133  	req.Method = "PUT"
   134  	req.ContentLength = size
   135  	if md5 != nil {
   136  		b64 := new(bytes.Buffer)
   137  		encoder := base64.NewEncoder(base64.StdEncoding, b64)
   138  		encoder.Write(md5.Sum(nil))
   139  		encoder.Close()
   140  		req.Header.Set("Content-MD5", b64.String())
   141  	}
   142  	c.Auth.SignRequest(req)
   143  	req.Body = ioutil.NopCloser(body)
   144  
   145  	res, err := c.transport().RoundTrip(req)
   146  	if res != nil && res.Body != nil {
   147  		defer httputil.CloseBody(res.Body)
   148  	}
   149  	if err != nil {
   150  		return err
   151  	}
   152  	if res.StatusCode != http.StatusOK {
   153  		// res.Write(os.Stderr)
   154  		return fmt.Errorf("Got response code %d from s3", res.StatusCode)
   155  	}
   156  	return nil
   157  }
   158  
// Item is one object entry of a bucket listing, as returned by
// Client.ListBucket. Its fields are filled in by decoding the XML response.
type Item struct {
	Key  string // object key
	Size int64  // NOTE(review): presumably the object size in bytes — confirm
}
   163  
// listBucketResults is the decoded XML response of one bucket-listing
// request. ListBucket compares MaxKeys, Name and Marker against the values
// it sent in order to detect an unexpected or mis-parsed response.
type listBucketResults struct {
	Contents    []*Item // items of this page
	IsTruncated bool    // when false, ListBucket stops requesting further pages
	MaxKeys     int     // expected to equal the max-keys query parameter sent
	Name        string  // bucket name
	Marker      string  // expected to equal the marker query parameter sent
}
   171  
   172  // BucketLocation returns the S3 hostname to be used with the given bucket.
   173  func (c *Client) BucketLocation(bucket string) (location string, err error) {
   174  	if !strings.HasSuffix(c.hostname(), "amazonaws.com") {
   175  		return "", errors.New("BucketLocation not implemented for non-Amazon S3 hostnames")
   176  	}
   177  	url_ := fmt.Sprintf("https://s3.amazonaws.com/%s/?location", url.QueryEscape(bucket))
   178  	req := newReq(url_)
   179  	c.Auth.SignRequest(req)
   180  	res, err := c.transport().RoundTrip(req)
   181  	if err != nil {
   182  		return
   183  	}
   184  	var xres xmlLocationConstraint
   185  	if err := xml.NewDecoder(res.Body).Decode(&xres); err != nil {
   186  		return "", err
   187  	}
   188  	if xres.Location == "" {
   189  		return "s3.amazonaws.com", nil
   190  	}
   191  	return "s3-" + xres.Location + ".amazonaws.com", nil
   192  }
   193  
   194  // ListBucket returns 0 to maxKeys (inclusive) items from the provided
   195  // bucket. Keys before startAt will be skipped. (This is the S3
   196  // 'marker' value). If the length of the returned items is equal to
   197  // maxKeys, there is no indication whether or not the returned list is
   198  // truncated.
   199  func (c *Client) ListBucket(bucket string, startAt string, maxKeys int) (items []*Item, err error) {
   200  	if maxKeys < 0 {
   201  		return nil, errors.New("invalid negative maxKeys")
   202  	}
   203  	marker := startAt
   204  	for len(items) < maxKeys {
   205  		fetchN := maxKeys - len(items)
   206  		if fetchN > maxList {
   207  			fetchN = maxList
   208  		}
   209  		var bres listBucketResults
   210  
   211  		url_ := fmt.Sprintf("%s?marker=%s&max-keys=%d",
   212  			c.bucketURL(bucket), url.QueryEscape(marker), fetchN)
   213  
   214  		// Try the enumerate three times, since Amazon likes to close
   215  		// https connections a lot, and Go sucks at dealing with it:
   216  		// https://code.google.com/p/go/issues/detail?id=3514
   217  		const maxTries = 5
   218  		for try := 1; try <= maxTries; try++ {
   219  			time.Sleep(time.Duration(try-1) * 100 * time.Millisecond)
   220  			req := newReq(url_)
   221  			c.Auth.SignRequest(req)
   222  			res, err := c.transport().RoundTrip(req)
   223  			if err != nil {
   224  				if try < maxTries {
   225  					continue
   226  				}
   227  				return nil, err
   228  			}
   229  			if res.StatusCode != http.StatusOK {
   230  				if res.StatusCode < 500 {
   231  					body, _ := ioutil.ReadAll(io.LimitReader(res.Body, 1<<20))
   232  					aerr := &Error{
   233  						Op:     "ListBucket",
   234  						Code:   res.StatusCode,
   235  						Body:   body,
   236  						Header: res.Header,
   237  					}
   238  					aerr.parseXML()
   239  					res.Body.Close()
   240  					return nil, aerr
   241  				}
   242  			} else {
   243  				bres = listBucketResults{}
   244  				var logbuf bytes.Buffer
   245  				err = xml.NewDecoder(io.TeeReader(res.Body, &logbuf)).Decode(&bres)
   246  				if err != nil {
   247  					log.Printf("Error parsing s3 XML response: %v for %q", err, logbuf.Bytes())
   248  				} else if bres.MaxKeys != fetchN || bres.Name != bucket || bres.Marker != marker {
   249  					err = fmt.Errorf("Unexpected parse from server: %#v from: %s", bres, logbuf.Bytes())
   250  					log.Print(err)
   251  				}
   252  			}
   253  			httputil.CloseBody(res.Body)
   254  			if err != nil {
   255  				if try < maxTries-1 {
   256  					continue
   257  				}
   258  				log.Print(err)
   259  				return nil, err
   260  			}
   261  			break
   262  		}
   263  		for _, it := range bres.Contents {
   264  			if it.Key == marker && it.Key != startAt {
   265  				// Skip first dup on pages 2 and higher.
   266  				continue
   267  			}
   268  			if it.Key < startAt {
   269  				return nil, fmt.Errorf("Unexpected response from Amazon: item key %q but wanted greater than %q", it.Key, startAt)
   270  			}
   271  			items = append(items, it)
   272  			marker = it.Key
   273  		}
   274  		if !bres.IsTruncated {
   275  			// log.Printf("Not truncated. so breaking. items = %d; len Contents = %d, url = %s", len(items), len(bres.Contents), url_)
   276  			break
   277  		}
   278  	}
   279  	return items, nil
   280  }
   281  
   282  func (c *Client) Get(bucket, key string) (body io.ReadCloser, size int64, err error) {
   283  	req := newReq(c.keyURL(bucket, key))
   284  	c.Auth.SignRequest(req)
   285  	var res *http.Response
   286  	res, err = c.transport().RoundTrip(req)
   287  	if err != nil {
   288  		return
   289  	}
   290  	if res.StatusCode != http.StatusOK && res != nil && res.Body != nil {
   291  		defer func() {
   292  			io.Copy(os.Stderr, res.Body)
   293  		}()
   294  	}
   295  	if res.StatusCode == http.StatusNotFound {
   296  		err = os.ErrNotExist
   297  		return
   298  	}
   299  	if res.StatusCode != http.StatusOK {
   300  		err = fmt.Errorf("Amazon HTTP error on GET: %d", res.StatusCode)
   301  		return
   302  	}
   303  	return res.Body, res.ContentLength, nil
   304  }
   305  
   306  func (c *Client) Delete(bucket, key string) error {
   307  	req := newReq(c.keyURL(bucket, key))
   308  	req.Method = "DELETE"
   309  	c.Auth.SignRequest(req)
   310  	res, err := c.transport().RoundTrip(req)
   311  	if err != nil {
   312  		return err
   313  	}
   314  	if res != nil && res.Body != nil {
   315  		defer res.Body.Close()
   316  	}
   317  	if res.StatusCode == http.StatusNotFound || res.StatusCode == http.StatusNoContent ||
   318  		res.StatusCode == http.StatusOK {
   319  		return nil
   320  	}
   321  	return fmt.Errorf("Amazon HTTP error on DELETE: %d", res.StatusCode)
   322  }
   323  
   324  // IsValid reports whether bucket is a valid bucket name, per Amazon's naming restrictions.
   325  //
   326  // See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
   327  func IsValidBucket(bucket string) bool {
   328  	l := len(bucket)
   329  	if l < 3 || l > 63 {
   330  		return false
   331  	}
   332  
   333  	valid := false
   334  	prev := byte('.')
   335  	for i := 0; i < len(bucket); i++ {
   336  		c := bucket[i]
   337  		switch {
   338  		default:
   339  			return false
   340  		case 'a' <= c && c <= 'z':
   341  			valid = true
   342  		case '0' <= c && c <= '9':
   343  			// Is allowed, but bucketname can't be just numbers.
   344  			// Therefore, don't set valid to true
   345  		case c == '-':
   346  			if prev == '.' {
   347  				return false
   348  			}
   349  		case c == '.':
   350  			if prev == '.' || prev == '-' {
   351  				return false
   352  			}
   353  		}
   354  		prev = c
   355  	}
   356  
   357  	if prev == '-' || prev == '.' {
   358  		return false
   359  	}
   360  	return valid
   361  }
   362  
   363  // Error is the type returned by some API operations.
   364  //
   365  // TODO: it should be more/all of them.
   366  type Error struct {
   367  	Op     string
   368  	Code   int         // HTTP status code
   369  	Body   []byte      // response body
   370  	Header http.Header // response headers
   371  
   372  	// UsedEndpoint and AmazonCode are the XML response's Endpoint and
   373  	// Code fields, respectively.
   374  	UseEndpoint string // if a temporary redirect (wrong hostname)
   375  	AmazonCode  string
   376  }
   377  
   378  func (e *Error) Error() string {
   379  	if bytes.Contains(e.Body, []byte("<Error>")) {
   380  		return fmt.Sprintf("s3.%s: status %d: %s", e.Op, e.Code, e.Body)
   381  	}
   382  	return fmt.Sprintf("s3.%s: status %d", e.Op, e.Code)
   383  }
   384  
   385  func (e *Error) parseXML() {
   386  	var xe xmlError
   387  	_ = xml.NewDecoder(bytes.NewReader(e.Body)).Decode(&xe)
   388  	e.AmazonCode = xe.Code
   389  	if xe.Code == "TemporaryRedirect" {
   390  		e.UseEndpoint = xe.Endpoint
   391  	}
   392  	if xe.Code == "SignatureDoesNotMatch" {
   393  		want, _ := hex.DecodeString(strings.Replace(xe.StringToSignBytes, " ", "", -1))
   394  		log.Printf("S3 SignatureDoesNotMatch. StringToSign should be %d bytes: %q (%x)", len(want), want, want)
   395  	}
   396  
   397  }
   398  
   399  // xmlError is the Error response from Amazon.
   400  type xmlError struct {
   401  	XMLName           xml.Name `xml:"Error"`
   402  	Code              string
   403  	Message           string
   404  	RequestId         string
   405  	Bucket            string
   406  	Endpoint          string
   407  	StringToSignBytes string
   408  }
   409  
// xmlLocationConstraint is the LocationConstraint returned from BucketLocation.
//
// Location receives the element's character data. BucketLocation treats an
// empty value as the default "s3.amazonaws.com" endpoint.
type xmlLocationConstraint struct {
	XMLName  xml.Name `xml:"LocationConstraint"`
	Location string   `xml:",chardata"`
}