github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/misc/amazon/s3/client.go (about) 1 /* 2 Copyright 2011 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package s3 implements a generic Amazon S3 client, not specific 18 // to Camlistore. 19 package s3 20 21 import ( 22 "bytes" 23 "encoding/base64" 24 "encoding/hex" 25 "encoding/xml" 26 "errors" 27 "fmt" 28 "hash" 29 "io" 30 "io/ioutil" 31 "log" 32 "net/http" 33 "net/url" 34 "os" 35 "strconv" 36 "strings" 37 "time" 38 39 "camlistore.org/pkg/httputil" 40 ) 41 42 const maxList = 1000 43 44 // Client is an Amazon S3 client. 
45 type Client struct { 46 *Auth 47 Transport http.RoundTripper // or nil for the default 48 } 49 50 type Bucket struct { 51 Name string 52 CreationDate string // 2006-02-03T16:45:09.000Z 53 } 54 55 func (c *Client) transport() http.RoundTripper { 56 if c.Transport != nil { 57 return c.Transport 58 } 59 return http.DefaultTransport 60 } 61 62 // bucketURL returns the URL prefix of the bucket, with trailing slash 63 func (c *Client) bucketURL(bucket string) string { 64 if IsValidBucket(bucket) && !strings.Contains(bucket, ".") { 65 return fmt.Sprintf("https://%s.%s/", bucket, c.hostname()) 66 } 67 return fmt.Sprintf("https://%s/%s/", c.hostname(), bucket) 68 } 69 70 func (c *Client) keyURL(bucket, key string) string { 71 return c.bucketURL(bucket) + key 72 } 73 74 func newReq(url_ string) *http.Request { 75 req, err := http.NewRequest("GET", url_, nil) 76 if err != nil { 77 panic(fmt.Sprintf("s3 client; invalid URL: %v", err)) 78 } 79 req.Header.Set("User-Agent", "go-camlistore-s3") 80 return req 81 } 82 83 func (c *Client) Buckets() ([]*Bucket, error) { 84 req := newReq("https://" + c.hostname() + "/") 85 c.Auth.SignRequest(req) 86 res, err := c.transport().RoundTrip(req) 87 if err != nil { 88 return nil, err 89 } 90 defer httputil.CloseBody(res.Body) 91 if res.StatusCode != http.StatusOK { 92 return nil, fmt.Errorf("s3: Unexpected status code %d fetching bucket list", res.StatusCode) 93 } 94 return parseListAllMyBuckets(res.Body) 95 } 96 97 func parseListAllMyBuckets(r io.Reader) ([]*Bucket, error) { 98 type allMyBuckets struct { 99 Buckets struct { 100 Bucket []*Bucket 101 } 102 } 103 var res allMyBuckets 104 if err := xml.NewDecoder(r).Decode(&res); err != nil { 105 return nil, err 106 } 107 return res.Buckets.Bucket, nil 108 } 109 110 // Returns 0, os.ErrNotExist if not on S3, otherwise reterr is real. 
111 func (c *Client) Stat(key, bucket string) (size int64, reterr error) { 112 req := newReq(c.keyURL(bucket, key)) 113 req.Method = "HEAD" 114 c.Auth.SignRequest(req) 115 res, err := c.transport().RoundTrip(req) 116 if err != nil { 117 return 0, err 118 } 119 if res.Body != nil { 120 defer res.Body.Close() 121 } 122 switch res.StatusCode { 123 case http.StatusNotFound: 124 return 0, os.ErrNotExist 125 case http.StatusOK: 126 return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64) 127 } 128 return 0, fmt.Errorf("s3: Unexpected status code %d statting object %v", res.StatusCode, key) 129 } 130 131 func (c *Client) PutObject(key, bucket string, md5 hash.Hash, size int64, body io.Reader) error { 132 req := newReq(c.keyURL(bucket, key)) 133 req.Method = "PUT" 134 req.ContentLength = size 135 if md5 != nil { 136 b64 := new(bytes.Buffer) 137 encoder := base64.NewEncoder(base64.StdEncoding, b64) 138 encoder.Write(md5.Sum(nil)) 139 encoder.Close() 140 req.Header.Set("Content-MD5", b64.String()) 141 } 142 c.Auth.SignRequest(req) 143 req.Body = ioutil.NopCloser(body) 144 145 res, err := c.transport().RoundTrip(req) 146 if res != nil && res.Body != nil { 147 defer httputil.CloseBody(res.Body) 148 } 149 if err != nil { 150 return err 151 } 152 if res.StatusCode != http.StatusOK { 153 // res.Write(os.Stderr) 154 return fmt.Errorf("Got response code %d from s3", res.StatusCode) 155 } 156 return nil 157 } 158 159 type Item struct { 160 Key string 161 Size int64 162 } 163 164 type listBucketResults struct { 165 Contents []*Item 166 IsTruncated bool 167 MaxKeys int 168 Name string // bucket name 169 Marker string 170 } 171 172 // BucketLocation returns the S3 hostname to be used with the given bucket. 
173 func (c *Client) BucketLocation(bucket string) (location string, err error) { 174 if !strings.HasSuffix(c.hostname(), "amazonaws.com") { 175 return "", errors.New("BucketLocation not implemented for non-Amazon S3 hostnames") 176 } 177 url_ := fmt.Sprintf("https://s3.amazonaws.com/%s/?location", url.QueryEscape(bucket)) 178 req := newReq(url_) 179 c.Auth.SignRequest(req) 180 res, err := c.transport().RoundTrip(req) 181 if err != nil { 182 return 183 } 184 var xres xmlLocationConstraint 185 if err := xml.NewDecoder(res.Body).Decode(&xres); err != nil { 186 return "", err 187 } 188 if xres.Location == "" { 189 return "s3.amazonaws.com", nil 190 } 191 return "s3-" + xres.Location + ".amazonaws.com", nil 192 } 193 194 // ListBucket returns 0 to maxKeys (inclusive) items from the provided 195 // bucket. Keys before startAt will be skipped. (This is the S3 196 // 'marker' value). If the length of the returned items is equal to 197 // maxKeys, there is no indication whether or not the returned list is 198 // truncated. 
199 func (c *Client) ListBucket(bucket string, startAt string, maxKeys int) (items []*Item, err error) { 200 if maxKeys < 0 { 201 return nil, errors.New("invalid negative maxKeys") 202 } 203 marker := startAt 204 for len(items) < maxKeys { 205 fetchN := maxKeys - len(items) 206 if fetchN > maxList { 207 fetchN = maxList 208 } 209 var bres listBucketResults 210 211 url_ := fmt.Sprintf("%s?marker=%s&max-keys=%d", 212 c.bucketURL(bucket), url.QueryEscape(marker), fetchN) 213 214 // Try the enumerate three times, since Amazon likes to close 215 // https connections a lot, and Go sucks at dealing with it: 216 // https://code.google.com/p/go/issues/detail?id=3514 217 const maxTries = 5 218 for try := 1; try <= maxTries; try++ { 219 time.Sleep(time.Duration(try-1) * 100 * time.Millisecond) 220 req := newReq(url_) 221 c.Auth.SignRequest(req) 222 res, err := c.transport().RoundTrip(req) 223 if err != nil { 224 if try < maxTries { 225 continue 226 } 227 return nil, err 228 } 229 if res.StatusCode != http.StatusOK { 230 if res.StatusCode < 500 { 231 body, _ := ioutil.ReadAll(io.LimitReader(res.Body, 1<<20)) 232 aerr := &Error{ 233 Op: "ListBucket", 234 Code: res.StatusCode, 235 Body: body, 236 Header: res.Header, 237 } 238 aerr.parseXML() 239 res.Body.Close() 240 return nil, aerr 241 } 242 } else { 243 bres = listBucketResults{} 244 var logbuf bytes.Buffer 245 err = xml.NewDecoder(io.TeeReader(res.Body, &logbuf)).Decode(&bres) 246 if err != nil { 247 log.Printf("Error parsing s3 XML response: %v for %q", err, logbuf.Bytes()) 248 } else if bres.MaxKeys != fetchN || bres.Name != bucket || bres.Marker != marker { 249 err = fmt.Errorf("Unexpected parse from server: %#v from: %s", bres, logbuf.Bytes()) 250 log.Print(err) 251 } 252 } 253 httputil.CloseBody(res.Body) 254 if err != nil { 255 if try < maxTries-1 { 256 continue 257 } 258 log.Print(err) 259 return nil, err 260 } 261 break 262 } 263 for _, it := range bres.Contents { 264 if it.Key == marker && it.Key != startAt { 265 // 
Skip first dup on pages 2 and higher. 266 continue 267 } 268 if it.Key < startAt { 269 return nil, fmt.Errorf("Unexpected response from Amazon: item key %q but wanted greater than %q", it.Key, startAt) 270 } 271 items = append(items, it) 272 marker = it.Key 273 } 274 if !bres.IsTruncated { 275 // log.Printf("Not truncated. so breaking. items = %d; len Contents = %d, url = %s", len(items), len(bres.Contents), url_) 276 break 277 } 278 } 279 return items, nil 280 } 281 282 func (c *Client) Get(bucket, key string) (body io.ReadCloser, size int64, err error) { 283 req := newReq(c.keyURL(bucket, key)) 284 c.Auth.SignRequest(req) 285 var res *http.Response 286 res, err = c.transport().RoundTrip(req) 287 if err != nil { 288 return 289 } 290 if res.StatusCode != http.StatusOK && res != nil && res.Body != nil { 291 defer func() { 292 io.Copy(os.Stderr, res.Body) 293 }() 294 } 295 if res.StatusCode == http.StatusNotFound { 296 err = os.ErrNotExist 297 return 298 } 299 if res.StatusCode != http.StatusOK { 300 err = fmt.Errorf("Amazon HTTP error on GET: %d", res.StatusCode) 301 return 302 } 303 return res.Body, res.ContentLength, nil 304 } 305 306 func (c *Client) Delete(bucket, key string) error { 307 req := newReq(c.keyURL(bucket, key)) 308 req.Method = "DELETE" 309 c.Auth.SignRequest(req) 310 res, err := c.transport().RoundTrip(req) 311 if err != nil { 312 return err 313 } 314 if res != nil && res.Body != nil { 315 defer res.Body.Close() 316 } 317 if res.StatusCode == http.StatusNotFound || res.StatusCode == http.StatusNoContent || 318 res.StatusCode == http.StatusOK { 319 return nil 320 } 321 return fmt.Errorf("Amazon HTTP error on DELETE: %d", res.StatusCode) 322 } 323 324 // IsValid reports whether bucket is a valid bucket name, per Amazon's naming restrictions. 
//
// See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
func IsValidBucket(bucket string) bool {
	if n := len(bucket); n < 3 || n > 63 {
		return false
	}

	// last is the previously seen byte. Seeding it with '.' makes the
	// separator rules below also reject a leading '-' or '.'.
	last := byte('.')
	sawLetter := false
	for i := 0; i < len(bucket); i++ {
		ch := bucket[i]
		switch {
		case ch >= 'a' && ch <= 'z':
			sawLetter = true
		case ch >= '0' && ch <= '9':
			// Digits are allowed, but a name made up of nothing but
			// digits and separators is not, so they don't count
			// towards sawLetter.
		case ch == '-':
			if last == '.' {
				return false
			}
		case ch == '.':
			if last == '.' || last == '-' {
				return false
			}
		default:
			// Anything else (upper case, underscore, ...) is invalid.
			return false
		}
		last = ch
	}

	// The name must not end in a separator either.
	return sawLetter && last != '-' && last != '.'
}

// Error is the type returned by some API operations.
//
// TODO: it should be more/all of them.
type Error struct {
	Op     string      // name of the failed operation, e.g. "ListBucket"
	Code   int         // HTTP status code
	Body   []byte      // response body
	Header http.Header // response headers

	// UseEndpoint and AmazonCode are the XML response's Endpoint and
	// Code fields, respectively.
	UseEndpoint string // if a temporary redirect (wrong hostname)
	AmazonCode  string
}

// Error implements the error interface. The response body is included
// in the message only when it contains an <Error> element.
func (e *Error) Error() string {
	msg := fmt.Sprintf("s3.%s: status %d", e.Op, e.Code)
	if bytes.Contains(e.Body, []byte("<Error>")) {
		msg += ": " + string(e.Body)
	}
	return msg
}

// parseXML fills in AmazonCode (and UseEndpoint, for a
// TemporaryRedirect) from the XML error document in e.Body. Parsing is
// best-effort: a body that fails to decode just leaves those fields
// with whatever was decoded before the failure.
func (e *Error) parseXML() {
	var xe xmlError
	_ = xml.Unmarshal(e.Body, &xe) // best-effort, see above
	e.AmazonCode = xe.Code

	switch xe.Code {
	case "TemporaryRedirect":
		e.UseEndpoint = xe.Endpoint
	case "SignatureDoesNotMatch":
		// Log the string-to-sign reported by the server to help debug
		// signing problems. It arrives as space-separated hex bytes; a
		// malformed value simply logs whatever prefix decoded.
		hexDigits := strings.Replace(xe.StringToSignBytes, " ", "", -1)
		want, _ := hex.DecodeString(hexDigits)
		log.Printf("S3 SignatureDoesNotMatch. StringToSign should be %d bytes: %q (%x)", len(want), want, want)
	}
}

// xmlError is the Error response from Amazon.
type xmlError struct {
	XMLName           xml.Name `xml:"Error"`
	Code              string
	Message           string
	RequestId         string
	Bucket            string
	Endpoint          string
	StringToSignBytes string
}

// xmlLocationConstraint is the LocationConstraint returned from BucketLocation.
type xmlLocationConstraint struct {
	XMLName  xml.Name `xml:"LocationConstraint"`
	Location string   `xml:",chardata"`
}