github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/s3_table_reader.go

// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"fmt"
	"io"
	"net"
	"os"
	"time"

	"golang.org/x/sys/unix"

	"github.com/attic-labs/noms/go/d"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/jpillora/backoff"
)

const (
	s3RangePrefix = "bytes"
	s3BlockSize   = (1 << 10) * 512 // 512K
)

type s3TableReaderAt struct {
	s3 *s3ObjectReader
	h  addr
}

type s3svc interface {
	AbortMultipartUpload(input *s3.AbortMultipartUploadInput) (*s3.AbortMultipartUploadOutput, error)
	CreateMultipartUpload(input *s3.CreateMultipartUploadInput) (*s3.CreateMultipartUploadOutput, error)
	UploadPart(input *s3.UploadPartInput) (*s3.UploadPartOutput, error)
	UploadPartCopy(input *s3.UploadPartCopyInput) (*s3.UploadPartCopyOutput, error)
	CompleteMultipartUpload(input *s3.CompleteMultipartUploadInput) (*s3.CompleteMultipartUploadOutput, error)
	GetObject(input *s3.GetObjectInput) (*s3.GetObjectOutput, error)
	PutObject(input *s3.PutObjectInput) (*s3.PutObjectOutput, error)
}

func (s3tra *s3TableReaderAt) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) {
	return s3tra.s3.ReadAt(s3tra.h, p, off, stats)
}

// TODO: Bring all the multipart upload and remote-conjoin stuff over here and
// make this a better analogue to ddbTableStore.
type s3ObjectReader struct {
	s3     s3svc
	bucket string
	readRl chan struct{}
	tc     tableCache
}

func (s3or *s3ObjectReader) ReadAt(name addr, p []byte, off int64, stats *Stats) (n int, err error) {
	t1 := time.Now()

	if s3or.tc != nil {
		r := s3or.tc.checkout(name)
		if r != nil {
			defer func() {
				stats.FileBytesPerRead.Sample(uint64(len(p)))
				stats.FileReadLatency.SampleTimeSince(t1)
			}()
			defer s3or.tc.checkin(name)
			return r.ReadAt(p, off)
		}
	}

	defer func() {
		stats.S3BytesPerRead.Sample(uint64(len(p)))
		stats.S3ReadLatency.SampleTimeSince(t1)
	}()
	return s3or.readRange(name, p, s3RangeHeader(off, int64(len(p))))
}
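
// s3RangeHeader renders the value for an HTTP Range request header covering
// length bytes starting at off; e.g. s3RangeHeader(1024, 512) returns
// "bytes=1024-1535".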
func s3RangeHeader(off, length int64) string {
	lastByte := off + length - 1 // insanely, the HTTP range header specifies ranges inclusively.
	return fmt.Sprintf("%s=%d-%d", s3RangePrefix, off, lastByte)
}

func (s3or *s3ObjectReader) ReadFromEnd(name addr, p []byte, stats *Stats) (n int, err error) {
	// TODO: enable this to use the tableCache. The wrinkle is that the tableCache currently
	// just returns a ReaderAt, which doesn't give you the length of the object that backs it,
	// so you can't calculate an offset if all you know is that you want the last N bytes.
	defer func(t1 time.Time) {
		stats.S3BytesPerRead.Sample(uint64(len(p)))
		stats.S3ReadLatency.SampleTimeSince(t1)
	}(time.Now())
	return s3or.readRange(name, p, fmt.Sprintf("%s=-%d", s3RangePrefix, len(p)))
}

func (s3or *s3ObjectReader) readRange(name addr, p []byte, rangeHeader string) (n int, err error) {
	read := func() (int, error) {
		if s3or.readRl != nil {
			s3or.readRl <- struct{}{}
			defer func() {
				<-s3or.readRl
			}()
		}

		input := &s3.GetObjectInput{
			Bucket: aws.String(s3or.bucket),
			Key:    aws.String(name.String()),
			Range:  aws.String(rangeHeader),
		}
		result, err := s3or.s3.GetObject(input)
		d.PanicIfError(err)
		defer result.Body.Close()
		d.PanicIfFalse(*result.ContentLength == int64(len(p)))

		n, err := io.ReadFull(result.Body, p)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed ranged read from S3\n%s\nerr type: %T\nerror: %v\n", input.GoString(), err, err)
		}
		return n, err
	}

	n, err = read()
	// We hit the point of diminishing returns investigating #3255, so add retries. In
	// conversations with AWS people, it's not surprising to get transient failures when
	// talking to S3, though SDKs are intended to do their own retrying. The issue may be
	// that, in Go, making the S3 request and reading the data are separate operations, and
	// the SDK can't do its own retrying to handle failures in the latter.
	if isConnReset(err) {
		// We are backing off here because it's possible, and likely, that the rate of
		// requests to S3 is the underlying issue.
		b := &backoff.Backoff{
			Min:    128 * time.Microsecond,
			Max:    1024 * time.Millisecond,
			Factor: 2,
			Jitter: true,
		}
		for ; isConnReset(err); n, err = read() {
			dur := b.Duration()
			fmt.Fprintf(os.Stderr, "Retrying S3 read in %s\n", dur.String())
			time.Sleep(dur)
		}
	}
	return
}

func isConnReset(err error) bool {
	nErr, ok := err.(*net.OpError)
	if !ok {
		return false
	}
	scErr, ok := nErr.Err.(*os.SyscallError)
	return ok && scErr.Err == unix.ECONNRESET
}
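
// Note that the type assertions above only match an exact *net.OpError wrapping
// an *os.SyscallError. On Go 1.13+ an equivalent check could let errors.Is walk
// that wrapping chain instead; a minimal sketch, assuming the standard library
// errors package (not otherwise used in this file):
//
//	func isConnReset(err error) bool {
//		return errors.Is(err, unix.ECONNRESET)
//	}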