github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/s3_table_reader.go

// Copyright 2019-2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
//     Copyright 2016 Attic Labs, Inc. All rights reserved.
//     Licensed under the Apache License, version 2.0:
//     http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"context"
	"fmt"
	"io"
	"net"
	"os"
	"strconv"
	"strings"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/aws/aws-sdk-go/service/s3/s3iface"
	"github.com/jpillora/backoff"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/hash"
)

const (
	s3RangePrefix = "bytes"
	s3BlockSize   = (1 << 10) * 512 // 512K
)

type s3TableReaderAt struct {
	s3 *s3ObjectReader
	h  hash.Hash
}

func (s3tra *s3TableReaderAt) Close() error {
	return nil
}

func (s3tra *s3TableReaderAt) clone() (tableReaderAt, error) {
	return s3tra, nil
}

func (s3tra *s3TableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
	return s3tra.s3.Reader(ctx, s3tra.h)
}

func (s3tra *s3TableReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) {
	return s3tra.s3.ReadAt(ctx, s3tra.h, p, off, stats)
}

// TODO: Bring all the multipart upload and remote-conjoin stuff over here and make this a better analogue to ddbTableStore
type s3ObjectReader struct {
	s3     s3iface.S3API
	bucket string
	readRl chan struct{}
	ns     string
}

func (s3or *s3ObjectReader) key(k string) string {
	if s3or.ns != "" {
		return s3or.ns + "/" + k
	}
	return k
}

func (s3or *s3ObjectReader) Reader(ctx context.Context, name hash.Hash) (io.ReadCloser, error) {
	return s3or.reader(ctx, name)
}

func (s3or *s3ObjectReader) ReadAt(ctx context.Context, name hash.Hash, p []byte, off int64, stats *Stats) (n int, err error) {
	t1 := time.Now()

	defer func() {
		stats.S3BytesPerRead.Sample(uint64(len(p)))
		stats.S3ReadLatency.SampleTimeSince(t1)
	}()

	n, _, err = s3or.readRange(ctx, name, p, s3RangeHeader(off, int64(len(p))))
	return
}

func s3RangeHeader(off, length int64) string {
	lastByte := off + length - 1 // insanely, the HTTP range header specifies ranges inclusively.
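	// For example, a hypothetical call s3RangeHeader(1024, 512) asks for bytes
	// 1024 through 1535 (inclusive) and renders as "bytes=1024-1535".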
	return fmt.Sprintf("%s=%d-%d", s3RangePrefix, off, lastByte)
}

const maxS3ReadFromEndReqSize = 256 * 1024 * 1024       // 256MB
const preferredS3ReadFromEndReqSize = 128 * 1024 * 1024 // 128MB
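// ReadFromEnd reads the final len(p) bytes of the table file named |name| into p,
// returning the number of bytes read and the total size of the object. Reads larger
// than maxS3ReadFromEndReqSize are split up: the footer is fetched first so the
// object size is known, then the rest is fetched with parallel ranged GETs of at
// most preferredS3ReadFromEndReqSize each. As a hypothetical example, a 300MB read
// would issue one footer request plus three parallel requests of roughly 128MB,
// 128MB, and 44MB.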
func (s3or *s3ObjectReader) ReadFromEnd(ctx context.Context, name hash.Hash, p []byte, stats *Stats) (n int, sz uint64, err error) {
	defer func(t1 time.Time) {
		stats.S3BytesPerRead.Sample(uint64(len(p)))
		stats.S3ReadLatency.SampleTimeSince(t1)
	}(time.Now())
	totalN := uint64(0)
	if len(p) > maxS3ReadFromEndReqSize {
		// If we're bigger than 256MB, parallelize the read...
		// Read the footer first and capture the size of the entire table file.
		n, sz, err := s3or.readRange(ctx, name, p[len(p)-footerSize:], fmt.Sprintf("%s=-%d", s3RangePrefix, footerSize))
		if err != nil {
			return n, sz, err
		}
		totalN += uint64(n)
		eg, egctx := errgroup.WithContext(ctx)
		start := 0
		for start < len(p)-footerSize {
			// Make parallel read requests of up to 128MB.
			end := start + preferredS3ReadFromEndReqSize
			if end > len(p)-footerSize {
				end = len(p) - footerSize
			}
			bs := p[start:end]
			rangeStart := sz - uint64(len(p)) + uint64(start)
			rangeEnd := sz - uint64(len(p)) + uint64(end) - 1
			eg.Go(func() error {
				n, _, err := s3or.readRange(egctx, name, bs, fmt.Sprintf("%s=%d-%d", s3RangePrefix, rangeStart, rangeEnd))
				if err != nil {
					return err
				}
				atomic.AddUint64(&totalN, uint64(n))
				return nil
			})
			start = end
		}
		err = eg.Wait()
		if err != nil {
			return 0, 0, err
		}
		return int(totalN), sz, nil
	}
	return s3or.readRange(ctx, name, p, fmt.Sprintf("%s=-%d", s3RangePrefix, len(p)))
}

func (s3or *s3ObjectReader) reader(ctx context.Context, name hash.Hash) (io.ReadCloser, error) {
	input := &s3.GetObjectInput{
		Bucket: aws.String(s3or.bucket),
		Key:    aws.String(s3or.key(name.String())),
	}
	result, err := s3or.s3.GetObjectWithContext(ctx, input)
	if err != nil {
		return nil, err
	}
	return result.Body, nil
}

func (s3or *s3ObjectReader) readRange(ctx context.Context, name hash.Hash, p []byte, rangeHeader string) (n int, sz uint64, err error) {
	read := func() (int, uint64, error) {
		if s3or.readRl != nil {
			s3or.readRl <- struct{}{}
			defer func() {
				<-s3or.readRl
			}()
		}

		input := &s3.GetObjectInput{
			Bucket: aws.String(s3or.bucket),
			Key:    aws.String(s3or.key(name.String())),
			Range:  aws.String(rangeHeader),
		}

		result, err := s3or.s3.GetObjectWithContext(ctx, input)
		if err != nil {
			return 0, 0, err
		}
		defer result.Body.Close()

		if *result.ContentLength != int64(len(p)) {
			return 0, 0, fmt.Errorf("failed to read entire range, key: %v, len(p): %d, rangeHeader: %s, ContentLength: %d", s3or.key(name.String()), len(p), rangeHeader, *result.ContentLength)
		}

		sz := uint64(0)
		if result.ContentRange != nil {
			i := strings.Index(*result.ContentRange, "/")
			if i != -1 {
				sz, err = strconv.ParseUint((*result.ContentRange)[i+1:], 10, 64)
				if err != nil {
					return 0, 0, err
				}
			}
		}
		n, err = io.ReadFull(result.Body, p)
		return n, sz, err
	}

	n, sz, err = read()
	// We hit the point of diminishing returns investigating #3255, so add retries. In conversations
	// with AWS people, it's not surprising to get transient failures when talking to S3, though SDKs
	// are intended to have their own retrying. The issue may be that, in Go, making the S3 request
	// and reading the data are separate operations, and the SDK can't do its own retrying to handle
	// failures in the latter.
	if isConnReset(err) {
		// We are backing off here because it's possible, and likely, that the rate of requests to S3
		// is the underlying issue.
		b := &backoff.Backoff{
			Min:    128 * time.Microsecond,
			Max:    1024 * time.Millisecond,
			Factor: 2,
			Jitter: true,
		}
		for ; isConnReset(err); n, sz, err = read() {
			dur := b.Duration()
			time.Sleep(dur)
		}
	}

	return n, sz, err
}

func isConnReset(err error) bool {
	nErr, ok := err.(*net.OpError)
	if !ok {
		return false
	}
	scErr, ok := nErr.Err.(*os.SyscallError)
	return ok && scErr.Err == syscall.ECONNRESET
}
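// A minimal usage sketch, assuming a client built with the aws-sdk-go session package
// (github.com/aws/aws-sdk-go/aws/session); the bucket name, namespace, concurrency
// limit, and the ctx/buf/stats/someTableFileHash values below are illustrative
// assumptions, not the wiring Dolt actually uses:
//
//	sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
//	reader := &s3ObjectReader{
//		s3:     s3.New(sess),
//		bucket: "example-table-file-bucket",
//		readRl: make(chan struct{}, 32), // cap concurrent ranged GETs
//		ns:     "example-namespace",
//	}
//	tra := &s3TableReaderAt{s3: reader, h: someTableFileHash}
//	n, err := tra.ReadAtWithStats(ctx, buf, 0, stats)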