github.com/release-engineering/exodus-rsync@v1.11.2/internal/gw/client.go (about) 1 package gw 2 3 import ( 4 "bytes" 5 "context" 6 "crypto/tls" 7 "encoding/json" 8 "fmt" 9 "io" 10 "net/http" 11 "os" 12 "runtime" 13 "sync" 14 "time" 15 16 "github.com/PuerkitoBio/rehttp" 17 "github.com/aws/aws-sdk-go/aws" 18 "github.com/aws/aws-sdk-go/aws/awserr" 19 "github.com/aws/aws-sdk-go/aws/credentials" 20 "github.com/aws/aws-sdk-go/aws/session" 21 "github.com/aws/aws-sdk-go/service/s3" 22 "github.com/aws/aws-sdk-go/service/s3/s3manager" 23 "github.com/release-engineering/exodus-rsync/internal/conf" 24 "github.com/release-engineering/exodus-rsync/internal/log" 25 "github.com/release-engineering/exodus-rsync/internal/walk" 26 ) 27 28 func logConnectionOpen(ctx context.Context, url string) { 29 log.FromContext(ctx).F("url", url).Info("Initializing connection") 30 } 31 32 func logConnectionClose(ctx context.Context, url string) { 33 log.FromContext(ctx).F("url", url).Info("Closing connection") 34 } 35 36 type client struct { 37 cfg conf.Config 38 httpClient *http.Client 39 s3 *s3.S3 40 uploader *s3manager.Uploader 41 dryRun bool 42 } 43 44 func (c *client) doJSONRequest(ctx context.Context, method string, url string, body interface{}, target interface{}, headers map[string][]string) error { 45 var bodyReader io.Reader 46 if body == nil { 47 bodyReader = nil 48 } else { 49 buf := bytes.Buffer{} 50 enc := json.NewEncoder(&buf) 51 if err := enc.Encode(body); err != nil { 52 return fmt.Errorf("encoding request body: %w", err) 53 } 54 bodyReader = &buf 55 } 56 57 fullURL := c.cfg.GwURL() + url 58 req, err := http.NewRequestWithContext(ctx, method, fullURL, bodyReader) 59 60 if err != nil { 61 return fmt.Errorf("preparing request to %s: %w", fullURL, err) 62 } 63 64 req.Header["Accept"] = []string{"application/json"} 65 req.Header["Content-Type"] = []string{"application/json"} 66 // Adding provided headers after setting Accept and Content-Type 67 // headers allows caller to overwrite them if necessary. 68 for key, value := range headers { 69 req.Header[key] = value 70 } 71 72 logConnectionOpen(ctx, fullURL) 73 defer logConnectionClose(ctx, fullURL) 74 75 resp, err := c.httpClient.Do(req) 76 if err != nil { 77 return err 78 } 79 80 defer resp.Body.Close() 81 82 if resp.StatusCode < 200 || resp.StatusCode > 299 { 83 byteSlice, err := io.ReadAll(io.LimitReader(resp.Body, 2000)) 84 if err != nil { 85 log.FromContext(ctx).F("error", err).Debugf( 86 "No body in response for '%s %s'", req.Method, req.URL, 87 ) 88 } else if len(byteSlice) > 0 { 89 return fmt.Errorf("%s %s: %s, %s", req.Method, req.URL, resp.Status, byteSlice) 90 } 91 return fmt.Errorf("%s %s: %s", req.Method, req.URL, resp.Status) 92 } 93 94 dec := json.NewDecoder(resp.Body) 95 err = dec.Decode(target) 96 if err != nil { 97 return fmt.Errorf("%s %s: %w", req.Method, req.URL, err) 98 } 99 100 return nil 101 } 102 103 func (c *client) WhoAmI(ctx context.Context) (map[string]interface{}, error) { 104 out := make(map[string]interface{}) 105 err := c.doJSONRequest(ctx, "GET", "/whoami", nil, &out, nil) 106 return out, err 107 } 108 109 func (c *client) haveBlob(ctx context.Context, item walk.SyncItem) (bool, error) { 110 logger := log.FromContext(ctx) 111 112 fullURL := c.s3.Endpoint + "/" + c.cfg.GwEnv() + "/" + item.Key 113 logConnectionOpen(ctx, fullURL) 114 defer logConnectionClose(ctx, fullURL) 115 116 _, err := c.s3.HeadObject(&s3.HeadObjectInput{ 117 Bucket: aws.String(c.cfg.GwEnv()), 118 Key: aws.String(item.Key), 119 }) 120 121 if err == nil { 122 logger.F("key", item.Key).Info("Skipping upload, blob is present") 123 return true, nil 124 } 125 126 awsErr, isAwsErr := err.(awserr.Error) 127 128 if isAwsErr && awsErr.Code() == "NotFound" { 129 // Fine, object doesn't exist yet 130 logger.F("key", item.Key).Debug("blob is not present") 131 return false, nil 132 } 133 134 // Anything else is unusual 135 logger.F("error", err, "key", item.Key).Warn("S3 HEAD unexpected error") 136 137 return false, err 138 } 139 140 func (c *client) uploadBlob(ctx context.Context, item walk.SyncItem) error { 141 logger := log.FromContext(ctx) 142 143 var err error 144 145 defer logger.F("src", item.SrcPath, "key", item.Key).Trace("Uploading").Stop(&err) 146 147 if c.dryRun { 148 return nil 149 } 150 151 file, err := os.Open(item.SrcPath) 152 if err != nil { 153 return err 154 } 155 defer file.Close() 156 157 fullURL := c.s3.Endpoint + "/" + c.cfg.GwEnv() + "/" + item.Key 158 logConnectionOpen(ctx, fullURL) 159 defer logConnectionClose(ctx, fullURL) 160 161 res, err := c.uploader.UploadWithContext(ctx, &s3manager.UploadInput{ 162 Bucket: aws.String(c.cfg.GwEnv()), 163 Key: &item.Key, 164 Body: file, 165 }) 166 167 if err != nil { 168 return fmt.Errorf("upload %s: %w", item.SrcPath, err) 169 } 170 171 logger.F("location", res.Location).Debug("uploaded blob") 172 173 return nil 174 } 175 176 type uploadState int 177 178 const ( 179 uploaded = iota // uploaded successfully 180 present // skipped because already present 181 duplicate // skipped because it's handled by another item in the same publish 182 failed // tried to upload and failed 183 ) 184 185 type uploadResult struct { 186 State uploadState 187 Error error 188 Item walk.SyncItem 189 } 190 191 func (c *client) uploadWorker( 192 ctx context.Context, 193 items <-chan walk.SyncItem, 194 results chan<- uploadResult, 195 wg *sync.WaitGroup, 196 workerID int, 197 ) { 198 defer wg.Done() 199 200 for item := range items { 201 // Determine if the blob is already present in the bucket 202 have, err := c.haveBlob(ctx, item) 203 if err != nil { 204 results <- uploadResult{ 205 failed, 206 fmt.Errorf("checking for presence of %s: %w", item.Key, err), 207 item} 208 return 209 } 210 211 // If so, no need to upload it 212 if have { 213 results <- uploadResult{present, nil, item} 214 continue 215 } 216 217 if err := c.uploadBlob(ctx, item); err != nil { 218 results <- uploadResult{failed, err, item} 219 break 220 } 221 222 results <- uploadResult{uploaded, nil, item} 223 log.FromContext(ctx).F("worker", workerID, "goroutines", runtime.NumGoroutine(), "key", item.Key).Debug("upload thread") 224 } 225 } 226 227 func readUploadResults( 228 out chan<- error, 229 cancelFn func(), 230 results <-chan uploadResult, 231 onUploaded func(walk.SyncItem) error, 232 onPresent func(walk.SyncItem) error, 233 onDuplicate func(walk.SyncItem) error, 234 ) { 235 writtenOut := false 236 sendError := func(err error) { 237 if !writtenOut { 238 out <- err 239 writtenOut = true 240 } 241 } 242 243 defer close(out) 244 defer sendError(nil) 245 246 for result := range results { 247 if result.State == failed { 248 sendError(result.Error) 249 cancelFn() 250 } 251 252 callback := func(walk.SyncItem) error { 253 return nil 254 } 255 256 switch result.State { 257 case present: 258 callback = onPresent 259 case duplicate: 260 callback = onDuplicate 261 case uploaded: 262 callback = onUploaded 263 } 264 265 callbackErr := callback(result.Item) 266 267 if callbackErr != nil { 268 sendError(callbackErr) 269 cancelFn() 270 } 271 } 272 } 273 274 func (c *client) EnsureUploaded( 275 ctx context.Context, 276 items []walk.SyncItem, 277 onUploaded func(walk.SyncItem) error, 278 onPresent func(walk.SyncItem) error, 279 onDuplicate func(walk.SyncItem) error, 280 ) error { 281 // Maintain a map of items processed thus far 282 processedItems := make(map[string]walk.SyncItem) 283 284 numThreads := c.cfg.UploadThreads() 285 var wg sync.WaitGroup 286 results := make(chan uploadResult, len(items)) 287 jobs := make(chan walk.SyncItem, len(items)) 288 289 // Make a child context so we can cancel all uploads at once if an error occurs 290 // in any of them. 291 uploadCtx, uploadCancel := context.WithCancel(ctx) 292 293 // These goroutines are responsible for handling each item by reading 294 // from 'jobs' and writing a result per item to 'results'. 295 for i := 0; i < numThreads; i++ { 296 wg.Add(1) 297 go c.uploadWorker(uploadCtx, jobs, results, &wg, i+1) 298 } 299 300 // This goroutine is responsible for reading all the results as they come into 301 // 'results' channel and executing callbacks as needed, as well as calculating 302 // the final error state. 303 // 304 // Ensures that callbacks are invoked as quickly as possible, but only 305 // from a single goroutine. 306 out := make(chan error, 1) 307 go readUploadResults( 308 out, uploadCancel, results, 309 onUploaded, onPresent, onDuplicate) 310 311 // Now send all the items 312 for _, item := range items { 313 if item.Key == "" && item.LinkTo != "" { 314 log.FromContext(ctx).F("uri", item.SrcPath).Debug("Skipping unfollowed symlink") 315 continue 316 } 317 318 // Determine if the item already exists in the final set of items to upload 319 // If so, ensure we put it on the queue only once 320 if _, ok := processedItems[item.Key]; ok { 321 log.FromContext(ctx).F("uri", item.SrcPath).Debug("Skipping duplicate item") 322 // This can bypass 'jobs' completely and go straight to 'results' as we 323 // know there's nothing to be done. 324 results <- uploadResult{duplicate, nil, item} 325 continue 326 } 327 328 processedItems[item.Key] = item 329 jobs <- item 330 } 331 332 // Let the uploaders know there are no more items to process. 333 close(jobs) 334 335 // Wait for uploaders to complete. 336 wg.Wait() 337 338 // Let the results reader know there are no more results coming. 339 close(results) 340 341 // Block for the result reader to complete and return whatever 342 // error (or nil) it calculated. 343 return <-out 344 } 345 346 func retryWithLogging(logger *log.Logger, fn rehttp.RetryFn) rehttp.RetryFn { 347 // Wraps a rehttp.RetryFn to add warnings on retries. 348 return func(attempt rehttp.Attempt) bool { 349 willRetry := fn(attempt) 350 status := "<none>" 351 if attempt.Response != nil { 352 // status is the HTTP response status and that 353 // sometimes won't be present. 354 status = attempt.Response.Status 355 } 356 357 entry := logger.F( 358 "url", attempt.Request.URL, 359 "index", attempt.Index, 360 "method", attempt.Request.Method, 361 "status", status, 362 "error", attempt.Error, 363 ) 364 365 if willRetry { 366 entry.Warn("Retrying failed request") 367 } else { 368 // This is Debug because we get here even for successful 369 // requests, so it's not normally worth logging. 370 // But if we don't log at all, it's hard to find out which 371 // errors are not getting retried in order to tune it. 372 entry.Debug("Not retrying request") 373 } 374 375 return willRetry 376 } 377 } 378 379 func retryTransport(ctx context.Context, cfg conf.Config, rt http.RoundTripper) http.RoundTripper { 380 // Wrap a roundtripper with retries. 381 logger := log.FromContext(ctx) 382 383 retryFn := rehttp.RetryAll( 384 rehttp.RetryMaxRetries(cfg.GwMaxAttempts()), 385 rehttp.RetryAny( 386 rehttp.RetryStatuses(500, 502, 503, 504), 387 rehttp.RetryTimeoutErr(), 388 rehttp.RetryIsErr(func(err error) bool { 389 return err == io.EOF 390 }), 391 ), 392 ) 393 retryFn = retryWithLogging(logger, retryFn) 394 395 return rehttp.NewTransport(rt, 396 retryFn, 397 rehttp.ExpJitterDelay( 398 time.Duration(2)*time.Second, 399 time.Duration(cfg.GwMaxBackoff())*time.Millisecond, 400 ), 401 ) 402 } 403 404 func (impl) NewClient(ctx context.Context, cfg conf.Config) (Client, error) { 405 cert, err := tls.LoadX509KeyPair(cfg.GwCert(), cfg.GwKey()) 406 if err != nil { 407 return nil, fmt.Errorf("can't load cert/key: %w", err) 408 } 409 410 out := &client{cfg: cfg} 411 412 transport := http.Transport{ 413 TLSClientConfig: &tls.Config{ 414 Certificates: []tls.Certificate{cert}, 415 }, 416 } 417 418 // This client is passed into AWS SDK and it should not add any 419 // retry logic because the AWS SDK already does that: 420 s3HttpClient := &http.Client{Transport: &transport} 421 422 // This client is used outside of the AWS SDK (i.e. for requests 423 // to "publish" API) and it should wrap the transport to enable 424 // retries for certain types of error. 425 out.httpClient = &http.Client{Transport: retryTransport(ctx, cfg, &transport)} 426 427 awsLogLevel := aws.LogOff 428 if cfg.Verbosity() > 2 || cfg.LogLevel() == "trace" { 429 awsLogLevel = aws.LogDebug 430 } 431 432 sess, err := ext.awsSessionProvider(session.Options{ 433 SharedConfigState: session.SharedConfigDisable, 434 Config: aws.Config{ 435 Endpoint: aws.String(cfg.GwURL() + "/upload"), 436 S3ForcePathStyle: aws.Bool(true), 437 Region: aws.String("us-east-1"), 438 Credentials: credentials.AnonymousCredentials, 439 HTTPClient: s3HttpClient, 440 Logger: log.FromContext(ctx), 441 LogLevel: aws.LogLevel(awsLogLevel), 442 MaxRetries: aws.Int(cfg.GwMaxAttempts()), 443 }, 444 }) 445 if err != nil { 446 return nil, fmt.Errorf("create AWS session: %w", err) 447 } 448 449 out.s3 = s3.New(sess) 450 out.uploader = s3manager.NewUploaderWithClient(out.s3) 451 452 return out, nil 453 }