// github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/atc/worker/client.go

package worker

import (
	"context"
	"crypto/sha256"
	"fmt"
	"io"
	"path"
	"strconv"
	"strings"
	"time"

	"code.cloudfoundry.org/garden"
	"code.cloudfoundry.org/lager"
	"github.com/concourse/baggageclaim"
	"github.com/pf-qiu/concourse/v6/atc"
	"github.com/pf-qiu/concourse/v6/atc/compression"
	"github.com/pf-qiu/concourse/v6/atc/db"
	"github.com/pf-qiu/concourse/v6/atc/db/lock"
	"github.com/pf-qiu/concourse/v6/atc/metric"
	"github.com/pf-qiu/concourse/v6/atc/resource"
	"github.com/pf-qiu/concourse/v6/atc/runtime"
	"github.com/hashicorp/go-multierror"
)

const taskProcessID = "task"
const taskExitStatusPropertyName = "concourse:exit-status"

//go:generate counterfeiter . Client

type Client interface {
	FindContainer(logger lager.Logger, teamID int, handle string) (Container, bool, error)
	FindVolume(logger lager.Logger, teamID int, handle string) (Volume, bool, error)
	CreateVolume(logger lager.Logger, vSpec VolumeSpec, wSpec WorkerSpec, volumeType db.VolumeType) (Volume, error)
	StreamFileFromArtifact(
		ctx context.Context,
		logger lager.Logger,
		artifact runtime.Artifact,
		filePath string,
	) (io.ReadCloser, error)

	RunCheckStep(
		context.Context,
		lager.Logger,
		db.ContainerOwner,
		ContainerSpec,
		WorkerSpec,
		ContainerPlacementStrategy,
		db.ContainerMetadata,
		runtime.ProcessSpec,
		runtime.StartingEventDelegate,
		resource.Resource,
		time.Duration,
	) (CheckResult, error)

	RunTaskStep(
		context.Context,
		lager.Logger,
		db.ContainerOwner,
		ContainerSpec,
		WorkerSpec,
		ContainerPlacementStrategy,
		db.ContainerMetadata,
		runtime.ProcessSpec,
		runtime.StartingEventDelegate,
		lock.LockFactory,
	) (TaskResult, error)

	RunPutStep(
		context.Context,
		lager.Logger,
		db.ContainerOwner,
		ContainerSpec,
		WorkerSpec,
		ContainerPlacementStrategy,
		db.ContainerMetadata,
		runtime.ProcessSpec,
		runtime.StartingEventDelegate,
		resource.Resource,
	) (PutResult, error)

	RunGetStep(
		context.Context,
		lager.Logger,
		db.ContainerOwner,
		ContainerSpec,
		WorkerSpec,
		ContainerPlacementStrategy,
		db.ContainerMetadata,
		runtime.ProcessSpec,
		runtime.StartingEventDelegate,
		db.UsedResourceCache,
		resource.Resource,
	) (GetResult, error)
}

func NewClient(pool Pool,
	provider WorkerProvider,
	compression compression.Compression,
	workerPollingInterval time.Duration,
	workerStatusPublishInterval time.Duration,
	enabledP2pStreaming bool,
	p2pStreamingTimeout time.Duration,
) *client {
	return &client{
		pool:                        pool,
		provider:                    provider,
		compression:                 compression,
		workerPollingInterval:       workerPollingInterval,
		workerStatusPublishInterval: workerStatusPublishInterval,
		enabledP2pStreaming:         enabledP2pStreaming,
		p2pStreamingTimeout:         p2pStreamingTimeout,
	}
}

type client struct {
	pool                        Pool
	provider                    WorkerProvider
	compression                 compression.Compression
	workerPollingInterval       time.Duration
	workerStatusPublishInterval time.Duration
	enabledP2pStreaming         bool
	p2pStreamingTimeout         time.Duration
}

type TaskResult struct {
	ExitStatus   int
	VolumeMounts []VolumeMount
}

type CheckResult struct {
	Versions []atc.Version
}

type PutResult struct {
	ExitStatus    int
	VersionResult runtime.VersionResult
}

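// GetResult is the result of a get step: the process exit status, the
// version (and metadata) that was fetched, and the resulting artifact.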
type GetResult struct {
	ExitStatus    int
	VersionResult runtime.VersionResult
	GetArtifact   runtime.GetArtifact
}

type processStatus struct {
	processStatus int
	processErr    error
}

func (client *client) FindContainer(logger lager.Logger, teamID int, handle string) (Container, bool, error) {
	worker, found, err := client.provider.FindWorkerForContainer(
		logger.Session("find-worker"),
		teamID,
		handle,
	)
	if err != nil {
		return nil, false, err
	}

	if !found {
		return nil, false, nil
	}

	return worker.FindContainerByHandle(logger, teamID, handle)
}

func (client *client) FindVolume(logger lager.Logger, teamID int, handle string) (Volume, bool, error) {
	worker, found, err := client.provider.FindWorkerForVolume(
		logger.Session("find-worker"),
		teamID,
		handle,
	)
	if err != nil {
		return nil, false, err
	}

	if !found {
		return nil, false, nil
	}

	return worker.LookupVolume(logger, handle)
}

func (client *client) CreateVolume(logger lager.Logger, volumeSpec VolumeSpec, workerSpec WorkerSpec, volumeType db.VolumeType) (Volume, error) {
	worker, err := client.pool.FindOrChooseWorker(logger, workerSpec)
	if err != nil {
		return nil, err
	}

	return worker.CreateVolume(logger, volumeSpec, workerSpec.TeamID, volumeType)
}

func (client *client) RunCheckStep(
	ctx context.Context,
	logger lager.Logger,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec,
	strategy ContainerPlacementStrategy,
	containerMetadata db.ContainerMetadata,
	processSpec runtime.ProcessSpec,
	eventDelegate runtime.StartingEventDelegate,
	checkable resource.Resource,
	timeout time.Duration,
) (CheckResult, error) {
	if containerSpec.ImageSpec.ImageArtifact != nil {
		err := client.wireImageVolume(logger, &containerSpec.ImageSpec)
		if err != nil {
			return CheckResult{}, err
		}
	}

	chosenWorker, err := client.pool.FindOrChooseWorkerForContainer(
		ctx,
		logger,
		owner,
		containerSpec,
		workerSpec,
		strategy,
	)
	if err != nil {
		return CheckResult{}, fmt.Errorf("find or choose worker for container: %w", err)
	}

	eventDelegate.SelectedWorker(logger, chosenWorker.Name())

	container, err := chosenWorker.FindOrCreateContainer(
		ctx,
		logger,
		owner,
		containerMetadata,
		containerSpec,
	)
	if err != nil {
		return CheckResult{}, err
	}

	eventDelegate.Starting(logger)

	deadline, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	versions, err := checkable.Check(deadline, processSpec, container)
	if err != nil {
		if err == context.DeadlineExceeded {
			return CheckResult{}, fmt.Errorf("timed out after %v checking for new versions", timeout)
		}

		return CheckResult{}, fmt.Errorf("check: %w", err)
	}

	return CheckResult{Versions: versions}, nil
}

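// RunTaskStep chooses a worker for the task (waiting for capacity when the
// placement strategy limits active tasks), finds or creates the task
// container, and runs the task process, re-attaching if it is already
// running and short-circuiting if it has already exited. It returns the
// process exit status along with the container's volume mounts.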
func (client *client) RunTaskStep(
	ctx context.Context,
	logger lager.Logger,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec,
	strategy ContainerPlacementStrategy,
	metadata db.ContainerMetadata,
	processSpec runtime.ProcessSpec,
	eventDelegate runtime.StartingEventDelegate,
	lockFactory lock.LockFactory,
) (TaskResult, error) {
	err := client.wireInputsAndCaches(logger, &containerSpec)
	if err != nil {
		return TaskResult{}, err
	}

	if containerSpec.ImageSpec.ImageArtifact != nil {
		err = client.wireImageVolume(logger, &containerSpec.ImageSpec)
		if err != nil {
			return TaskResult{}, err
		}
	}

	chosenWorker, err := client.chooseTaskWorker(
		ctx,
		logger,
		strategy,
		lockFactory,
		owner,
		containerSpec,
		workerSpec,
		processSpec.StdoutWriter,
	)
	if err != nil {
		return TaskResult{}, err
	}

	eventDelegate.SelectedWorker(logger, chosenWorker.Name())

	if strategy.ModifiesActiveTasks() {
		defer decreaseActiveTasks(logger.Session("decrease-active-tasks"), chosenWorker)
	}

	container, err := chosenWorker.FindOrCreateContainer(
		ctx,
		logger,
		owner,
		metadata,
		containerSpec,
	)
	if err != nil {
		return TaskResult{}, err
	}

	// container already exited
	exitStatusProp, _ := container.Properties()
	code := exitStatusProp[taskExitStatusPropertyName]
	if code != "" {
		logger.Info("already-exited", lager.Data{"status": code})

		status, err := strconv.Atoi(code)
		if err != nil {
			return TaskResult{}, err
		}

		return TaskResult{
			ExitStatus:   status,
			VolumeMounts: container.VolumeMounts(),
		}, err
	}

	processIO := garden.ProcessIO{
		Stdout: processSpec.StdoutWriter,
		Stderr: processSpec.StderrWriter,
	}

	process, err := container.Attach(context.Background(), taskProcessID, processIO)
	if err == nil {
		logger.Info("already-running")
	} else {
		eventDelegate.Starting(logger)
		logger.Info("spawning")

		process, err = container.Run(
			context.Background(),
			garden.ProcessSpec{
				ID: taskProcessID,

				Path: processSpec.Path,
				Args: processSpec.Args,

				Dir: path.Join(metadata.WorkingDirectory, processSpec.Dir),

				// Guardian sets the default TTY window size to width: 80, height: 24,
				// which creates ANSI control sequences that do not work with other window sizes
				TTY: &garden.TTYSpec{
					WindowSize: &garden.WindowSize{Columns: 500, Rows: 500},
				},
			},
			processIO,
		)
		if err != nil {
			return TaskResult{}, err
		}
	}

	logger.Info("attached")

	exitStatusChan := make(chan processStatus)

	go func() {
		status := processStatus{}
		status.processStatus, status.processErr = process.Wait()
		exitStatusChan <- status
	}()

	select {
	case <-ctx.Done():
		err = container.Stop(false)
		if err != nil {
			logger.Error("stopping-container", err)
		}

		status := <-exitStatusChan
		return TaskResult{
			ExitStatus:   status.processStatus,
			VolumeMounts: container.VolumeMounts(),
		}, ctx.Err()

	case status := <-exitStatusChan:
		if status.processErr != nil {
			return TaskResult{
				ExitStatus: status.processStatus,
			}, status.processErr
		}

		err = container.SetProperty(taskExitStatusPropertyName, fmt.Sprintf("%d", status.processStatus))
		if err != nil {
			return TaskResult{
				ExitStatus: status.processStatus,
			}, err
		}
		return TaskResult{
			ExitStatus:   status.processStatus,
			VolumeMounts: container.VolumeMounts(),
		}, err
	}
}

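// RunGetStep chooses a worker for the get container, emits the selected-worker
// and starting events, and delegates to the worker's Fetch, passing it a lock
// name derived from the resource signature and the chosen worker's name.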
func (client *client) RunGetStep(
	ctx context.Context,
	logger lager.Logger,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec,
	strategy ContainerPlacementStrategy,
	containerMetadata db.ContainerMetadata,
	processSpec runtime.ProcessSpec,
	eventDelegate runtime.StartingEventDelegate,
	resourceCache db.UsedResourceCache,
	resource resource.Resource,
) (GetResult, error) {
	if containerSpec.ImageSpec.ImageArtifact != nil {
		err := client.wireImageVolume(logger, &containerSpec.ImageSpec)
		if err != nil {
			return GetResult{}, err
		}
	}

	chosenWorker, err := client.pool.FindOrChooseWorkerForContainer(
		ctx,
		logger,
		owner,
		containerSpec,
		workerSpec,
		strategy,
	)
	if err != nil {
		return GetResult{}, err
	}

	eventDelegate.SelectedWorker(logger, chosenWorker.Name())

	sign, err := resource.Signature()
	if err != nil {
		return GetResult{}, err
	}

	lockName := lockName(sign, chosenWorker.Name())

	// TODO: this needs to be emitted right before executing the `in` script
	eventDelegate.Starting(logger)

	getResult, _, err := chosenWorker.Fetch(
		ctx,
		logger,
		containerMetadata,
		chosenWorker,
		containerSpec,
		processSpec,
		resource,
		owner,
		resourceCache,
		lockName,
	)
	return getResult, err
}

func (client *client) RunPutStep(
	ctx context.Context,
	logger lager.Logger,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec,
	strategy ContainerPlacementStrategy,
	metadata db.ContainerMetadata,
	spec runtime.ProcessSpec,
	eventDelegate runtime.StartingEventDelegate,
	resource resource.Resource,
) (PutResult, error) {
	if containerSpec.ImageSpec.ImageArtifact != nil {
		err := client.wireImageVolume(logger, &containerSpec.ImageSpec)
		if err != nil {
			return PutResult{}, err
		}
	}

	vr := runtime.VersionResult{}
	err := client.wireInputsAndCaches(logger, &containerSpec)
	if err != nil {
		return PutResult{}, err
	}

	chosenWorker, err := client.pool.FindOrChooseWorkerForContainer(
		ctx,
		logger,
		owner,
		containerSpec,
		workerSpec,
		strategy,
	)
	if err != nil {
		return PutResult{}, err
	}

	eventDelegate.SelectedWorker(logger, chosenWorker.Name())

	container, err := chosenWorker.FindOrCreateContainer(
		ctx,
		logger,
		owner,
		metadata,
		containerSpec,
	)
	if err != nil {
		return PutResult{}, err
	}

	// container already exited
	exitStatusProp, err := container.Property(taskExitStatusPropertyName)
	if err == nil {
		logger.Info("already-exited", lager.Data{"status": exitStatusProp})

		status, err := strconv.Atoi(exitStatusProp)
		if err != nil {
			return PutResult{}, err
		}

		return PutResult{
			ExitStatus:    status,
			VersionResult: runtime.VersionResult{},
		}, nil
	}

	eventDelegate.Starting(logger)

	vr, err = resource.Put(ctx, spec, container)
	if err != nil {
		if failErr, ok := err.(runtime.ErrResourceScriptFailed); ok {
			return PutResult{
				ExitStatus:    failErr.ExitStatus,
				VersionResult: runtime.VersionResult{},
			}, nil
		} else {
			return PutResult{}, err
		}
	}
	return PutResult{
		ExitStatus:    0,
		VersionResult: vr,
	}, nil
}

func (client *client) StreamFileFromArtifact(
	ctx context.Context,
	logger lager.Logger,
	artifact runtime.Artifact,
	filePath string,
) (io.ReadCloser, error) {
	artifactVolume, found, err := client.FindVolume(logger, 0, artifact.ID())
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, baggageclaim.ErrVolumeNotFound
	}

	source := artifactSource{
		artifact:    artifact,
		volume:      artifactVolume,
		compression: client.compression,
	}
	return source.StreamFile(ctx, filePath)
}

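// chooseTaskWorker polls the pool until a worker can be chosen for the task.
// When the placement strategy limits active tasks, selection is serialized
// behind the global active-tasks lock, the chosen worker's active task count
// is increased, and waiting time is reported to the task's output stream and
// to the tasks-waiting metrics.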
func (client *client) chooseTaskWorker(
	ctx context.Context,
	logger lager.Logger,
	strategy ContainerPlacementStrategy,
	lockFactory lock.LockFactory,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec,
	outputWriter io.Writer,
) (Worker, error) {
	var (
		chosenWorker    Worker
		activeTasksLock lock.Lock
		lockAcquired    bool
		elapsed         time.Duration
		err             error
	)

	started := time.Now()
	workerPollingTicker := time.NewTicker(client.workerPollingInterval)
	defer workerPollingTicker.Stop()
	workerStatusPublishTicker := time.NewTicker(client.workerStatusPublishInterval)
	defer workerStatusPublishTicker.Stop()

	tasksWaitingLabels := metric.TasksWaitingLabels{
		TeamId:     strconv.Itoa(workerSpec.TeamID),
		WorkerTags: strings.Join(workerSpec.Tags, "_"),
		Platform:   workerSpec.Platform,
	}

	for {
		if strategy.ModifiesActiveTasks() {
			if activeTasksLock, lockAcquired, err = lockFactory.Acquire(logger, lock.NewActiveTasksLockID()); err != nil {
				return nil, err
			}

			if !lockAcquired {
				time.Sleep(time.Second)
				continue
			}
		}

		if chosenWorker, err = client.pool.FindOrChooseWorkerForContainer(
			ctx,
			logger,
			owner,
			containerSpec,
			workerSpec,
			strategy,
		); err != nil {
			return nil, err
		}

		if !strategy.ModifiesActiveTasks() {
			return chosenWorker, nil
		}

		select {
		case <-ctx.Done():
			logger.Info("aborted-waiting-worker")
			e := multierror.Append(err, activeTasksLock.Release(), ctx.Err())
			return nil, e
		default:
		}

		if chosenWorker != nil {
			err = increaseActiveTasks(logger,
				client.pool,
				chosenWorker,
				activeTasksLock,
				owner,
				containerSpec,
				workerSpec)

			if elapsed > 0 {
				message := fmt.Sprintf("Found a free worker after waiting %s.\n", elapsed.Round(1*time.Second))
				writeOutputMessage(logger, outputWriter, message)
				metric.TasksWaitingDuration{
					Labels:   tasksWaitingLabels,
					Duration: elapsed,
				}.Emit(logger)
			}

			return chosenWorker, err
		}

		err := activeTasksLock.Release()
		if err != nil {
			return nil, err
		}

		// Increase task waiting only once
		if elapsed == 0 {
			_, ok := metric.Metrics.TasksWaiting[tasksWaitingLabels]
			if !ok {
				metric.Metrics.TasksWaiting[tasksWaitingLabels] = &metric.Gauge{}
			}
			metric.Metrics.TasksWaiting[tasksWaitingLabels].Inc()
			defer metric.Metrics.TasksWaiting[tasksWaitingLabels].Dec()
		}

		elapsed = waitForWorker(logger,
			workerPollingTicker,
			workerStatusPublishTicker,
			outputWriter,
			started)
	}
}

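// wireInputsAndCaches resolves each artifact in spec.ArtifactByPath into an
// input source: task caches are wrapped directly (their volumes are found on
// the worker later), while all other artifacts must already have a volume,
// which is wrapped in a streamable artifact source.
//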
// TODO (runtime) don't modify spec inside here, Specs don't change after you write them
func (client *client) wireInputsAndCaches(logger lager.Logger, spec *ContainerSpec) error {
	var inputs []InputSource

	for path, artifact := range spec.ArtifactByPath {
		if cache, ok := artifact.(*runtime.CacheArtifact); ok {
			// task caches may not have a volume, it will be discovered on
			// the worker later. We do not stream task caches
			source := NewCacheArtifactSource(*cache)
			inputs = append(inputs, inputSource{source, path})
		} else {
			artifactVolume, found, err := client.FindVolume(logger, spec.TeamID, artifact.ID())
			if err != nil {
				return err
			}
			if !found {
				return fmt.Errorf("volume not found for artifact id %v type %T", artifact.ID(), artifact)
			}

			source := NewStreamableArtifactSource(artifact, artifactVolume, client.compression, client.enabledP2pStreaming, client.p2pStreamingTimeout)
			inputs = append(inputs, inputSource{source, path})
		}
	}

	spec.Inputs = inputs
	return nil
}

func (client *client) wireImageVolume(logger lager.Logger, spec *ImageSpec) error {
	imageArtifact := spec.ImageArtifact

	artifactVolume, found, err := client.FindVolume(logger, 0, imageArtifact.ID())
	if err != nil {
		return err
	}
	if !found {
		return fmt.Errorf("volume not found for artifact id %v type %T", imageArtifact.ID(), imageArtifact)
	}

	spec.ImageArtifactSource = NewStreamableArtifactSource(imageArtifact, artifactVolume, client.compression, client.enabledP2pStreaming, client.p2pStreamingTimeout)

	return nil
}

func decreaseActiveTasks(logger lager.Logger, w Worker) {
	err := w.DecreaseActiveTasks()
	if err != nil {
		logger.Error("failed-to-decrease-active-tasks", err)
		return
	}
}

func lockName(resourceJSON []byte, workerName string) string {
	jsonRes := append(resourceJSON, []byte(workerName)...)
	return fmt.Sprintf("%x", sha256.Sum256(jsonRes))
}

func waitForWorker(
	logger lager.Logger,
	waitForWorkerTicker, workerStatusTicker *time.Ticker,
	outputWriter io.Writer,
	started time.Time) (elapsed time.Duration) {

	select {
	case <-waitForWorkerTicker.C:
		elapsed = time.Since(started)

	case <-workerStatusTicker.C:
		message := "All workers are busy at the moment, please stand-by.\n"
		writeOutputMessage(logger, outputWriter, message)
		elapsed = time.Since(started)
	}

	return elapsed
}

func writeOutputMessage(logger lager.Logger, outputWriter io.Writer, message string) {
	_, err := outputWriter.Write([]byte(message))
	if err != nil {
		logger.Error("failed-to-report-status", err)
	}
}

func increaseActiveTasks(
	logger lager.Logger,
	pool Pool,
	chosenWorker Worker,
	activeTasksLock lock.Lock,
	owner db.ContainerOwner,
	containerSpec ContainerSpec,
	workerSpec WorkerSpec) (err error) {

	var existingContainer bool
	// pass &err so the deferred release can fold a lock-release failure into
	// the named return error
	defer release(activeTasksLock, &err)

	existingContainer, err = pool.ContainerInWorker(logger, owner, workerSpec)
	if err != nil {
		return err
	}

	if !existingContainer {
		if err = chosenWorker.IncreaseActiveTasks(); err != nil {
			logger.Error("failed-to-increase-active-tasks", err)
		}
	}

	return err
}

// release releases the active-tasks lock and appends any error from doing so
// to the caller's returned error.
func release(activeTasksLock lock.Lock, err *error) {
	releaseErr := activeTasksLock.Release()
	if releaseErr != nil {
		*err = multierror.Append(*err, releaseErr)
	}
}