github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/atc/db/worker_lifecycle_test.go (about) 1 package db_test 2 3 import ( 4 "database/sql" 5 "time" 6 7 "github.com/pf-qiu/concourse/v6/atc" 8 "github.com/pf-qiu/concourse/v6/atc/db" 9 . "github.com/onsi/ginkgo" 10 . "github.com/onsi/ginkgo/extensions/table" 11 . "github.com/onsi/gomega" 12 ) 13 14 var _ = Describe("Worker Lifecycle", func() { 15 var ( 16 atcWorker atc.Worker 17 worker db.Worker 18 ) 19 20 BeforeEach(func() { 21 atcWorker = atc.Worker{ 22 GardenAddr: "some-garden-addr", 23 BaggageclaimURL: "some-bc-url", 24 HTTPProxyURL: "some-http-proxy-url", 25 HTTPSProxyURL: "some-https-proxy-url", 26 NoProxy: "some-no-proxy", 27 ActiveContainers: 140, 28 ResourceTypes: []atc.WorkerResourceType{ 29 { 30 Type: "some-resource-type", 31 Image: "some-image", 32 Version: "some-version", 33 }, 34 { 35 Type: "other-resource-type", 36 Image: "other-image", 37 Version: "other-version", 38 }, 39 }, 40 Platform: "some-platform", 41 Tags: atc.Tags{"some", "tags"}, 42 Ephemeral: true, 43 Name: "some-name", 44 StartTime: 55, 45 } 46 }) 47 48 Describe("DeleteUnresponsiveEphemeralWorkers", func() { 49 Context("when the worker has heartbeated recently", func() { 50 BeforeEach(func() { 51 _, err := workerFactory.SaveWorker(atcWorker, 5*time.Minute) 52 Expect(err).ToNot(HaveOccurred()) 53 }) 54 55 It("leaves the worker alone", func() { 56 deletedWorkers, err := workerLifecycle.DeleteUnresponsiveEphemeralWorkers() 57 Expect(err).ToNot(HaveOccurred()) 58 Expect(deletedWorkers).To(BeEmpty()) 59 }) 60 }) 61 62 Context("when the worker has not heartbeated recently", func() { 63 BeforeEach(func() { 64 _, err := workerFactory.SaveWorker(atcWorker, -1*time.Minute) 65 Expect(err).ToNot(HaveOccurred()) 66 }) 67 68 It("deletes the ephemeral worker", func() { 69 deletedWorkers, err := workerLifecycle.DeleteUnresponsiveEphemeralWorkers() 70 Expect(err).ToNot(HaveOccurred()) 71 Expect(len(deletedWorkers)).To(Equal(1)) 72 Expect(deletedWorkers[0]).To(Equal("some-name")) 73 }) 74 }) 75 }) 76 77 Describe("StallUnresponsiveWorkers", func() { 78 Context("when the worker has heartbeated recently", func() { 79 BeforeEach(func() { 80 _, err := workerFactory.SaveWorker(atcWorker, 5*time.Minute) 81 Expect(err).ToNot(HaveOccurred()) 82 }) 83 84 It("leaves the worker alone", func() { 85 stalledWorkers, err := workerLifecycle.StallUnresponsiveWorkers() 86 Expect(err).ToNot(HaveOccurred()) 87 Expect(stalledWorkers).To(BeEmpty()) 88 }) 89 }) 90 91 Context("when the worker has not heartbeated recently", func() { 92 BeforeEach(func() { 93 _, err := workerFactory.SaveWorker(atcWorker, -1*time.Minute) 94 Expect(err).ToNot(HaveOccurred()) 95 }) 96 97 It("marks the worker as `stalled`", func() { 98 stalledWorkers, err := workerLifecycle.StallUnresponsiveWorkers() 99 Expect(err).ToNot(HaveOccurred()) 100 Expect(len(stalledWorkers)).To(Equal(1)) 101 Expect(stalledWorkers[0]).To(Equal("some-name")) 102 }) 103 }) 104 }) 105 106 Describe("DeleteFinishedRetiringWorkers", func() { 107 var ( 108 dbWorker db.Worker 109 dbBuild db.Build 110 ) 111 112 JustBeforeEach(func() { 113 var err error 114 dbWorker, err = workerFactory.SaveWorker(atcWorker, 5*time.Minute) 115 Expect(err).ToNot(HaveOccurred()) 116 }) 117 118 Context("when worker is not retiring", func() { 119 JustBeforeEach(func() { 120 var err error 121 atcWorker.State = string(db.WorkerStateRunning) 122 dbWorker, err = workerFactory.SaveWorker(atcWorker, 5*time.Minute) 123 Expect(err).ToNot(HaveOccurred()) 124 }) 125 126 It("does not delete worker", func() { 127 _, found, err := workerFactory.GetWorker(atcWorker.Name) 128 Expect(err).ToNot(HaveOccurred()) 129 Expect(found).To(BeTrue()) 130 131 deletedWorkers, err := workerLifecycle.DeleteFinishedRetiringWorkers() 132 Expect(err).ToNot(HaveOccurred()) 133 Expect(len(deletedWorkers)).To(Equal(0)) 134 135 _, found, err = workerFactory.GetWorker(atcWorker.Name) 136 Expect(err).ToNot(HaveOccurred()) 137 Expect(found).To(BeTrue()) 138 }) 139 }) 140 141 Context("when worker is retiring", func() { 142 BeforeEach(func() { 143 atcWorker.State = string(db.WorkerStateRetiring) 144 }) 145 146 Context("when the worker does not have any running builds", func() { 147 It("deletes worker", func() { 148 _, found, err := workerFactory.GetWorker(atcWorker.Name) 149 Expect(err).ToNot(HaveOccurred()) 150 Expect(found).To(BeTrue()) 151 152 deletedWorkers, err := workerLifecycle.DeleteFinishedRetiringWorkers() 153 Expect(err).ToNot(HaveOccurred()) 154 Expect(len(deletedWorkers)).To(Equal(1)) 155 Expect(deletedWorkers[0]).To(Equal(atcWorker.Name)) 156 157 _, found, err = workerFactory.GetWorker(atcWorker.Name) 158 Expect(err).ToNot(HaveOccurred()) 159 Expect(found).To(BeFalse()) 160 }) 161 }) 162 163 DescribeTable("deleting workers with builds that are", 164 func(s db.BuildStatus, expectedExistence bool) { 165 dbBuild, err := defaultTeam.CreateOneOffBuild() 166 Expect(err).ToNot(HaveOccurred()) 167 168 switch s { 169 case db.BuildStatusPending: 170 case db.BuildStatusStarted: 171 _, err = dbBuild.Start(atc.Plan{}) 172 Expect(err).ToNot(HaveOccurred()) 173 default: 174 err = dbBuild.Finish(s) 175 Expect(err).ToNot(HaveOccurred()) 176 } 177 _, err = dbWorker.CreateContainer(db.NewBuildStepContainerOwner(dbBuild.ID(), atc.PlanID("4"), defaultTeam.ID()), db.ContainerMetadata{}) 178 Expect(err).ToNot(HaveOccurred()) 179 180 _, found, err := workerFactory.GetWorker(atcWorker.Name) 181 Expect(err).ToNot(HaveOccurred()) 182 Expect(found).To(BeTrue()) 183 184 _, err = workerLifecycle.DeleteFinishedRetiringWorkers() 185 Expect(err).ToNot(HaveOccurred()) 186 187 _, found, err = workerFactory.GetWorker(atcWorker.Name) 188 Expect(err).ToNot(HaveOccurred()) 189 Expect(found).To(Equal(expectedExistence)) 190 }, 191 Entry("pending", db.BuildStatusPending, true), 192 Entry("started", db.BuildStatusStarted, true), 193 Entry("aborted", db.BuildStatusAborted, false), 194 Entry("succeeded", db.BuildStatusSucceeded, false), 195 Entry("failed", db.BuildStatusFailed, false), 196 Entry("errored", db.BuildStatusErrored, false), 197 ) 198 199 ItRetiresWorkerWithState := func(s db.BuildStatus, expectedExistence bool) { 200 switch s { 201 case db.BuildStatusPending: 202 case db.BuildStatusStarted: 203 _, err := dbBuild.Start(atc.Plan{}) 204 Expect(err).ToNot(HaveOccurred()) 205 default: 206 err := dbBuild.Finish(s) 207 Expect(err).ToNot(HaveOccurred()) 208 } 209 210 _, err := dbWorker.CreateContainer(db.NewBuildStepContainerOwner(dbBuild.ID(), atc.PlanID("4"), defaultTeam.ID()), db.ContainerMetadata{}) 211 Expect(err).ToNot(HaveOccurred()) 212 213 _, found, err := workerFactory.GetWorker(atcWorker.Name) 214 Expect(err).ToNot(HaveOccurred()) 215 Expect(found).To(BeTrue()) 216 217 _, err = workerLifecycle.DeleteFinishedRetiringWorkers() 218 Expect(err).ToNot(HaveOccurred()) 219 220 _, found, err = workerFactory.GetWorker(atcWorker.Name) 221 Expect(err).ToNot(HaveOccurred()) 222 Expect(found).To(Equal(expectedExistence)) 223 } 224 225 Context("when worker has build with uninterruptible job", func() { 226 BeforeEach(func() { 227 pipeline, created, err := defaultTeam.SavePipeline(atc.PipelineRef{Name: "some-pipeline"}, atc.Config{ 228 Jobs: atc.JobConfigs{ 229 { 230 Name: "some-job", 231 Interruptible: false, 232 }, 233 }, 234 }, db.ConfigVersion(0), false) 235 Expect(err).ToNot(HaveOccurred()) 236 Expect(created).To(BeTrue()) 237 238 job, found, err := pipeline.Job("some-job") 239 Expect(err).ToNot(HaveOccurred()) 240 Expect(found).To(BeTrue()) 241 242 dbBuild, err = job.CreateBuild() 243 Expect(err).ToNot(HaveOccurred()) 244 }) 245 246 DescribeTable("with builds that are", 247 ItRetiresWorkerWithState, 248 Entry("pending", db.BuildStatusPending, true), 249 Entry("started", db.BuildStatusStarted, true), 250 Entry("aborted", db.BuildStatusAborted, false), 251 Entry("succeeded", db.BuildStatusSucceeded, false), 252 Entry("failed", db.BuildStatusFailed, false), 253 Entry("errored", db.BuildStatusErrored, false), 254 ) 255 }) 256 257 Context("when worker has build with interruptible job", func() { 258 BeforeEach(func() { 259 pipeline, created, err := defaultTeam.SavePipeline(atc.PipelineRef{Name: "some-pipeline"}, atc.Config{ 260 Jobs: atc.JobConfigs{ 261 { 262 Name: "some-job", 263 Interruptible: true, 264 }, 265 }, 266 }, db.ConfigVersion(0), false) 267 Expect(err).ToNot(HaveOccurred()) 268 Expect(created).To(BeTrue()) 269 270 job, found, err := pipeline.Job("some-job") 271 Expect(err).ToNot(HaveOccurred()) 272 Expect(found).To(BeTrue()) 273 274 dbBuild, err = job.CreateBuild() 275 Expect(err).ToNot(HaveOccurred()) 276 }) 277 278 DescribeTable("with builds that are", 279 ItRetiresWorkerWithState, 280 Entry("pending", db.BuildStatusPending, false), 281 Entry("started", db.BuildStatusStarted, false), 282 Entry("aborted", db.BuildStatusAborted, false), 283 Entry("succeeded", db.BuildStatusSucceeded, false), 284 Entry("failed", db.BuildStatusFailed, false), 285 Entry("errored", db.BuildStatusErrored, false), 286 ) 287 }) 288 289 Context("when worker has one-off build", func() { 290 BeforeEach(func() { 291 var err error 292 dbBuild, err = defaultTeam.CreateOneOffBuild() 293 Expect(err).ToNot(HaveOccurred()) 294 }) 295 296 DescribeTable("with builds that are", 297 ItRetiresWorkerWithState, 298 Entry("pending", db.BuildStatusPending, true), 299 Entry("started", db.BuildStatusStarted, true), 300 Entry("aborted", db.BuildStatusAborted, false), 301 Entry("succeeded", db.BuildStatusSucceeded, false), 302 Entry("failed", db.BuildStatusFailed, false), 303 Entry("errored", db.BuildStatusErrored, false), 304 ) 305 }) 306 }) 307 }) 308 309 Describe("LandFinishedLandingWorkers", func() { 310 var ( 311 dbWorker db.Worker 312 dbBuild db.Build 313 ) 314 315 JustBeforeEach(func() { 316 var err error 317 dbWorker, err = workerFactory.SaveWorker(atcWorker, 5*time.Minute) 318 Expect(err).ToNot(HaveOccurred()) 319 }) 320 321 Context("when worker is not landing", func() { 322 JustBeforeEach(func() { 323 var err error 324 atcWorker.State = string(db.WorkerStateRunning) 325 dbWorker, err = workerFactory.SaveWorker(atcWorker, 5*time.Minute) 326 Expect(err).ToNot(HaveOccurred()) 327 }) 328 329 It("does not land worker", func() { 330 _, found, err := workerFactory.GetWorker(atcWorker.Name) 331 Expect(err).ToNot(HaveOccurred()) 332 Expect(found).To(BeTrue()) 333 334 landedWorkers, err := workerLifecycle.LandFinishedLandingWorkers() 335 Expect(err).ToNot(HaveOccurred()) 336 Expect(len(landedWorkers)).To(Equal(0)) 337 338 foundWorker, found, err := workerFactory.GetWorker(atcWorker.Name) 339 Expect(err).ToNot(HaveOccurred()) 340 Expect(found).To(BeTrue()) 341 Expect(foundWorker.State()).To(Equal(db.WorkerStateRunning)) 342 }) 343 }) 344 345 Context("when worker is landing", func() { 346 BeforeEach(func() { 347 atcWorker.State = string(db.WorkerStateLanding) 348 }) 349 350 Context("when the worker does not have any running builds", func() { 351 It("lands worker", func() { 352 _, found, err := workerFactory.GetWorker(atcWorker.Name) 353 Expect(err).ToNot(HaveOccurred()) 354 Expect(found).To(BeTrue()) 355 356 landedWorkers, err := workerLifecycle.LandFinishedLandingWorkers() 357 Expect(err).ToNot(HaveOccurred()) 358 Expect(len(landedWorkers)).To(Equal(1)) 359 Expect(landedWorkers[0]).To(Equal(atcWorker.Name)) 360 361 foundWorker, found, err := workerFactory.GetWorker(atcWorker.Name) 362 Expect(err).ToNot(HaveOccurred()) 363 Expect(found).To(BeTrue()) 364 Expect(foundWorker.State()).To(Equal(db.WorkerStateLanded)) 365 }) 366 367 It("clears out the garden/baggageclaim addresses", func() { 368 var ( 369 beforegardenAddr sql.NullString 370 beforeBaggagaClaimUrl sql.NullString 371 aftergardenAddr sql.NullString 372 afterBaggagaClaimUrl sql.NullString 373 found bool 374 err error 375 ) 376 377 worker, found, err = workerFactory.GetWorker(atcWorker.Name) 378 Expect(err).ToNot(HaveOccurred()) 379 Expect(found).To(BeTrue()) 380 381 err = dbConn.QueryRow("SELECT addr, baggageclaim_url FROM workers WHERE name = '"+atcWorker.Name+"'").Scan(&beforegardenAddr, 382 &beforeBaggagaClaimUrl, 383 ) 384 Expect(err).ToNot(HaveOccurred()) 385 386 Expect(beforegardenAddr.Valid).To(BeTrue()) 387 Expect(beforeBaggagaClaimUrl.Valid).To(BeTrue()) 388 389 err = worker.Land() 390 Expect(err).ToNot(HaveOccurred()) 391 landedWorkers, err := workerLifecycle.LandFinishedLandingWorkers() 392 Expect(err).ToNot(HaveOccurred()) 393 Expect(len(landedWorkers)).To(Equal(1)) 394 Expect(landedWorkers[0]).To(Equal(atcWorker.Name)) 395 396 err = dbConn.QueryRow("SELECT addr, baggageclaim_url FROM workers WHERE name = '"+atcWorker.Name+"'").Scan(&aftergardenAddr, 397 &afterBaggagaClaimUrl, 398 ) 399 Expect(err).ToNot(HaveOccurred()) 400 401 Expect(aftergardenAddr.String).To(Equal("")) 402 Expect(afterBaggagaClaimUrl.String).To(Equal("")) 403 404 }) 405 }) 406 407 DescribeTable("land workers with builds that are", 408 func(s db.BuildStatus, expectedState db.WorkerState) { 409 dbBuild, err := defaultTeam.CreateOneOffBuild() 410 Expect(err).ToNot(HaveOccurred()) 411 412 switch s { 413 case db.BuildStatusPending: 414 case db.BuildStatusStarted: 415 _, err := dbBuild.Start(atc.Plan{}) 416 Expect(err).ToNot(HaveOccurred()) 417 default: 418 err := dbBuild.Finish(s) 419 Expect(err).ToNot(HaveOccurred()) 420 } 421 422 _, err = dbWorker.CreateContainer(db.NewBuildStepContainerOwner(dbBuild.ID(), atc.PlanID("4"), defaultTeam.ID()), db.ContainerMetadata{}) 423 Expect(err).ToNot(HaveOccurred()) 424 425 _, found, err := workerFactory.GetWorker(atcWorker.Name) 426 Expect(err).ToNot(HaveOccurred()) 427 Expect(found).To(BeTrue()) 428 429 _, err = workerLifecycle.LandFinishedLandingWorkers() 430 Expect(err).ToNot(HaveOccurred()) 431 432 foundWorker, found, err := workerFactory.GetWorker(atcWorker.Name) 433 Expect(err).ToNot(HaveOccurred()) 434 Expect(found).To(BeTrue()) 435 Expect(foundWorker.State()).To(Equal(expectedState)) 436 }, 437 Entry("pending", db.BuildStatusPending, db.WorkerStateLanding), 438 Entry("started", db.BuildStatusStarted, db.WorkerStateLanding), 439 Entry("aborted", db.BuildStatusAborted, db.WorkerStateLanded), 440 Entry("succeeded", db.BuildStatusSucceeded, db.WorkerStateLanded), 441 Entry("failed", db.BuildStatusFailed, db.WorkerStateLanded), 442 Entry("errored", db.BuildStatusErrored, db.WorkerStateLanded), 443 ) 444 445 ItLandsWorkerWithExpectedState := func(s db.BuildStatus, expectedState db.WorkerState) { 446 switch s { 447 case db.BuildStatusPending: 448 case db.BuildStatusStarted: 449 _, err := dbBuild.Start(atc.Plan{}) 450 Expect(err).ToNot(HaveOccurred()) 451 default: 452 err := dbBuild.Finish(s) 453 Expect(err).ToNot(HaveOccurred()) 454 } 455 456 _, err := dbWorker.CreateContainer(db.NewBuildStepContainerOwner(dbBuild.ID(), atc.PlanID("4"), defaultTeam.ID()), db.ContainerMetadata{}) 457 Expect(err).ToNot(HaveOccurred()) 458 459 _, found, err := workerFactory.GetWorker(atcWorker.Name) 460 Expect(err).ToNot(HaveOccurred()) 461 Expect(found).To(BeTrue()) 462 463 _, err = workerLifecycle.LandFinishedLandingWorkers() 464 Expect(err).ToNot(HaveOccurred()) 465 466 foundWorker, found, err := workerFactory.GetWorker(atcWorker.Name) 467 Expect(err).ToNot(HaveOccurred()) 468 Expect(found).To(BeTrue()) 469 Expect(foundWorker.State()).To(Equal(expectedState)) 470 } 471 472 Context("when worker has build with uninterruptible job", func() { 473 BeforeEach(func() { 474 pipeline, created, err := defaultTeam.SavePipeline(atc.PipelineRef{Name: "some-pipeline"}, atc.Config{ 475 Jobs: atc.JobConfigs{ 476 { 477 Name: "some-job", 478 Interruptible: false, 479 }, 480 }, 481 }, db.ConfigVersion(0), false) 482 Expect(err).ToNot(HaveOccurred()) 483 Expect(created).To(BeTrue()) 484 485 job, found, err := pipeline.Job("some-job") 486 Expect(err).ToNot(HaveOccurred()) 487 Expect(found).To(BeTrue()) 488 489 dbBuild, err = job.CreateBuild() 490 Expect(err).ToNot(HaveOccurred()) 491 }) 492 493 DescribeTable("with builds that are", 494 ItLandsWorkerWithExpectedState, 495 Entry("pending", db.BuildStatusPending, db.WorkerStateLanding), 496 Entry("started", db.BuildStatusStarted, db.WorkerStateLanding), 497 Entry("aborted", db.BuildStatusAborted, db.WorkerStateLanded), 498 Entry("succeeded", db.BuildStatusSucceeded, db.WorkerStateLanded), 499 Entry("failed", db.BuildStatusFailed, db.WorkerStateLanded), 500 Entry("errored", db.BuildStatusErrored, db.WorkerStateLanded), 501 ) 502 }) 503 504 Context("when worker has build with interruptible job", func() { 505 BeforeEach(func() { 506 pipeline, created, err := defaultTeam.SavePipeline(atc.PipelineRef{Name: "some-pipeline"}, atc.Config{ 507 Jobs: atc.JobConfigs{ 508 { 509 Name: "some-job", 510 Interruptible: true, 511 }, 512 }, 513 }, db.ConfigVersion(0), false) 514 Expect(err).ToNot(HaveOccurred()) 515 Expect(created).To(BeTrue()) 516 517 job, found, err := pipeline.Job("some-job") 518 Expect(err).ToNot(HaveOccurred()) 519 Expect(found).To(BeTrue()) 520 521 dbBuild, err = job.CreateBuild() 522 Expect(err).ToNot(HaveOccurred()) 523 }) 524 525 DescribeTable("with builds that are", 526 ItLandsWorkerWithExpectedState, 527 Entry("pending", db.BuildStatusPending, db.WorkerStateLanded), 528 Entry("started", db.BuildStatusStarted, db.WorkerStateLanded), 529 Entry("aborted", db.BuildStatusAborted, db.WorkerStateLanded), 530 Entry("succeeded", db.BuildStatusSucceeded, db.WorkerStateLanded), 531 Entry("failed", db.BuildStatusFailed, db.WorkerStateLanded), 532 Entry("errored", db.BuildStatusErrored, db.WorkerStateLanded), 533 ) 534 }) 535 536 Context("when worker has one-off build", func() { 537 BeforeEach(func() { 538 var err error 539 dbBuild, err = defaultTeam.CreateOneOffBuild() 540 Expect(err).ToNot(HaveOccurred()) 541 }) 542 543 DescribeTable("with builds that are", 544 ItLandsWorkerWithExpectedState, 545 Entry("pending", db.BuildStatusPending, db.WorkerStateLanding), 546 Entry("started", db.BuildStatusStarted, db.WorkerStateLanding), 547 Entry("aborted", db.BuildStatusAborted, db.WorkerStateLanded), 548 Entry("succeeded", db.BuildStatusSucceeded, db.WorkerStateLanded), 549 Entry("failed", db.BuildStatusFailed, db.WorkerStateLanded), 550 Entry("errored", db.BuildStatusErrored, db.WorkerStateLanded), 551 ) 552 }) 553 }) 554 }) 555 556 Describe("GetWorkersState", func() { 557 558 JustBeforeEach(func() { 559 atcWorker.State = string(db.WorkerStateStalled) 560 _, err := workerFactory.SaveWorker(atcWorker, 5*time.Minute) 561 Expect(err).ToNot(HaveOccurred()) 562 }) 563 564 It("gets the workers' state", func() { 565 countByState, err := workerLifecycle.GetWorkerStateByName() 566 Expect(err).ToNot(HaveOccurred()) 567 expectedState := map[string]db.WorkerState{ 568 "default-worker": db.WorkerStateRunning, 569 "other-worker": db.WorkerStateRunning, 570 "some-name": db.WorkerStateStalled, 571 } 572 Expect(countByState).To(Equal(expectedState)) 573 }) 574 575 }) 576 })