github.com/openshift-online/ocm-sdk-go@v0.1.473/leadership/flag.go (about) 1 /* 2 Copyright (c) 2021 Red Hat, Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package leadership 18 19 import ( 20 "context" 21 "database/sql" 22 "errors" 23 "math/rand" 24 "sync/atomic" 25 "time" 26 27 "github.com/openshift-online/ocm-sdk-go/database" 28 "github.com/openshift-online/ocm-sdk-go/logging" 29 "github.com/prometheus/client_golang/prometheus" 30 ) 31 32 type contextKey string 33 34 const ( 35 leadershipFlag contextKey = "leadership-flag" 36 ) 37 38 // FlagBuilder contains the data and logic needed to build leadership flags. 39 type FlagBuilder struct { 40 // Basic fields: 41 logger logging.Logger 42 handle *sql.DB 43 name string 44 process string 45 interval time.Duration 46 timeout time.Duration 47 jitter float64 48 49 // Fields used for metrics: 50 metricsSubsystem string 51 metricsRegisterer prometheus.Registerer 52 53 precheckFunc func() (bool, error) 54 } 55 56 // Flag is a distributed flag intended to manage leadership in a group of processes. Only one of the 57 // processes using it will see it raised at any point in time. 58 type Flag struct { 59 // Basic fields: 60 logger logging.Logger 61 handle *sql.DB 62 name string 63 process string 64 renewInterval time.Duration 65 checkInterval time.Duration 66 retryInterval time.Duration 67 timeout time.Duration 68 jitter float64 69 value int32 70 timer *time.Timer 71 stop chan struct{} 72 ctx context.Context 73 74 // Fields used for metrics: 75 stateMetric *prometheus.GaugeVec 76 77 precheck func() (bool, error) 78 } 79 80 // NewFlag creates a builder that can then be used to configure and create a leadership flag. 81 func NewFlag() *FlagBuilder { 82 return &FlagBuilder{ 83 interval: defaultFlagInterval, 84 timeout: defaultFlagTimeout, 85 jitter: defaultFlagJitter, 86 metricsRegisterer: prometheus.DefaultRegisterer, 87 } 88 } 89 90 // Logger sets the logger that the flag will use to write to the log. This is mandatory. 91 func (b *FlagBuilder) Logger(value logging.Logger) *FlagBuilder { 92 b.logger = value 93 return b 94 } 95 96 // Handle sets the database handle that the flag will use to store its state. This is mandatory. 97 func (b *FlagBuilder) Handle(value *sql.DB) *FlagBuilder { 98 b.handle = value 99 return b 100 } 101 102 // Name of the flag. This can be used to have different flags for different uses, or for different 103 // environments that happen to share the database. This is mandatory. 104 func (b *FlagBuilder) Name(value string) *FlagBuilder { 105 b.name = value 106 return b 107 } 108 109 // Process sets the name of the process. This should be unique amonts the set of processes using the 110 // same flag name. A typical name would be the name of a Kubernetes pod, or the combination of a 111 // Kubernets cluser name and pod name, to make it unique across different clusters. This is 112 // mandatory. 113 func (b *FlagBuilder) Process(value string) *FlagBuilder { 114 b.process = value 115 return b 116 } 117 118 // Interval sets the interval for renewing the ownership of the flag. The default value is thirty 119 // seconds. 120 func (b *FlagBuilder) Interval(value time.Duration) *FlagBuilder { 121 b.interval = value 122 return b 123 } 124 125 // Timeout sets the timeout for database operations. The default is on second. 126 func (b *FlagBuilder) Timeout(value time.Duration) *FlagBuilder { 127 b.timeout = value 128 return b 129 } 130 131 // Jitter sets a factor that will be used to randomize the intervals. For example, if this is set to 132 // 0.1 then a random adjustment of +10% or -10% will be done to the intervals each time they are 133 // used. This is intended to reduce simultaneous database accesses by processes that have been 134 // started simultaneously. The default value is 0.2. 135 func (b *FlagBuilder) Jitter(value float64) *FlagBuilder { 136 b.jitter = value 137 return b 138 } 139 140 // MetricsSubsystem sets the name of the subsystem that will be used by the flag to register metrics 141 // with Prometheus. If this isn't explicitly specified, or if it is an empty string, then no metrics 142 // will be registered. For example, if the value is `background_tasks` then the following metrics 143 // will be registered: 144 // 145 // tasks_leadership_flag_state - State of the flag. 146 // 147 // The `...leadership_flag_state` metric will have the following labels: 148 // 149 // name - Name of the flag. 150 // process - Name of the process. 151 // 152 // The value of the `...leaderhsip_flag_state` metric will be one if this process is currently the 153 // holder of the flag or zero if it isn't. 154 // 155 // Note that setting this attribute is not enough to have metrics published, you also need to 156 // create and start a metrics server, as described in the documentation of the Prometheus library. 157 func (b *FlagBuilder) MetricsSubsystem(value string) *FlagBuilder { 158 b.metricsSubsystem = value 159 return b 160 } 161 162 // MetricsRegisterer sets the Prometheus registerer that will be used to register the metrics. The 163 // default is to use the default Prometheus registerer and there is usually no need to change that. 164 // This is intended for unit tests, where it is convenient to have a registerer that doesn't 165 // interfere with the rest of the system. 166 func (b *FlagBuilder) MetricsRegisterer(value prometheus.Registerer) *FlagBuilder { 167 if value == nil { 168 value = prometheus.DefaultRegisterer 169 } 170 b.metricsRegisterer = value 171 return b 172 } 173 174 // Precheck sets a precheck function that, when set, will run before the 175 // leadership flag is checked. The precheck function returns two values: a 176 // boolean indicating whether the leadership check should occur, and an error 177 // indicating if an error occurred when running the precheck. When an error 178 // occurs during precheck, the error is logged and the leadership check is 179 // skipped. 180 func (b *FlagBuilder) PrecheckFunc(f func() (bool, error)) *FlagBuilder { 181 b.precheckFunc = f 182 return b 183 } 184 185 // Build uses the data stored in the builder to configure and create a new leadership flag. 186 func (b *FlagBuilder) Build(ctx context.Context) (result *Flag, err error) { 187 // Check parameters: 188 if b.logger == nil { 189 err = errors.New("logger is mandatory") 190 return 191 } 192 if b.handle == nil { 193 err = errors.New("database handle is mandatory") 194 return 195 } 196 if b.name == "" { 197 err = errors.New("name is mandatory") 198 return 199 } 200 if b.process == "" { 201 err = errors.New("process is mandatory") 202 return 203 } 204 if b.interval <= 0 { 205 err = errors.New("interval should be greater than zero") 206 return 207 } 208 if b.timeout <= 0 { 209 err = errors.New("timeout should be greater than zero") 210 return 211 } 212 if b.jitter < 0 || b.jitter > 1 { 213 err = errors.New("jitter should be between zero and one") 214 return 215 } 216 217 // Calculate specific intervals from the general interval given in the configuration: 218 renewInterval := b.interval 219 checkInterval := b.interval / 2 220 retryInterval := b.interval / 10 221 222 // Make sure that the table exists, creating it if needed: 223 err = b.ensureTable(ctx) 224 if err != nil { 225 return 226 } 227 228 // Create a timer that will fire inmediatelly, so that the first check will be performed 229 // also inmediately after starting the loop: 230 timer := time.NewTimer(0) 231 232 // Crete the channel that will be used to stop the loop: 233 stop := make(chan struct{}) 234 235 // Register the metrics: 236 var stateMetric *prometheus.GaugeVec 237 if b.metricsSubsystem != "" && b.metricsRegisterer != nil { 238 stateMetric = prometheus.NewGaugeVec( 239 prometheus.GaugeOpts{ 240 Subsystem: b.metricsSubsystem, 241 Name: "leadership_flag_state", 242 Help: "State of the leadership flag; one if raised, zero " + 243 "if lowered.", 244 }, 245 flagMetricsLabels, 246 ) 247 err = b.metricsRegisterer.Register(stateMetric) 248 if err != nil { 249 registered, ok := err.(prometheus.AlreadyRegisteredError) 250 if ok { 251 stateMetric = registered.ExistingCollector.(*prometheus.GaugeVec) 252 err = nil 253 } else { 254 return 255 } 256 } 257 } 258 259 // Create and populate the flag: 260 result = &Flag{ 261 logger: b.logger, 262 handle: b.handle, 263 name: b.name, 264 process: b.process, 265 timeout: b.timeout, 266 renewInterval: renewInterval, 267 checkInterval: checkInterval, 268 retryInterval: retryInterval, 269 jitter: b.jitter, 270 timer: timer, 271 stop: stop, 272 stateMetric: stateMetric, 273 ctx: ctx, 274 precheck: b.precheckFunc, 275 } 276 277 // Run the loop: 278 go result.run() 279 280 return 281 } 282 283 // ensureTable creates the table if it doesn't already exist. 284 func (b *FlagBuilder) ensureTable(ctx context.Context) error { 285 var err error 286 _, err = b.handle.ExecContext( 287 ctx, 288 ` 289 create table if not exists leadership_flags ( 290 name text not null primary key, 291 holder text not null, 292 version bigint not null, 293 timestamp timestamp with time zone not null 294 ) 295 `, 296 ) 297 return err 298 } 299 300 // Raised returns true if the flag is raised. At any point in time only one of the identities will 301 // see the flag raised. 302 func (f *Flag) Raised() bool { 303 return atomic.LoadInt32(&f.value) == 1 304 } 305 306 // Close releases all the resources used by the flag. 307 func (f *Flag) Close() error { 308 close(f.stop) 309 return nil 310 } 311 312 // run runs the loop that checks the contents of the table and updates it and the state of the flag 313 // accordinly. 314 func (f *Flag) run() { 315 // Create a context: 316 // whilst respecting parent context values 317 ctx := context.WithValue(f.ctx, leadershipFlag, "") 318 loop: 319 for { 320 select { 321 case <-f.timer.C: 322 if f.precheck != nil { 323 check, err := f.precheck() 324 if err != nil { 325 f.logger.Error( 326 f.ctx, 327 "error running precheck: %v", 328 f.process, f.name, err, 329 ) 330 f.schedule(ctx, f.retryInterval) 331 f.lower(ctx) 332 continue 333 } 334 if !check { 335 f.schedule(ctx, f.retryInterval) 336 f.lower(ctx) 337 continue 338 } 339 340 } 341 f.check(ctx) 342 case <-f.stop: 343 break loop 344 } 345 } 346 } 347 348 // check checks the contents of the table and updates it and the state of the flag accordingly. 349 func (f *Flag) check(ctx context.Context) { 350 var err error 351 352 // Get the global time from the database, so that we don't depend on synchronization of the 353 // machines that compete for the flag. 354 var now time.Time 355 now, err = f.now(ctx) 356 if err != nil { 357 f.logger.Error( 358 ctx, 359 "Process '%s' can't get current time for flag '%s': %v", 360 f.process, f.name, err, 361 ) 362 f.lower(ctx) 363 f.schedule(ctx, f.retryInterval) 364 return 365 } 366 367 // Try to load the state: 368 found, holder, version, timestamp, err := f.loadState(ctx) 369 if err != nil { 370 f.logger.Error( 371 ctx, 372 "Process '%s' can't load state for flag '%s': %v", 373 f.process, f.name, err, 374 ) 375 f.lower(ctx) 376 f.schedule(ctx, f.retryInterval) 377 return 378 } 379 380 // If the state doesn't exist yet then try to create it: 381 if !found { 382 var created bool 383 created, err = f.createState(ctx, now) 384 if err != nil { 385 f.logger.Error( 386 ctx, 387 "Process '%s' can't create initial state for flag: %v", 388 f.process, f.name, err, 389 ) 390 f.lower(ctx) 391 f.schedule(ctx, f.retryInterval) 392 return 393 } 394 if !created { 395 f.logger.Debug( 396 ctx, 397 "Process '%s' found a conflict when trying to create the initial "+ 398 "state for flag '%s'", 399 f.process, f.name, 400 ) 401 f.lower(ctx) 402 f.schedule(ctx, f.checkInterval) 403 return 404 } 405 f.logger.Info( 406 ctx, 407 "Process '%s' successfully created initial state for flag '%s'", 408 f.process, f.name, 409 ) 410 f.raise(ctx) 411 f.schedule(ctx, f.checkInterval) 412 return 413 } 414 415 // If we are here then the state already existed and we were able to load it. If we are 416 // the current holder then we should extend the renew time and make sure that the flag is 417 // raised. 418 if holder == f.process { 419 var updated bool 420 updated, err = f.updateTimestamp(ctx, version, now) 421 if err != nil { 422 f.logger.Error( 423 ctx, 424 "Process '%s' can't update the timestamp for flag '%s': %v", 425 f.process, f.name, err, 426 ) 427 f.lower(ctx) 428 f.schedule(ctx, f.retryInterval) 429 return 430 } 431 if !updated { 432 f.logger.Info( 433 ctx, 434 "Process '%s' found a conflict when trying to update the "+ 435 "timestamp for flag '%s'", 436 f.process, f.name, 437 ) 438 f.lower(ctx) 439 f.schedule(ctx, f.checkInterval) 440 return 441 } 442 f.logger.Debug( 443 ctx, 444 "Process '%s' successfully updated the timestamp for flag '%s'", 445 f.process, f.name, 446 ) 447 f.raise(ctx) 448 f.schedule(ctx, f.checkInterval) 449 return 450 } 451 452 // If we aren't the holder then we should check the timestamp and try to become the leader 453 // if it hasn't been updated recently enough: 454 excess := now.Sub(timestamp) - f.renewInterval 455 if excess > 0 { 456 f.logger.Info( 457 ctx, 458 "Process '%s' detected that flag '%s' is currently held by process '%s' "+ 459 "but it should have been renewed %s ago, will try to get hold "+ 460 "of it", 461 f.process, f.name, holder, excess, 462 ) 463 var updated bool 464 updated, err = f.updateHolder(ctx, version, now) 465 if err != nil { 466 f.logger.Error( 467 ctx, 468 "Process '%s' can't update holder for flag '%s': %v", 469 f.process, f.name, 470 ) 471 f.lower(ctx) 472 f.schedule(ctx, f.retryInterval) 473 return 474 } 475 if !updated { 476 f.logger.Info( 477 ctx, 478 "Process '%s' found a conflict when trying to update the holder "+ 479 "for flag '%s'", 480 f.process, f.name, 481 ) 482 f.lower(ctx) 483 f.schedule(ctx, f.checkInterval) 484 return 485 } 486 f.logger.Debug( 487 ctx, 488 "Process '%s' successfully updated holder for flag '%s'", 489 f.process, f.name, 490 ) 491 f.raise(ctx) 492 f.schedule(ctx, f.checkInterval) 493 return 494 } 495 496 // If we are here we aren't the holder, and the renew time isn't expired, so all we should 497 // do is check again later: 498 f.logger.Debug( 499 ctx, 500 "Process '%s' found that flag '%s' is currently held by process '%s' and it "+ 501 "should be renewed in %s", 502 f.process, f.name, holder, -excess, 503 ) 504 f.lower(ctx) 505 f.schedule(ctx, f.checkInterval) 506 } 507 508 // schedule programs the timer so that it fires in the given time from now. 509 func (f *Flag) schedule(ctx context.Context, d time.Duration) { 510 // Adjust the given duration adding or subtracting a random amount. For example, if the 511 // random factor given in the configuration is 0.1 will add or sustract up to a 10% of the 512 // duration. This is convenient to avoid having all the process doing their checks 513 // simultaneously when they have been started simultaneously. 514 factor := f.jitter * (1 - 2*rand.Float64()) 515 delta := time.Duration(float64(d) * factor) 516 d += delta 517 518 // Reset the timer: 519 f.logger.Debug(ctx, "Process '%s' will check flag '%s' in %s", f.process, f.name, d) 520 f.timer.Reset(d) 521 } 522 523 // now returns get the current time from the database, so that there is no need to synchornize the 524 // clocks of the machines that compete for the flag. 525 func (f *Flag) now(ctx context.Context) (result time.Time, err error) { 526 ctx, cancel := context.WithTimeout(ctx, f.timeout) 527 defer cancel() 528 row := f.handle.QueryRowContext(ctx, `select now()`) 529 var tmp time.Time 530 err = row.Scan(&tmp) 531 if err != nil { 532 return 533 } 534 result = tmp 535 return 536 } 537 538 // loadState tries to load the database state corresponding to this flag. It returns a flag 539 // indicating if the state was found and the values. 540 func (f *Flag) loadState(ctx context.Context) (found bool, holder string, version int64, 541 timestamp time.Time, err error) { 542 ctx, cancel := context.WithTimeout(ctx, f.timeout) 543 defer cancel() 544 row := f.handle.QueryRowContext( 545 ctx, 546 ` 547 select 548 holder, 549 version, 550 timestamp 551 from 552 leadership_flags 553 where 554 name = $1 555 `, 556 f.name, 557 ) 558 if err != nil { 559 return 560 } 561 var tmpHolder string 562 var tmpVersion int64 563 var tmpTimestamp time.Time 564 err = row.Scan( 565 &tmpHolder, 566 &tmpVersion, 567 &tmpTimestamp, 568 ) 569 if err != nil { 570 if errors.Is(err, sql.ErrNoRows) { 571 err = nil 572 } 573 return 574 } 575 found = true 576 holder = tmpHolder 577 version = tmpVersion 578 timestamp = tmpTimestamp 579 return 580 } 581 582 // createState tries to save the initial state of the flag. It returns a boolean indicating if the 583 // state was actually created. 584 func (f *Flag) createState(ctx context.Context, timestamp time.Time) (created bool, err error) { 585 ctx, cancel := context.WithTimeout(ctx, f.timeout) 586 defer cancel() 587 _, err = f.handle.ExecContext( 588 ctx, 589 ` 590 insert into leadership_flags ( 591 name, 592 holder, 593 version, 594 timestamp 595 ) values ( 596 $1, 597 $2, 598 0, 599 $3 600 ) 601 `, 602 f.name, 603 f.process, 604 timestamp, 605 ) 606 if err != nil { 607 // 23505 is the code corresponding to `unique_violation` condition. 608 if database.ErrorCode(err) == "23505" { 609 err = nil 610 } 611 return 612 } 613 created = true 614 return 615 } 616 617 // updateTimestamp tries to update the timestamp. 618 func (f *Flag) updateTimestamp(ctx context.Context, version int64, timestamp time.Time) (updated bool, 619 err error) { 620 ctx, cancel := context.WithTimeout(ctx, f.timeout) 621 defer cancel() 622 result, err := f.handle.ExecContext( 623 ctx, 624 ` 625 update 626 leadership_flags 627 set 628 version = $1, 629 timestamp = $2 630 where 631 name = $3 and 632 holder = $4 and 633 version = $5 634 `, 635 version+1, 636 timestamp, 637 f.name, 638 f.process, 639 version, 640 ) 641 if err != nil { 642 return 643 } 644 count, err := result.RowsAffected() 645 if err != nil { 646 return 647 } 648 updated = count == 1 649 return 650 } 651 652 // updateHolder tries to update the holder. 653 func (f *Flag) updateHolder(ctx context.Context, version int64, timestamp time.Time) (updated bool, 654 err error) { 655 ctx, cancel := context.WithTimeout(ctx, f.timeout) 656 defer cancel() 657 result, err := f.handle.ExecContext( 658 ctx, 659 ` 660 update 661 leadership_flags 662 set 663 version = $1, 664 holder = $2, 665 timestamp = $3 666 where 667 name = $4 and 668 version = $5 669 `, 670 version+1, 671 f.process, 672 timestamp, 673 f.name, 674 version, 675 ) 676 if err != nil { 677 return 678 } 679 count, err := result.RowsAffected() 680 if err != nil { 681 return 682 } 683 updated = count == 1 684 return 685 } 686 687 // raise raises the flag locally, without touching the database. 688 func (f *Flag) raise(ctx context.Context) { 689 old := atomic.SwapInt32(&f.value, 1) 690 if old == 0 { 691 f.logger.Debug( 692 ctx, 693 "Process '%s' is now holding flag '%s'", 694 f.process, f.name, 695 ) 696 } 697 if f.stateMetric != nil { 698 f.stateMetric.WithLabelValues(f.name, f.process).Set(1) 699 } 700 } 701 702 // lower lowers the flag locally, without touching the database. 703 func (f *Flag) lower(ctx context.Context) { 704 old := atomic.SwapInt32(&f.value, 0) 705 if old == 1 { 706 f.logger.Debug( 707 ctx, 708 "Process '%s' is no longer holding flag '%s'", 709 f.process, f.name, 710 ) 711 } 712 if f.stateMetric != nil { 713 f.stateMetric.WithLabelValues(f.name, f.process).Set(0) 714 } 715 } 716 717 // Defaults for configuration settings: 718 const ( 719 defaultFlagInterval = 30 * time.Second 720 defaultFlagTimeout = 1 * time.Second 721 defaultFlagJitter = 0.2 722 ) 723 724 // Names of the labels added to the metrics: 725 const ( 726 flagMetricsNameLabel = "name" 727 flagMetricsProcessLabel = "process" 728 ) 729 730 // Array of labels added to metrics: 731 var flagMetricsLabels = []string{ 732 flagMetricsNameLabel, 733 flagMetricsProcessLabel, 734 }