vitess.io/vitess@v0.16.2/go/vt/vtorc/inst/downtime_dao.go (about)

     1  /*
     2     Copyright 2015 Shlomi Noach, courtesy Booking.com
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package inst
    18  
    19  import (
    20  	"fmt"
    21  	"time"
    22  
    23  	"vitess.io/vitess/go/vt/log"
    24  
    25  	"vitess.io/vitess/go/vt/vtorc/config"
    26  	"vitess.io/vitess/go/vt/vtorc/db"
    27  )
    28  
    29  // BeginDowntime will make mark an instance as downtimed (or override existing downtime period)
    30  func BeginDowntime(downtime *Downtime) (err error) {
    31  	if downtime.Duration == 0 {
    32  		downtime.Duration = config.MaintenanceExpireMinutes * time.Minute
    33  	}
    34  	if downtime.EndsAtString != "" {
    35  		_, err = db.ExecVTOrc(`
    36  				insert
    37  					into database_instance_downtime (
    38  						hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason
    39  					) VALUES (
    40  						?, ?, 1, ?, ?, ?, ?
    41  					)
    42  					on duplicate key update
    43  						downtime_active=values(downtime_active),
    44  						begin_timestamp=values(begin_timestamp),
    45  						end_timestamp=values(end_timestamp),
    46  						owner=values(owner),
    47  						reason=values(reason)
    48  				`,
    49  			downtime.Key.Hostname,
    50  			downtime.Key.Port,
    51  			downtime.BeginsAtString,
    52  			downtime.EndsAtString,
    53  			downtime.Owner,
    54  			downtime.Reason,
    55  		)
    56  	} else {
    57  		if downtime.Ended() {
    58  			// No point in writing it down; it's expired
    59  			return nil
    60  		}
    61  
    62  		_, err = db.ExecVTOrc(`
    63  			insert
    64  				into database_instance_downtime (
    65  					hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason
    66  				) VALUES (
    67  					?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ?
    68  				)
    69  				on duplicate key update
    70  					downtime_active=values(downtime_active),
    71  					begin_timestamp=values(begin_timestamp),
    72  					end_timestamp=values(end_timestamp),
    73  					owner=values(owner),
    74  					reason=values(reason)
    75  			`,
    76  			downtime.Key.Hostname,
    77  			downtime.Key.Port,
    78  			int(downtime.EndsIn().Seconds()),
    79  			downtime.Owner,
    80  			downtime.Reason,
    81  		)
    82  	}
    83  	if err != nil {
    84  		log.Error(err)
    85  		return err
    86  	}
    87  	_ = AuditOperation("begin-downtime", downtime.Key, fmt.Sprintf("owner: %s, reason: %s", downtime.Owner, downtime.Reason))
    88  
    89  	return nil
    90  }
    91  
    92  // EndDowntime will remove downtime flag from an instance
    93  func EndDowntime(instanceKey *InstanceKey) (wasDowntimed bool, err error) {
    94  	res, err := db.ExecVTOrc(`
    95  			delete from
    96  				database_instance_downtime
    97  			where
    98  				hostname = ?
    99  				and port = ?
   100  			`,
   101  		instanceKey.Hostname,
   102  		instanceKey.Port,
   103  	)
   104  	if err != nil {
   105  		log.Error(err)
   106  		return wasDowntimed, err
   107  	}
   108  
   109  	if affected, _ := res.RowsAffected(); affected > 0 {
   110  		wasDowntimed = true
   111  		_ = AuditOperation("end-downtime", instanceKey, "")
   112  	}
   113  	return wasDowntimed, err
   114  }
   115  
   116  // renewLostInRecoveryDowntime renews hosts who are downtimed due to being lost in recovery, such that
   117  // their downtime never expires.
   118  func renewLostInRecoveryDowntime() error {
   119  	_, err := db.ExecVTOrc(`
   120  			update
   121  				database_instance_downtime
   122  			set
   123  				end_timestamp = NOW() + INTERVAL ? SECOND
   124  			where
   125  				end_timestamp > NOW()
   126  				and reason = ?
   127  			`,
   128  		config.LostInRecoveryDowntimeSeconds,
   129  		DowntimeLostInRecoveryMessage,
   130  	)
   131  
   132  	return err
   133  }
   134  
   135  // expireLostInRecoveryDowntime expires downtime for servers who have been lost in recovery in the last,
   136  // but are now replicating.
   137  func expireLostInRecoveryDowntime() error {
   138  	instances, err := ReadLostInRecoveryInstances("", "")
   139  	if err != nil {
   140  		return err
   141  	}
   142  	if len(instances) == 0 {
   143  		return nil
   144  	}
   145  	for _, instance := range instances {
   146  		// We _may_ expire this downtime, but only after a minute
   147  		// This is a graceful period, during which other servers can claim ownership of the alias,
   148  		// or can update their own cluster name to match a new primary's name
   149  		if instance.ElapsedDowntime < time.Minute {
   150  			continue
   151  		}
   152  		if !instance.IsLastCheckValid {
   153  			continue
   154  		}
   155  		if instance.ReplicaRunning() {
   156  			// back, alive, replicating in some topology
   157  			if _, err := EndDowntime(&instance.Key); err != nil {
   158  				return err
   159  			}
   160  		}
   161  	}
   162  	return nil
   163  }
   164  
   165  // ExpireDowntime will remove the maintenance flag on old downtimes
   166  func ExpireDowntime() error {
   167  	if err := renewLostInRecoveryDowntime(); err != nil {
   168  		log.Error(err)
   169  		return err
   170  	}
   171  	if err := expireLostInRecoveryDowntime(); err != nil {
   172  		log.Error(err)
   173  		return err
   174  	}
   175  	{
   176  		res, err := db.ExecVTOrc(`
   177  			delete from
   178  				database_instance_downtime
   179  			where
   180  				end_timestamp < NOW()
   181  			`,
   182  		)
   183  		if err != nil {
   184  			log.Error(err)
   185  			return err
   186  		}
   187  		if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 {
   188  			_ = AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected))
   189  		}
   190  	}
   191  
   192  	return nil
   193  }