vitess.io/vitess@v0.16.2/go/vt/vtorc/inst/downtime_dao.go (about) 1 /* 2 Copyright 2015 Shlomi Noach, courtesy Booking.com 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package inst 18 19 import ( 20 "fmt" 21 "time" 22 23 "vitess.io/vitess/go/vt/log" 24 25 "vitess.io/vitess/go/vt/vtorc/config" 26 "vitess.io/vitess/go/vt/vtorc/db" 27 ) 28 29 // BeginDowntime will make mark an instance as downtimed (or override existing downtime period) 30 func BeginDowntime(downtime *Downtime) (err error) { 31 if downtime.Duration == 0 { 32 downtime.Duration = config.MaintenanceExpireMinutes * time.Minute 33 } 34 if downtime.EndsAtString != "" { 35 _, err = db.ExecVTOrc(` 36 insert 37 into database_instance_downtime ( 38 hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason 39 ) VALUES ( 40 ?, ?, 1, ?, ?, ?, ? 41 ) 42 on duplicate key update 43 downtime_active=values(downtime_active), 44 begin_timestamp=values(begin_timestamp), 45 end_timestamp=values(end_timestamp), 46 owner=values(owner), 47 reason=values(reason) 48 `, 49 downtime.Key.Hostname, 50 downtime.Key.Port, 51 downtime.BeginsAtString, 52 downtime.EndsAtString, 53 downtime.Owner, 54 downtime.Reason, 55 ) 56 } else { 57 if downtime.Ended() { 58 // No point in writing it down; it's expired 59 return nil 60 } 61 62 _, err = db.ExecVTOrc(` 63 insert 64 into database_instance_downtime ( 65 hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason 66 ) VALUES ( 67 ?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ? 68 ) 69 on duplicate key update 70 downtime_active=values(downtime_active), 71 begin_timestamp=values(begin_timestamp), 72 end_timestamp=values(end_timestamp), 73 owner=values(owner), 74 reason=values(reason) 75 `, 76 downtime.Key.Hostname, 77 downtime.Key.Port, 78 int(downtime.EndsIn().Seconds()), 79 downtime.Owner, 80 downtime.Reason, 81 ) 82 } 83 if err != nil { 84 log.Error(err) 85 return err 86 } 87 _ = AuditOperation("begin-downtime", downtime.Key, fmt.Sprintf("owner: %s, reason: %s", downtime.Owner, downtime.Reason)) 88 89 return nil 90 } 91 92 // EndDowntime will remove downtime flag from an instance 93 func EndDowntime(instanceKey *InstanceKey) (wasDowntimed bool, err error) { 94 res, err := db.ExecVTOrc(` 95 delete from 96 database_instance_downtime 97 where 98 hostname = ? 99 and port = ? 100 `, 101 instanceKey.Hostname, 102 instanceKey.Port, 103 ) 104 if err != nil { 105 log.Error(err) 106 return wasDowntimed, err 107 } 108 109 if affected, _ := res.RowsAffected(); affected > 0 { 110 wasDowntimed = true 111 _ = AuditOperation("end-downtime", instanceKey, "") 112 } 113 return wasDowntimed, err 114 } 115 116 // renewLostInRecoveryDowntime renews hosts who are downtimed due to being lost in recovery, such that 117 // their downtime never expires. 118 func renewLostInRecoveryDowntime() error { 119 _, err := db.ExecVTOrc(` 120 update 121 database_instance_downtime 122 set 123 end_timestamp = NOW() + INTERVAL ? SECOND 124 where 125 end_timestamp > NOW() 126 and reason = ? 127 `, 128 config.LostInRecoveryDowntimeSeconds, 129 DowntimeLostInRecoveryMessage, 130 ) 131 132 return err 133 } 134 135 // expireLostInRecoveryDowntime expires downtime for servers who have been lost in recovery in the last, 136 // but are now replicating. 137 func expireLostInRecoveryDowntime() error { 138 instances, err := ReadLostInRecoveryInstances("", "") 139 if err != nil { 140 return err 141 } 142 if len(instances) == 0 { 143 return nil 144 } 145 for _, instance := range instances { 146 // We _may_ expire this downtime, but only after a minute 147 // This is a graceful period, during which other servers can claim ownership of the alias, 148 // or can update their own cluster name to match a new primary's name 149 if instance.ElapsedDowntime < time.Minute { 150 continue 151 } 152 if !instance.IsLastCheckValid { 153 continue 154 } 155 if instance.ReplicaRunning() { 156 // back, alive, replicating in some topology 157 if _, err := EndDowntime(&instance.Key); err != nil { 158 return err 159 } 160 } 161 } 162 return nil 163 } 164 165 // ExpireDowntime will remove the maintenance flag on old downtimes 166 func ExpireDowntime() error { 167 if err := renewLostInRecoveryDowntime(); err != nil { 168 log.Error(err) 169 return err 170 } 171 if err := expireLostInRecoveryDowntime(); err != nil { 172 log.Error(err) 173 return err 174 } 175 { 176 res, err := db.ExecVTOrc(` 177 delete from 178 database_instance_downtime 179 where 180 end_timestamp < NOW() 181 `, 182 ) 183 if err != nil { 184 log.Error(err) 185 return err 186 } 187 if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { 188 _ = AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected)) 189 } 190 } 191 192 return nil 193 }