vitess.io/vitess@v0.16.2/go/vt/binlog/binlog_connection.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package binlog 18 19 import ( 20 crand "crypto/rand" 21 "fmt" 22 "math" 23 "math/big" 24 "sync" 25 26 "context" 27 28 "vitess.io/vitess/go/mysql" 29 "vitess.io/vitess/go/pools" 30 "vitess.io/vitess/go/vt/dbconfigs" 31 "vitess.io/vitess/go/vt/log" 32 ) 33 34 var ( 35 // ErrBinlogUnavailable is returned by this library when we 36 // cannot find a suitable binlog to satisfy the request. 37 ErrBinlogUnavailable = fmt.Errorf("cannot find relevant binlogs on this server") 38 ) 39 40 // BinlogConnection represents a connection to mysqld that pretends to be a replica 41 // connecting for replication. Each such connection must identify itself to 42 // mysqld with a server ID that is unique both among other BinlogConnections and 43 // among actual replicas in the topology. 44 // 45 //revive:disable because I'm not trying to refactor the entire code base right now 46 type BinlogConnection struct { 47 *mysql.Conn 48 cp dbconfigs.Connector 49 serverID uint32 50 cancel context.CancelFunc 51 wg sync.WaitGroup 52 } 53 54 // serverIDPool is the IDPool for server IDs used to connect as a replica. 55 // We randomize the initial value, so it is unlikely that we reuse serverID 56 // values when connecting to the same server (e.g. if it is external MySQL 57 // like RDS); but we DO want to reuse serverID values from the same 58 // client, or we will start orphaning binlog streaming connections 59 var serverIDPool = pools.NewIDPool(getRandomInitialServerID()) 60 61 func getRandomInitialServerID() uint32 { 62 // Leave some breathing room below MaxInt32 to generate IDs into 63 max := big.NewInt(math.MaxInt32 - 10000) 64 id, _ := crand.Int(crand.Reader, max) 65 return uint32(id.Int64()) 66 } 67 68 // NewBinlogConnection creates a new binlog connection to the mysqld instance. 69 func NewBinlogConnection(cp dbconfigs.Connector) (*BinlogConnection, error) { 70 conn, err := connectForReplication(cp) 71 if err != nil { 72 return nil, err 73 } 74 75 bc := &BinlogConnection{ 76 Conn: conn, 77 cp: cp, 78 serverID: serverIDPool.Get(), 79 } 80 log.Infof("new binlog connection: serverID=%d", bc.serverID) 81 return bc, nil 82 } 83 84 // connectForReplication create a MySQL connection ready to use for replication. 85 func connectForReplication(cp dbconfigs.Connector) (*mysql.Conn, error) { 86 ctx := context.Background() 87 conn, err := cp.Connect(ctx) 88 if err != nil { 89 return nil, err 90 } 91 // Tell the server that we understand the format of events 92 // that will be used if binlog_checksum is enabled on the server. 93 if _, err := conn.ExecuteFetch("SET @master_binlog_checksum=@@global.binlog_checksum", 0, false); err != nil { 94 return nil, fmt.Errorf("failed to set @master_binlog_checksum=@@global.binlog_checksum: %v", err) 95 } 96 97 return conn, nil 98 } 99 100 // StartBinlogDumpFromCurrent requests a replication binlog dump from 101 // the current position. 102 func (bc *BinlogConnection) StartBinlogDumpFromCurrent(ctx context.Context) (mysql.Position, <-chan mysql.BinlogEvent, <-chan error, error) { 103 ctx, bc.cancel = context.WithCancel(ctx) 104 105 position, err := bc.Conn.PrimaryPosition() 106 if err != nil { 107 return mysql.Position{}, nil, nil, fmt.Errorf("failed to get primary position: %v", err) 108 } 109 110 c, e, err := bc.StartBinlogDumpFromPosition(ctx, "", position) 111 return position, c, e, err 112 } 113 114 // StartBinlogDumpFromPosition requests a replication binlog dump from 115 // the replication source mysqld (typically the primary server in the cluster) 116 // at the given Position and then sends binlog 117 // events to the provided channel. 118 // The stream will continue in the background, waiting for new events if 119 // necessary, until the connection is closed, either by the replication source or 120 // by canceling the context. 121 // 122 // Note the context is valid and used until eventChan is closed. 123 func (bc *BinlogConnection) StartBinlogDumpFromPosition(ctx context.Context, binlogFilename string, startPos mysql.Position) (<-chan mysql.BinlogEvent, <-chan error, error) { 124 ctx, bc.cancel = context.WithCancel(ctx) 125 126 log.Infof("sending binlog dump command: startPos=%v, serverID=%v", startPos, bc.serverID) 127 if err := bc.SendBinlogDumpCommand(bc.serverID, binlogFilename, startPos); err != nil { 128 log.Errorf("couldn't send binlog dump command: %v", err) 129 return nil, nil, err 130 } 131 132 c, e := bc.streamEvents(ctx) 133 134 return c, e, nil 135 } 136 137 // streamEvents returns a channel on which events are streamed and a channel on 138 // which errors are propagated. 139 func (bc *BinlogConnection) streamEvents(ctx context.Context) (chan mysql.BinlogEvent, chan error) { 140 // FIXME(alainjobart) I think we can use a buffered channel for better performance. 141 eventChan := make(chan mysql.BinlogEvent) 142 errChan := make(chan error) 143 144 // Start reading events. 145 bc.wg.Add(1) 146 go func() { 147 defer func() { 148 close(eventChan) 149 close(errChan) 150 bc.wg.Done() 151 }() 152 for { 153 event, err := bc.Conn.ReadBinlogEvent() 154 if err != nil { 155 select { 156 case errChan <- err: 157 case <-ctx.Done(): 158 } 159 if sqlErr, ok := err.(*mysql.SQLError); ok && sqlErr.Number() == mysql.CRServerLost { 160 // CRServerLost = Lost connection to MySQL server during query 161 // This is not necessarily an error. It could just be that we closed 162 // the connection from outside. 163 log.Infof("connection closed during binlog stream (possibly intentional): %v", err) 164 return 165 } 166 log.Errorf("read error while streaming binlog events: %v", err) 167 return 168 } 169 170 select { 171 case eventChan <- event: 172 case <-ctx.Done(): 173 return 174 } 175 } 176 }() 177 return eventChan, errChan 178 } 179 180 // StartBinlogDumpFromBinlogBeforeTimestamp requests a replication 181 // binlog dump from the source mysqld starting with a file that has 182 // timestamps smaller than the provided timestamp, and then sends 183 // binlog events to the provided channel. 184 // 185 // The startup phase will list all the binary logs, and find the one 186 // that has events starting strictly before the provided timestamp. It 187 // will then start from there, and stream all events. It is the 188 // responsibility of the calling site to filter the events more. 189 // 190 // MySQL 5.6+ note: we need to do it that way because of the way the 191 // GTIDSet works. In the previous two streaming functions, we pass in 192 // the full GTIDSet (that has the list of all transactions seen in 193 // the replication stream). In this case, we don't know it, all we 194 // have is the binlog file names. We depend on parsing the first 195 // PREVIOUS_GTIDS_EVENT event in the logs to get it. So we need the 196 // caller to parse that event, and it can't be skipped because its 197 // timestamp is lower. Then, for each subsequent event, the caller 198 // also needs to add the event GTID to its GTIDSet. Otherwise it won't 199 // be correct ever. So the caller really needs to build up its GTIDSet 200 // along the entire file, not just for events whose timestamp is in a 201 // given range. 202 // 203 // The stream will continue in the background, waiting for new events if 204 // necessary, until the connection is closed, either by the source or 205 // by canceling the context. 206 // 207 // Note the context is valid and used until eventChan is closed. 208 func (bc *BinlogConnection) StartBinlogDumpFromBinlogBeforeTimestamp(ctx context.Context, timestamp int64) (<-chan mysql.BinlogEvent, <-chan error, error) { 209 ctx, bc.cancel = context.WithCancel(ctx) 210 211 filename, err := bc.findFileBeforeTimestamp(ctx, timestamp) 212 if err != nil { 213 return nil, nil, err 214 } 215 216 // Start dumping the logs. The position is '4' to skip the 217 // Binlog File Header. See this page for more info: 218 // https://dev.mysql.com/doc/internals/en/binlog-file.html 219 if err := bc.Conn.WriteComBinlogDump(bc.serverID, filename, 4, 0); err != nil { 220 return nil, nil, fmt.Errorf("failed to send the ComBinlogDump command: %v", err) 221 } 222 e, c := bc.streamEvents(ctx) 223 return e, c, nil 224 } 225 226 func (bc *BinlogConnection) findFileBeforeTimestamp(ctx context.Context, timestamp int64) (filename string, err error) { 227 // List the binlogs. 228 binlogs, err := bc.Conn.ExecuteFetch("SHOW BINARY LOGS", 1000, false) 229 if err != nil { 230 return "", fmt.Errorf("failed to SHOW BINARY LOGS: %v", err) 231 } 232 233 // Start with the most recent binlog file until we find the right event. 234 for binlogIndex := len(binlogs.Rows) - 1; binlogIndex >= 0; binlogIndex-- { 235 // Exit the loop early if context is canceled. 236 select { 237 case <-ctx.Done(): 238 return "", ctx.Err() 239 default: 240 } 241 242 filename := binlogs.Rows[binlogIndex][0].ToString() 243 blTimestamp, err := bc.getBinlogTimeStamp(filename) 244 if err != nil { 245 return "", err 246 } 247 if blTimestamp < timestamp { 248 // The binlog timestamp is older: we've found a good starting point. 249 return filename, nil 250 } 251 } 252 253 log.Errorf("couldn't find an old enough binlog to match timestamp >= %v (looked at %v files)", timestamp, len(binlogs.Rows)) 254 return "", ErrBinlogUnavailable 255 } 256 257 func (bc *BinlogConnection) getBinlogTimeStamp(filename string) (blTimestamp int64, err error) { 258 conn, err := connectForReplication(bc.cp) 259 if err != nil { 260 return 0, err 261 } 262 defer conn.Close() 263 264 if err := conn.WriteComBinlogDump(bc.serverID, filename, 4, 0); err != nil { 265 return 0, fmt.Errorf("failed to send the ComBinlogDump command: %v", err) 266 } 267 268 // Get the first event to get its timestamp. We skip 269 // events that don't have timestamps (although it seems 270 // most do anyway). 271 for { 272 event, err := conn.ReadBinlogEvent() 273 if err != nil { 274 return 0, fmt.Errorf("error reading binlog event %v: %v", filename, err) 275 } 276 if !event.IsValid() { 277 return 0, fmt.Errorf("first event from binlog %v is not valid", filename) 278 } 279 if ts := event.Timestamp(); ts > 0 { 280 return int64(ts), nil 281 } 282 } 283 } 284 285 // Close closes the binlog connection, which also signals an ongoing dump 286 // started with StartBinlogDump() to stop and close its BinlogEvent channel. 287 // The ID for the binlog connection is recycled back into the pool. 288 func (bc *BinlogConnection) Close() { 289 if bc.Conn != nil { 290 log.Infof("closing binlog socket to unblock reads") 291 bc.Conn.Close() 292 293 // bc.cancel is set at the beginning of the StartBinlogDump* 294 // methods. If we error out before then, it's nil. 295 // Note we also may error out before adding 1 to bc.wg, 296 // but then the Wait() still works. 297 if bc.cancel != nil { 298 log.Infof("waiting for binlog dump thread to end") 299 bc.cancel() 300 bc.wg.Wait() 301 bc.cancel = nil 302 } 303 304 log.Infof("closing binlog MySQL client with serverID %v. Will recycle ID.", bc.serverID) 305 bc.Conn = nil 306 serverIDPool.Put(bc.serverID) 307 } 308 }