github.com/percona/percona-xtradb-cluster-operator@v1.14.0/cmd/pitr/collector/collector.go

package collector

import (
	"bufio"
	"bytes"
	"context"
	"crypto/md5"
	"fmt"
	"io"
	"log"
	"os"
	"os/exec"
	"strings"
	"syscall"
	"time"

	"github.com/go-sql-driver/mysql"
	"github.com/pkg/errors"

	"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/pxc"
	"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup/storage"
)

type Collector struct {
	db              *pxc.PXC
	storage         storage.Storage
	lastUploadedSet pxc.GTIDSet // last uploaded binary logs set
	pxcServiceName  string      // k8s service name for PXC, used to resolve the correct host for the connection
	pxcUser         string      // user for the connection to PXC
	pxcPass         string      // password for the connection to PXC
}

type Config struct {
	PXCServiceName     string `env:"PXC_SERVICE,required"`
	PXCUser            string `env:"PXC_USER,required"`
	PXCPass            string `env:"PXC_PASS,required"`
	StorageType        string `env:"STORAGE_TYPE,required"`
	BackupStorageS3    BackupS3
	BackupStorageAzure BackupAzure
	BufferSize         int64   `env:"BUFFER_SIZE"`
	CollectSpanSec     float64 `env:"COLLECT_SPAN_SEC" envDefault:"60"`
	VerifyTLS          bool    `env:"VERIFY_TLS" envDefault:"true"`
	TimeoutSeconds     float64 `env:"TIMEOUT_SECONDS" envDefault:"60"`
}

type BackupS3 struct {
	Endpoint    string `env:"ENDPOINT" envDefault:"s3.amazonaws.com"`
	AccessKeyID string `env:"ACCESS_KEY_ID,required"`
	AccessKey   string `env:"SECRET_ACCESS_KEY,required"`
	BucketURL   string `env:"S3_BUCKET_URL,required"`
	Region      string `env:"DEFAULT_REGION,required"`
}

type BackupAzure struct {
	Endpoint      string `env:"AZURE_ENDPOINT,required"`
	ContainerPath string `env:"AZURE_CONTAINER_PATH,required"`
	StorageClass  string `env:"AZURE_STORAGE_CLASS"`
	AccountName   string `env:"AZURE_STORAGE_ACCOUNT,required"`
	AccountKey    string `env:"AZURE_ACCESS_KEY,required"`
}

const (
	lastSetFilePrefix string = "last-binlog-set-"   // filename prefix for the object where the last binlog set is stored
	gtidPostfix       string = "-gtid-set"          // filename postfix for files with a GTID set
	timelinePath      string = "/tmp/pitr-timeline" // path to the file with the timeline
)

func New(ctx context.Context, c Config) (*Collector, error) {
	var s storage.Storage
	var err error
	switch c.StorageType {
	case "s3":
		bucketArr := strings.Split(c.BackupStorageS3.BucketURL, "/")
		prefix := ""
		// if c.BackupStorageS3.BucketURL looks like "my-bucket/data/more-data" we need prefix to be "data/more-data/"
		if len(bucketArr) > 1 {
			prefix = strings.TrimPrefix(c.BackupStorageS3.BucketURL, bucketArr[0]+"/") + "/"
		}
		s, err = storage.NewS3(ctx, c.BackupStorageS3.Endpoint, c.BackupStorageS3.AccessKeyID, c.BackupStorageS3.AccessKey, bucketArr[0], prefix, c.BackupStorageS3.Region, c.VerifyTLS)
		if err != nil {
			return nil, errors.Wrap(err, "new storage manager")
		}
	case "azure":
		container, prefix, _ := strings.Cut(c.BackupStorageAzure.ContainerPath, "/")
		if prefix != "" {
			prefix += "/"
		}
		s, err = storage.NewAzure(c.BackupStorageAzure.AccountName, c.BackupStorageAzure.AccountKey, c.BackupStorageAzure.Endpoint, container, prefix)
		if err != nil {
			return nil, errors.Wrap(err, "new azure storage")
		}
	default:
		return nil, errors.New("unknown STORAGE_TYPE")
	}

	return &Collector{
		storage:        s,
		pxcUser:        c.PXCUser,
		pxcServiceName: c.PXCServiceName,
	}, nil
}
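
// Run connects to the PXC member that has the oldest binary log, resets the
// in-memory last-uploaded GTID set (it is always re-read from the storage),
// and collects any binary logs that have not been uploaded yet. A minimal
// sketch of how a PITR entrypoint might drive it (the surrounding loop is
// illustrative only, not part of this package):
//
//	c, err := collector.New(ctx, cfg)
//	if err != nil {
//		log.Fatalln("ERROR: new collector:", err)
//	}
//	for {
//		if err := c.Run(ctx); err != nil {
//			log.Println("ERROR:", err)
//		}
//		time.Sleep(time.Duration(cfg.CollectSpanSec) * time.Second)
//	}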
func (c *Collector) Run(ctx context.Context) error {
	err := c.newDB(ctx)
	if err != nil {
		return errors.Wrap(err, "new db connection")
	}
	defer c.close()

	// reset the last uploaded set because we always
	// read it back from the storage
	c.lastUploadedSet = pxc.NewGTIDSet("")

	err = c.CollectBinLogs(ctx)
	if err != nil {
		return errors.Wrap(err, "collect binlog files")
	}

	return nil
}

func (c *Collector) lastGTIDSet(ctx context.Context, suffix string) (pxc.GTIDSet, error) {
	// get the last binlog set stored on S3
	lastSetObject, err := c.storage.GetObject(ctx, lastSetFilePrefix+suffix)
	if err != nil {
		if err == storage.ErrObjectNotFound {
			return pxc.GTIDSet{}, nil
		}
		return pxc.GTIDSet{}, errors.Wrap(err, "get last set content")
	}
	lastSet, err := io.ReadAll(lastSetObject)
	if err != nil {
		return pxc.GTIDSet{}, errors.Wrap(err, "read last gtid set")
	}
	return pxc.NewGTIDSet(string(lastSet)), nil
}

func (c *Collector) newDB(ctx context.Context) error {
	file, err := os.Open("/etc/mysql/mysql-users-secret/xtrabackup")
	if err != nil {
		return errors.Wrap(err, "open file")
	}
	pxcPass, err := io.ReadAll(file)
	if err != nil {
		return errors.Wrap(err, "read password")
	}
	c.pxcPass = string(pxcPass)

	host, err := pxc.GetPXCOldestBinlogHost(ctx, c.pxcServiceName, c.pxcUser, c.pxcPass)
	if err != nil {
		return errors.Wrap(err, "get host")
	}

	log.Println("Reading binlogs from pxc with hostname=", host)

	c.db, err = pxc.NewPXC(host, c.pxcUser, c.pxcPass)
	if err != nil {
		return errors.Wrapf(err, "new manager with host %s", host)
	}

	return nil
}

func (c *Collector) close() error {
	return c.db.Close()
}

func (c *Collector) removeEmptyBinlogs(ctx context.Context, logs []pxc.Binlog) ([]pxc.Binlog, error) {
	result := make([]pxc.Binlog, 0)
	for _, v := range logs {
		if !v.GTIDSet.IsEmpty() {
			result = append(result, v)
		}
	}
	return result, nil
}

func (c *Collector) filterBinLogs(ctx context.Context, logs []pxc.Binlog, lastBinlogName string) ([]pxc.Binlog, error) {
	if lastBinlogName == "" {
		return c.removeEmptyBinlogs(ctx, logs)
	}

	logsLen := len(logs)

	// check the bound before indexing so a missing lastBinlogName
	// does not cause an out-of-range panic
	startIndex := 0
	for startIndex < logsLen && logs[startIndex].Name != lastBinlogName {
		startIndex++
	}

	if startIndex == logsLen {
		return nil, nil
	}

	set, err := c.db.GetGTIDSet(ctx, logs[startIndex].Name)
	if err != nil {
		return nil, errors.Wrap(err, "get gtid set of last uploaded binlog")
	}
	// we don't need to re-upload the last file
	// if its gtid set has not changed
	if set == c.lastUploadedSet.Raw() {
		startIndex++
	}

	return c.removeEmptyBinlogs(ctx, logs[startIndex:])
}

func createGapFile(gtidSet pxc.GTIDSet) error {
	p := "/tmp/gap-detected"
	f, err := os.Create(p)
	if err != nil {
		return errors.Wrapf(err, "create %s", p)
	}

	_, err = f.WriteString(gtidSet.Raw())
	if err != nil {
		return errors.Wrapf(err, "write GTID set to %s", p)
	}

	return nil
}

func fileExists(name string) (bool, error) {
	_, err := os.Stat(name)
	if err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, errors.Wrap(err, "os stat")
	}
	return true, nil
}
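
// createTimelineFile records the timestamp of the first collected binlog in
// /tmp/pitr-timeline; updateTimelineFile then keeps the last line of that
// file up to date with the timestamp of the most recently uploaded binlog,
// so the file always describes the time range covered by the uploaded logs.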
func createTimelineFile(firstTs string) error {
	f, err := os.Create(timelinePath)
	if err != nil {
		return errors.Wrapf(err, "create %s", timelinePath)
	}

	_, err = f.WriteString(firstTs)
	if err != nil {
		return errors.Wrap(err, "write first timestamp to timeline file")
	}

	return nil
}

func updateTimelineFile(lastTs string) error {
	f, err := os.OpenFile(timelinePath, os.O_RDWR, 0o644)
	if err != nil {
		return errors.Wrapf(err, "open %s", timelinePath)
	}
	defer f.Close()

	var lines []string
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}

	if err := scanner.Err(); err != nil {
		return errors.Wrapf(err, "scan %s", timelinePath)
	}

	if len(lines) > 1 {
		lines[len(lines)-1] = lastTs
	} else {
		lines = append(lines, lastTs)
	}

	if _, err := f.Seek(0, 0); err != nil {
		return errors.Wrapf(err, "seek %s", timelinePath)
	}

	if err := f.Truncate(0); err != nil {
		return errors.Wrapf(err, "truncate %s", timelinePath)
	}

	_, err = f.WriteString(strings.Join(lines, "\n"))
	if err != nil {
		return errors.Wrap(err, "write last timestamp to timeline file")
	}

	return nil
}

func (c *Collector) addGTIDSets(ctx context.Context, logs []pxc.Binlog) error {
	for i, v := range logs {
		set, err := c.db.GetGTIDSet(ctx, v.Name)
		if err != nil {
			if errors.Is(err, &mysql.MySQLError{Number: 3200}) {
				log.Printf("ERROR: Binlog file %s is invalid on host %s: %s\n", v.Name, c.db.GetHost(), err.Error())
				continue
			}
			return errors.Wrap(err, "get GTID set")
		}
		logs[i].GTIDSet = pxc.NewGTIDSet(set)
	}
	return nil
}
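
// CollectBinLogs lists the binlogs on the source host, resolves their GTID
// sets, determines which local binlog contains the last uploaded GTID set,
// and uploads everything newer than that. If the last uploaded set cannot be
// found in any local binlog, the gap is reported via /tmp/gap-detected and
// the logs are uploaded anyway.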
func (c *Collector) CollectBinLogs(ctx context.Context) error {
	list, err := c.db.GetBinLogList(ctx)
	if err != nil {
		return errors.Wrap(err, "get binlog list")
	}
	err = c.addGTIDSets(ctx, list)
	if err != nil {
		return errors.Wrap(err, "get GTID sets")
	}
	var lastGTIDSetList []string
	for i := len(list) - 1; i >= 0 && len(lastGTIDSetList) == 0; i-- {
		gtidSetList := list[i].GTIDSet.List()
		if gtidSetList == nil {
			continue
		}
		lastGTIDSetList = gtidSetList
	}

	if len(lastGTIDSetList) == 0 {
		log.Println("No binlogs to upload")
		return nil
	}

	for _, gtidSet := range lastGTIDSetList {
		sourceID := strings.Split(gtidSet, ":")[0]
		c.lastUploadedSet, err = c.lastGTIDSet(ctx, sourceID)
		if err != nil {
			return errors.Wrap(err, "get last uploaded gtid set")
		}
		if !c.lastUploadedSet.IsEmpty() {
			break
		}
	}

	lastUploadedBinlogName := ""

	if !c.lastUploadedSet.IsEmpty() {
		for i := len(list) - 1; i >= 0 && lastUploadedBinlogName == ""; i-- {
			for _, gtidSet := range list[i].GTIDSet.List() {
				if lastUploadedBinlogName != "" {
					break
				}
				for _, lastUploaded := range c.lastUploadedSet.List() {
					isSubset, err := c.db.GTIDSubset(ctx, lastUploaded, gtidSet)
					if err != nil {
						return errors.Wrap(err, "check if gtid set is subset")
					}
					if isSubset {
						lastUploadedBinlogName = list[i].Name
						break
					}
					isSubset, err = c.db.GTIDSubset(ctx, gtidSet, lastUploaded)
					if err != nil {
						return errors.Wrap(err, "check if gtid set is subset")
					}
					if isSubset {
						lastUploadedBinlogName = list[i].Name
						break
					}
				}
			}
		}

		if lastUploadedBinlogName == "" {
			log.Println("ERROR: Couldn't find the binlog that contains GTID set:", c.lastUploadedSet.Raw())
			log.Println("ERROR: Gap detected in the binary logs. Binary logs will be uploaded anyway, but a full backup is needed for consistent recovery.")
			if err := createGapFile(c.lastUploadedSet); err != nil {
				return errors.Wrap(err, "create gap file")
			}
		}
	}

	list, err = c.filterBinLogs(ctx, list, lastUploadedBinlogName)
	if err != nil {
		return errors.Wrap(err, "filter empty binlogs")
	}

	if len(list) == 0 {
		log.Println("No binlogs to upload")
		return nil
	}

	if exists, err := fileExists(timelinePath); !exists && err == nil {
		firstTs, err := c.db.GetBinLogFirstTimestamp(ctx, list[0].Name)
		if err != nil {
			return errors.Wrap(err, "get first timestamp")
		}

		if err := createTimelineFile(firstTs); err != nil {
			return errors.Wrap(err, "create timeline file")
		}
	}

	for _, binlog := range list {
		err = c.manageBinlog(ctx, binlog)
		if err != nil {
			return errors.Wrap(err, "manage binlog")
		}

		lastTs, err := c.db.GetBinLogLastTimestamp(ctx, binlog.Name)
		if err != nil {
			return errors.Wrap(err, "get last timestamp")
		}

		if err := updateTimelineFile(lastTs); err != nil {
			return errors.Wrap(err, "update timeline file")
		}
	}
	return nil
}

func mergeErrors(a, b error) error {
	if a != nil && b != nil {
		return errors.New(a.Error() + "; " + b.Error())
	}
	if a != nil {
		return a
	}

	return b
}
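
// manageBinlog streams a single binlog to the storage: it creates a named
// pipe, runs `mysqlbinlog --raw -R` so the remote binlog is written into that
// pipe, and copies the pipe's contents into a storage object named after the
// binlog's first timestamp and the md5 sum of its GTID set. The GTID set
// itself and the per-source "last-binlog-set-" markers are uploaded alongside.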
func (c *Collector) manageBinlog(ctx context.Context, binlog pxc.Binlog) (err error) {
	binlogTmstmp, err := c.db.GetBinLogFirstTimestamp(ctx, binlog.Name)
	if err != nil {
		return errors.Wrapf(err, "get first timestamp for %s", binlog.Name)
	}

	binlogName := fmt.Sprintf("binlog_%s_%x", binlogTmstmp, md5.Sum([]byte(binlog.GTIDSet.Raw())))

	var setBuffer bytes.Buffer
	// no error handling because WriteString() always returns a nil error
	// nolint:errcheck
	setBuffer.WriteString(binlog.GTIDSet.Raw())

	tmpDir := os.TempDir() + "/"

	err = os.Remove(tmpDir + binlog.Name)
	if err != nil && !os.IsNotExist(err) {
		return errors.Wrap(err, "remove temp file")
	}

	err = syscall.Mkfifo(tmpDir+binlog.Name, 0o666)
	if err != nil {
		return errors.Wrap(err, "make named pipe file error")
	}

	errBuf := &bytes.Buffer{}
	cmd := exec.CommandContext(ctx, "mysqlbinlog", "-R", "-P", "33062", "--raw", "-h"+c.db.GetHost(), "-u"+c.pxcUser, binlog.Name)
	cmd.Env = append(cmd.Env, "MYSQL_PWD="+c.pxcPass)
	cmd.Dir = os.TempDir()
	cmd.Stderr = errBuf

	err = cmd.Start()
	if err != nil {
		return errors.Wrap(err, "run mysqlbinlog command")
	}

	log.Println("Starting to process binlog with name", binlog.Name)

	file, err := os.OpenFile(tmpDir+binlog.Name, os.O_RDONLY, os.ModeNamedPipe)
	if err != nil {
		return errors.Wrap(err, "open named pipe file error")
	}

	defer func() {
		errC := file.Close()
		if errC != nil {
			err = mergeErrors(err, errors.Wrapf(errC, "close tmp file for %s", binlog.Name))
			return
		}
		errR := os.Remove(tmpDir + binlog.Name)
		if errR != nil {
			err = mergeErrors(err, errors.Wrapf(errR, "remove tmp file for %s", binlog.Name))
			return
		}
	}()

	// create a pipe to transfer data from the binlog pipe to s3
	pr, pw := io.Pipe()

	go readBinlog(file, pw, errBuf, binlog.Name)

	err = c.storage.PutObject(ctx, binlogName, pr, -1)
	if err != nil {
		return errors.Wrapf(err, "put %s object", binlog.Name)
	}

	log.Println("Successfully wrote binlog file", binlog.Name, "to s3 with name", binlogName)

	err = cmd.Wait()
	if err != nil {
		return errors.Wrap(err, "wait mysqlbinlog command error:"+errBuf.String())
	}

	err = c.storage.PutObject(ctx, binlogName+gtidPostfix, &setBuffer, int64(setBuffer.Len()))
	if err != nil {
		return errors.Wrap(err, "put gtid-set object")
	}
	for _, gtidSet := range binlog.GTIDSet.List() {
		// no error handling because WriteString() always returns a nil error
		// nolint:errcheck
		setBuffer.WriteString(binlog.GTIDSet.Raw())

		err = c.storage.PutObject(ctx, lastSetFilePrefix+strings.Split(gtidSet, ":")[0], &setBuffer, int64(setBuffer.Len()))
		if err != nil {
			return errors.Wrap(err, "put last-set object")
		}
	}
	c.lastUploadedSet = binlog.GTIDSet

	return nil
}

func readBinlog(file *os.File, pipe *io.PipeWriter, errBuf *bytes.Buffer, binlogName string) {
	b := make([]byte, 10485760) // allocate a 10MB buffer

	// in case the binlog is slow and hasn't written anything to the file yet,
	// we have to skip the EOF error and try to read again until some data appears
	isEmpty := true
	for {
		if errBuf.Len() != 0 {
			// stop reading since we received an error from the mysqlbinlog command on stderr
			// no error handling because CloseWithError() always returns a nil error
			// nolint:errcheck
			pipe.CloseWithError(errors.Errorf("Error: mysqlbinlog %s", errBuf.String()))
			return
		}
		n, err := file.Read(b)
		if err == io.EOF {
			// If we got EOF immediately after starting to read the file, we should skip it since
			// the data has not appeared yet. If we receive EOF after we already got some data, then exit.
			if isEmpty {
				time.Sleep(10 * time.Millisecond)
				continue
			}
			break
		}
		if err != nil && !strings.Contains(err.Error(), "file already closed") {
			// no error handling because CloseWithError() always returns a nil error
			// nolint:errcheck
			pipe.CloseWithError(errors.Wrapf(err, "Error: reading named pipe for %s", binlogName))
			return
		}
		if n == 0 {
			time.Sleep(10 * time.Millisecond)
			continue
		}
		_, err = pipe.Write(b[:n])
		if err != nil {
			// no error handling because CloseWithError() always returns a nil error
			// nolint:errcheck
			pipe.CloseWithError(errors.Wrapf(err, "Error: write to pipe for %s", binlogName))
			return
		}
		isEmpty = false
	}
	// in case of any error, mysqlbinlog sends EOF to the pipe;
	// to avoid missing it, check the error buffer before closing the pipe without an error
	if errBuf.Len() != 0 {
		// no error handling because CloseWithError() always returns a nil error
		// nolint:errcheck
		pipe.CloseWithError(errors.New("mysqlbinlog error:" + errBuf.String()))
		return
	}
	// no error handling because Close() always returns a nil error
	// nolint:errcheck
	pipe.Close()
}