github.com/percona/percona-xtradb-cluster-operator@v1.14.0/cmd/pitr/collector/collector.go (about)

     1  package collector
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"crypto/md5"
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  	"os"
    12  	"os/exec"
    13  	"strings"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/go-sql-driver/mysql"
    18  	"github.com/pkg/errors"
    19  
    20  	"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/pxc"
    21  	"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup/storage"
    22  )
    23  
// Collector holds the state needed to read binary logs from a PXC node and
// upload them to the configured backup storage.
type Collector struct {
	// db is the connection to the PXC node the binlogs are read from; it is set by newDB.
	db *pxc.PXC
	// storage is the backup storage client (S3 or Azure) selected in New.
	storage         storage.Storage
	lastUploadedSet pxc.GTIDSet // GTID set of the last uploaded binary log
	pxcServiceName  string      // k8s service name for PXC; used to pick the host to connect to
	pxcUser         string      // user for the connection to PXC
	pxcPass         string      // password for the connection to PXC; read from a file in newDB
}
    32  
// Config is the collector configuration. The `env` struct tags name the
// environment variable each field is expected to come from; the code that
// parses those tags is not part of this file.
type Config struct {
	PXCServiceName string `env:"PXC_SERVICE,required"`
	PXCUser        string `env:"PXC_USER,required"`
	// PXCPass is not copied into the Collector by New; newDB reads the
	// password from a file instead.
	PXCPass string `env:"PXC_PASS,required"`
	// StorageType selects the storage backend; New accepts "s3" and "azure".
	StorageType        string `env:"STORAGE_TYPE,required"`
	BackupStorageS3    BackupS3
	BackupStorageAzure BackupAzure
	// BufferSize, CollectSpanSec and TimeoutSeconds are not referenced in this file.
	BufferSize     int64   `env:"BUFFER_SIZE"`
	CollectSpanSec float64 `env:"COLLECT_SPAN_SEC" envDefault:"60"`
	// VerifyTLS is passed to storage.NewS3 by New.
	VerifyTLS      bool    `env:"VERIFY_TLS" envDefault:"true"`
	TimeoutSeconds float64 `env:"TIMEOUT_SECONDS" envDefault:"60"`
}
    45  
// BackupS3 carries the settings New passes to storage.NewS3.
type BackupS3 struct {
	Endpoint    string `env:"ENDPOINT" envDefault:"s3.amazonaws.com"`
	AccessKeyID string `env:"ACCESS_KEY_ID,required"`
	AccessKey   string `env:"SECRET_ACCESS_KEY,required"`
	// BucketURL is "<bucket>" or "<bucket>/<path>"; New splits it at the first
	// "/" into the bucket name and an object prefix.
	BucketURL string `env:"S3_BUCKET_URL,required"`
	Region    string `env:"DEFAULT_REGION,required"`
}
    53  
// BackupAzure carries the settings New passes to storage.NewAzure.
type BackupAzure struct {
	Endpoint string `env:"AZURE_ENDPOINT,required"`
	// ContainerPath is "<container>" or "<container>/<path>"; New splits it at
	// the first "/" into the container name and an object prefix.
	ContainerPath string `env:"AZURE_CONTAINER_PATH,required"`
	// StorageClass is not referenced in this file.
	StorageClass string `env:"AZURE_STORAGE_CLASS"`
	AccountName  string `env:"AZURE_STORAGE_ACCOUNT,required"`
	AccountKey   string `env:"AZURE_ACCESS_KEY,required"`
}
    61  
// Names of storage objects and local files used by the collector.
const (
	lastSetFilePrefix string = "last-binlog-set-"   // name prefix of the object in which the last uploaded binlog GTID set is stored
	gtidPostfix       string = "-gtid-set"          // name suffix of objects that hold a binlog's GTID set
	timelinePath      string = "/tmp/pitr-timeline" // path to the local timeline file
)
    67  
    68  func New(ctx context.Context, c Config) (*Collector, error) {
    69  	var s storage.Storage
    70  	var err error
    71  	switch c.StorageType {
    72  	case "s3":
    73  		bucketArr := strings.Split(c.BackupStorageS3.BucketURL, "/")
    74  		prefix := ""
    75  		// if c.S3BucketURL looks like "my-bucket/data/more-data" we need prefix to be "data/more-data/"
    76  		if len(bucketArr) > 1 {
    77  			prefix = strings.TrimPrefix(c.BackupStorageS3.BucketURL, bucketArr[0]+"/") + "/"
    78  		}
    79  		s, err = storage.NewS3(ctx, c.BackupStorageS3.Endpoint, c.BackupStorageS3.AccessKeyID, c.BackupStorageS3.AccessKey, bucketArr[0], prefix, c.BackupStorageS3.Region, c.VerifyTLS)
    80  		if err != nil {
    81  			return nil, errors.Wrap(err, "new storage manager")
    82  		}
    83  	case "azure":
    84  		container, prefix, _ := strings.Cut(c.BackupStorageAzure.ContainerPath, "/")
    85  		if prefix != "" {
    86  			prefix += "/"
    87  		}
    88  		s, err = storage.NewAzure(c.BackupStorageAzure.AccountName, c.BackupStorageAzure.AccountKey, c.BackupStorageAzure.Endpoint, container, prefix)
    89  		if err != nil {
    90  			return nil, errors.Wrap(err, "new azure storage")
    91  		}
    92  	default:
    93  		return nil, errors.New("unknown STORAGE_TYPE")
    94  	}
    95  
    96  	return &Collector{
    97  		storage:        s,
    98  		pxcUser:        c.PXCUser,
    99  		pxcServiceName: c.PXCServiceName,
   100  	}, nil
   101  }
   102  
   103  func (c *Collector) Run(ctx context.Context) error {
   104  	err := c.newDB(ctx)
   105  	if err != nil {
   106  		return errors.Wrap(err, "new db connection")
   107  	}
   108  	defer c.close()
   109  
   110  	// remove last set because we always
   111  	// read it from aws file
   112  	c.lastUploadedSet = pxc.NewGTIDSet("")
   113  
   114  	err = c.CollectBinLogs(ctx)
   115  	if err != nil {
   116  		return errors.Wrap(err, "collect binlog files")
   117  	}
   118  
   119  	return nil
   120  }
   121  
   122  func (c *Collector) lastGTIDSet(ctx context.Context, suffix string) (pxc.GTIDSet, error) {
   123  	// get last binlog set stored on S3
   124  	lastSetObject, err := c.storage.GetObject(ctx, lastSetFilePrefix+suffix)
   125  	if err != nil {
   126  		if err == storage.ErrObjectNotFound {
   127  			return pxc.GTIDSet{}, nil
   128  		}
   129  		return pxc.GTIDSet{}, errors.Wrap(err, "get last set content")
   130  	}
   131  	lastSet, err := io.ReadAll(lastSetObject)
   132  	if err != nil {
   133  		return pxc.GTIDSet{}, errors.Wrap(err, "read last gtid set")
   134  	}
   135  	return pxc.NewGTIDSet(string(lastSet)), nil
   136  }
   137  
   138  func (c *Collector) newDB(ctx context.Context) error {
   139  	file, err := os.Open("/etc/mysql/mysql-users-secret/xtrabackup")
   140  	if err != nil {
   141  		return errors.Wrap(err, "open file")
   142  	}
   143  	pxcPass, err := io.ReadAll(file)
   144  	if err != nil {
   145  		return errors.Wrap(err, "read password")
   146  	}
   147  	c.pxcPass = string(pxcPass)
   148  
   149  	host, err := pxc.GetPXCOldestBinlogHost(ctx, c.pxcServiceName, c.pxcUser, c.pxcPass)
   150  	if err != nil {
   151  		return errors.Wrap(err, "get host")
   152  	}
   153  
   154  	log.Println("Reading binlogs from pxc with hostname=", host)
   155  
   156  	c.db, err = pxc.NewPXC(host, c.pxcUser, c.pxcPass)
   157  	if err != nil {
   158  		return errors.Wrapf(err, "new manager with host %s", host)
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  func (c *Collector) close() error {
   165  	return c.db.Close()
   166  }
   167  
   168  func (c *Collector) removeEmptyBinlogs(ctx context.Context, logs []pxc.Binlog) ([]pxc.Binlog, error) {
   169  	result := make([]pxc.Binlog, 0)
   170  	for _, v := range logs {
   171  		if !v.GTIDSet.IsEmpty() {
   172  			result = append(result, v)
   173  		}
   174  	}
   175  	return result, nil
   176  }
   177  
   178  func (c *Collector) filterBinLogs(ctx context.Context, logs []pxc.Binlog, lastBinlogName string) ([]pxc.Binlog, error) {
   179  	if lastBinlogName == "" {
   180  		return c.removeEmptyBinlogs(ctx, logs)
   181  	}
   182  
   183  	logsLen := len(logs)
   184  
   185  	startIndex := 0
   186  	for logs[startIndex].Name != lastBinlogName && startIndex < logsLen {
   187  		startIndex++
   188  	}
   189  
   190  	if startIndex == logsLen {
   191  		return nil, nil
   192  	}
   193  
   194  	set, err := c.db.GetGTIDSet(ctx, logs[startIndex].Name)
   195  	if err != nil {
   196  		return nil, errors.Wrap(err, "get gtid set of last uploaded binlog")
   197  	}
   198  	// we don't need to reupload last file
   199  	// if gtid set is not changed
   200  	if set == c.lastUploadedSet.Raw() {
   201  		startIndex++
   202  	}
   203  
   204  	return c.removeEmptyBinlogs(ctx, logs[startIndex:])
   205  }
   206  
   207  func createGapFile(gtidSet pxc.GTIDSet) error {
   208  	p := "/tmp/gap-detected"
   209  	f, err := os.Create(p)
   210  	if err != nil {
   211  		return errors.Wrapf(err, "create %s", p)
   212  	}
   213  
   214  	_, err = f.WriteString(gtidSet.Raw())
   215  	if err != nil {
   216  		return errors.Wrapf(err, "write GTID set to %s", p)
   217  	}
   218  
   219  	return nil
   220  }
   221  
   222  func fileExists(name string) (bool, error) {
   223  	_, err := os.Stat(name)
   224  	if err != nil {
   225  		if os.IsNotExist(err) {
   226  			return false, nil
   227  		}
   228  		return false, errors.Wrap(err, "os stat")
   229  	}
   230  	return true, nil
   231  }
   232  
   233  func createTimelineFile(firstTs string) error {
   234  	f, err := os.Create(timelinePath)
   235  	if err != nil {
   236  		return errors.Wrapf(err, "create %s", timelinePath)
   237  	}
   238  
   239  	_, err = f.WriteString(firstTs)
   240  	if err != nil {
   241  		return errors.Wrap(err, "write first timestamp to timeline file")
   242  	}
   243  
   244  	return nil
   245  }
   246  
   247  func updateTimelineFile(lastTs string) error {
   248  	f, err := os.OpenFile(timelinePath, os.O_RDWR, 0o644)
   249  	if err != nil {
   250  		return errors.Wrapf(err, "open %s", timelinePath)
   251  	}
   252  	defer f.Close()
   253  
   254  	var lines []string
   255  	scanner := bufio.NewScanner(f)
   256  	for scanner.Scan() {
   257  		lines = append(lines, scanner.Text())
   258  	}
   259  
   260  	if err := scanner.Err(); err != nil {
   261  		return errors.Wrapf(err, "scan %s", timelinePath)
   262  	}
   263  
   264  	if len(lines) > 1 {
   265  		lines[len(lines)-1] = lastTs
   266  	} else {
   267  		lines = append(lines, lastTs)
   268  	}
   269  
   270  	if _, err := f.Seek(0, 0); err != nil {
   271  		return errors.Wrapf(err, "seek %s", timelinePath)
   272  	}
   273  
   274  	if err := f.Truncate(0); err != nil {
   275  		return errors.Wrapf(err, "truncate %s", timelinePath)
   276  	}
   277  
   278  	_, err = f.WriteString(strings.Join(lines, "\n"))
   279  	if err != nil {
   280  		return errors.Wrap(err, "write last timestamp to timeline file")
   281  	}
   282  
   283  	return nil
   284  }
   285  
   286  func (c *Collector) addGTIDSets(ctx context.Context, logs []pxc.Binlog) error {
   287  	for i, v := range logs {
   288  		set, err := c.db.GetGTIDSet(ctx, v.Name)
   289  		if err != nil {
   290  			if errors.Is(err, &mysql.MySQLError{Number: 3200}) {
   291  				log.Printf("ERROR: Binlog file %s is invalid on host %s: %s\n", v.Name, c.db.GetHost(), err.Error())
   292  				continue
   293  			}
   294  			return errors.Wrap(err, "get GTID set")
   295  		}
   296  		logs[i].GTIDSet = pxc.NewGTIDSet(set)
   297  	}
   298  	return nil
   299  }
   300  
   301  func (c *Collector) CollectBinLogs(ctx context.Context) error {
   302  	list, err := c.db.GetBinLogList(ctx)
   303  	if err != nil {
   304  		return errors.Wrap(err, "get binlog list")
   305  	}
   306  	err = c.addGTIDSets(ctx, list)
   307  	if err != nil {
   308  		return errors.Wrap(err, "get GTID sets")
   309  	}
   310  	var lastGTIDSetList []string
   311  	for i := len(list) - 1; i >= 0 && len(lastGTIDSetList) == 0; i-- {
   312  		gtidSetList := list[i].GTIDSet.List()
   313  		if gtidSetList == nil {
   314  			continue
   315  		}
   316  		lastGTIDSetList = gtidSetList
   317  	}
   318  
   319  	if len(lastGTIDSetList) == 0 {
   320  		log.Println("No binlogs to upload")
   321  		return nil
   322  	}
   323  
   324  	for _, gtidSet := range lastGTIDSetList {
   325  		sourceID := strings.Split(gtidSet, ":")[0]
   326  		c.lastUploadedSet, err = c.lastGTIDSet(ctx, sourceID)
   327  		if err != nil {
   328  			return errors.Wrap(err, "get last uploaded gtid set")
   329  		}
   330  		if !c.lastUploadedSet.IsEmpty() {
   331  			break
   332  		}
   333  	}
   334  
   335  	lastUploadedBinlogName := ""
   336  
   337  	if !c.lastUploadedSet.IsEmpty() {
   338  		for i := len(list) - 1; i >= 0 && lastUploadedBinlogName == ""; i-- {
   339  			for _, gtidSet := range list[i].GTIDSet.List() {
   340  				if lastUploadedBinlogName != "" {
   341  					break
   342  				}
   343  				for _, lastUploaded := range c.lastUploadedSet.List() {
   344  					isSubset, err := c.db.GTIDSubset(ctx, lastUploaded, gtidSet)
   345  					if err != nil {
   346  						return errors.Wrap(err, "check if gtid set is subset")
   347  					}
   348  					if isSubset {
   349  						lastUploadedBinlogName = list[i].Name
   350  						break
   351  					}
   352  					isSubset, err = c.db.GTIDSubset(ctx, gtidSet, lastUploaded)
   353  					if err != nil {
   354  						return errors.Wrap(err, "check if gtid set is subset")
   355  					}
   356  					if isSubset {
   357  						lastUploadedBinlogName = list[i].Name
   358  						break
   359  					}
   360  				}
   361  			}
   362  		}
   363  
   364  		if lastUploadedBinlogName == "" {
   365  			log.Println("ERROR: Couldn't find the binlog that contains GTID set:", c.lastUploadedSet.Raw())
   366  			log.Println("ERROR: Gap detected in the binary logs. Binary logs will be uploaded anyway, but full backup needed for consistent recovery.")
   367  			if err := createGapFile(c.lastUploadedSet); err != nil {
   368  				return errors.Wrap(err, "create gap file")
   369  			}
   370  		}
   371  	}
   372  
   373  	list, err = c.filterBinLogs(ctx, list, lastUploadedBinlogName)
   374  	if err != nil {
   375  		return errors.Wrap(err, "filter empty binlogs")
   376  	}
   377  
   378  	if len(list) == 0 {
   379  		log.Println("No binlogs to upload")
   380  		return nil
   381  	}
   382  
   383  	if exists, err := fileExists(timelinePath); !exists && err == nil {
   384  		firstTs, err := c.db.GetBinLogFirstTimestamp(ctx, list[0].Name)
   385  		if err != nil {
   386  			return errors.Wrap(err, "get first timestamp")
   387  		}
   388  
   389  		if err := createTimelineFile(firstTs); err != nil {
   390  			return errors.Wrap(err, "create timeline file")
   391  		}
   392  	}
   393  
   394  	for _, binlog := range list {
   395  		err = c.manageBinlog(ctx, binlog)
   396  		if err != nil {
   397  			return errors.Wrap(err, "manage binlog")
   398  		}
   399  
   400  		lastTs, err := c.db.GetBinLogLastTimestamp(ctx, binlog.Name)
   401  		if err != nil {
   402  			return errors.Wrap(err, "get last timestamp")
   403  		}
   404  
   405  		if err := updateTimelineFile(lastTs); err != nil {
   406  			return errors.Wrap(err, "update timeline file")
   407  		}
   408  	}
   409  	return nil
   410  }
   411  
   412  func mergeErrors(a, b error) error {
   413  	if a != nil && b != nil {
   414  		return errors.New(a.Error() + "; " + b.Error())
   415  	}
   416  	if a != nil {
   417  		return a
   418  	}
   419  
   420  	return b
   421  }
   422  
   423  func (c *Collector) manageBinlog(ctx context.Context, binlog pxc.Binlog) (err error) {
   424  	binlogTmstmp, err := c.db.GetBinLogFirstTimestamp(ctx, binlog.Name)
   425  	if err != nil {
   426  		return errors.Wrapf(err, "get first timestamp for %s", binlog.Name)
   427  	}
   428  
   429  	binlogName := fmt.Sprintf("binlog_%s_%x", binlogTmstmp, md5.Sum([]byte(binlog.GTIDSet.Raw())))
   430  
   431  	var setBuffer bytes.Buffer
   432  	// no error handling because WriteString() always return nil error
   433  	// nolint:errcheck
   434  	setBuffer.WriteString(binlog.GTIDSet.Raw())
   435  
   436  	tmpDir := os.TempDir() + "/"
   437  
   438  	err = os.Remove(tmpDir + binlog.Name)
   439  	if err != nil && !os.IsNotExist(err) {
   440  		return errors.Wrap(err, "remove temp file")
   441  	}
   442  
   443  	err = syscall.Mkfifo(tmpDir+binlog.Name, 0o666)
   444  	if err != nil {
   445  		return errors.Wrap(err, "make named pipe file error")
   446  	}
   447  
   448  	errBuf := &bytes.Buffer{}
   449  	cmd := exec.CommandContext(ctx, "mysqlbinlog", "-R", "-P", "33062", "--raw", "-h"+c.db.GetHost(), "-u"+c.pxcUser, binlog.Name)
   450  	cmd.Env = append(cmd.Env, "MYSQL_PWD="+c.pxcPass)
   451  	cmd.Dir = os.TempDir()
   452  	cmd.Stderr = errBuf
   453  
   454  	err = cmd.Start()
   455  	if err != nil {
   456  		return errors.Wrap(err, "run mysqlbinlog command")
   457  	}
   458  
   459  	log.Println("Starting to process binlog with name", binlog.Name)
   460  
   461  	file, err := os.OpenFile(tmpDir+binlog.Name, os.O_RDONLY, os.ModeNamedPipe)
   462  	if err != nil {
   463  		return errors.Wrap(err, "open named pipe file error")
   464  	}
   465  
   466  	defer func() {
   467  		errC := file.Close()
   468  		if errC != nil {
   469  			err = mergeErrors(err, errors.Wrapf(errC, "close tmp file for %s", binlog.Name))
   470  			return
   471  		}
   472  		errR := os.Remove(tmpDir + binlog.Name)
   473  		if errR != nil {
   474  			err = mergeErrors(err, errors.Wrapf(errR, "remove tmp file for %s", binlog.Name))
   475  			return
   476  		}
   477  	}()
   478  
   479  	// create a pipe to transfer data from the binlog pipe to s3
   480  	pr, pw := io.Pipe()
   481  
   482  	go readBinlog(file, pw, errBuf, binlog.Name)
   483  
   484  	err = c.storage.PutObject(ctx, binlogName, pr, -1)
   485  	if err != nil {
   486  		return errors.Wrapf(err, "put %s object", binlog.Name)
   487  	}
   488  
   489  	log.Println("Successfully written binlog file", binlog.Name, "to s3 with name", binlogName)
   490  
   491  	err = cmd.Wait()
   492  	if err != nil {
   493  		return errors.Wrap(err, "wait mysqlbinlog command error:"+errBuf.String())
   494  	}
   495  
   496  	err = c.storage.PutObject(ctx, binlogName+gtidPostfix, &setBuffer, int64(setBuffer.Len()))
   497  	if err != nil {
   498  		return errors.Wrap(err, "put gtid-set object")
   499  	}
   500  	for _, gtidSet := range binlog.GTIDSet.List() {
   501  		// no error handling because WriteString() always return nil error
   502  		// nolint:errcheck
   503  		setBuffer.WriteString(binlog.GTIDSet.Raw())
   504  
   505  		err = c.storage.PutObject(ctx, lastSetFilePrefix+strings.Split(gtidSet, ":")[0], &setBuffer, int64(setBuffer.Len()))
   506  		if err != nil {
   507  			return errors.Wrap(err, "put last-set object")
   508  		}
   509  	}
   510  	c.lastUploadedSet = binlog.GTIDSet
   511  
   512  	return nil
   513  }
   514  
   515  func readBinlog(file *os.File, pipe *io.PipeWriter, errBuf *bytes.Buffer, binlogName string) {
   516  	b := make([]byte, 10485760) // alloc buffer for 10mb
   517  
   518  	// in case of binlog is slow and hasn't written anything to the file yet
   519  	// we have to skip this error and try to read again until some data appears
   520  	isEmpty := true
   521  	for {
   522  		if errBuf.Len() != 0 {
   523  			// stop reading since we receive error from binlog command in stderr
   524  			// no error handling because CloseWithError() always return nil error
   525  			// nolint:errcheck
   526  			pipe.CloseWithError(errors.Errorf("Error: mysqlbinlog %s", errBuf.String()))
   527  			return
   528  		}
   529  		n, err := file.Read(b)
   530  		if err == io.EOF {
   531  			// If we got EOF immediately after starting to read a file we should skip it since
   532  			// data has not appeared yet. If we receive EOF error after already got some data - then exit.
   533  			if isEmpty {
   534  				time.Sleep(10 * time.Millisecond)
   535  				continue
   536  			}
   537  			break
   538  		}
   539  		if err != nil && !strings.Contains(err.Error(), "file already closed") {
   540  			// no error handling because CloseWithError() always return nil error
   541  			// nolint:errcheck
   542  			pipe.CloseWithError(errors.Wrapf(err, "Error: reading named pipe for %s", binlogName))
   543  			return
   544  		}
   545  		if n == 0 {
   546  			time.Sleep(10 * time.Millisecond)
   547  			continue
   548  		}
   549  		_, err = pipe.Write(b[:n])
   550  		if err != nil {
   551  			// no error handling because CloseWithError() always return nil error
   552  			// nolint:errcheck
   553  			pipe.CloseWithError(errors.Wrapf(err, "Error: write to pipe for %s", binlogName))
   554  			return
   555  		}
   556  		isEmpty = false
   557  	}
   558  	// in case of any errors from mysqlbinlog it sends EOF to pipe
   559  	// to prevent this, need to check error buffer before closing pipe without error
   560  	if errBuf.Len() != 0 {
   561  		// no error handling because CloseWithError() always return nil error
   562  		// nolint:errcheck
   563  		pipe.CloseWithError(errors.New("mysqlbinlog error:" + errBuf.String()))
   564  		return
   565  	}
   566  	// no error handling because Close() always return nil error
   567  	// nolint:errcheck
   568  	pipe.Close()
   569  }