github.com/weaviate/weaviate@v1.24.6/usecases/backup/restorer.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package backup

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"reflect"
	"sync"
	"time"

	enterrors "github.com/weaviate/weaviate/entities/errors"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	"github.com/weaviate/weaviate/entities/backup"
	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/usecases/monitoring"
	migratefs "github.com/weaviate/weaviate/usecases/schema/migrate/fs"
	"github.com/weaviate/weaviate/usecases/sharding"
)

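// restorer handles restore operations on a single node: it waits for the
// coordinator's commit signal, copies the backed-up files from the configured
// backend to local storage, and re-registers the restored classes with the schema.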
type restorer struct {
	node     string // node name
	logger   logrus.FieldLogger
	sourcer  Sourcer
	backends BackupBackendProvider
	schema   schemaManger
	shardSyncChan

	// TODO: keeping the status in memory after the restore has finished
	// is not a proper solution for communicating status to the user.
	// On app crash or restart this data will be lost.
	// This should be regarded as a workaround and should be fixed asap.
	restoreStatusMap sync.Map
}

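// newRestorer creates a restorer for the given node using the provided
// sourcer, backend provider, and schema manager.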
func newRestorer(node string, logger logrus.FieldLogger,
	sourcer Sourcer,
	backends BackupBackendProvider,
	schema schemaManger,
) *restorer {
	return &restorer{
		node:          node,
		logger:        logger,
		sourcer:       sourcer,
		backends:      backends,
		schema:        schema,
		shardSyncChan: shardSyncChan{coordChan: make(chan interface{}, 5)},
	}
}

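// restore ensures no other restore is in progress and then restores the
// backup described by desc in the background, after waiting for the
// coordinator to commit. The final outcome is recorded in restoreStatusMap.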
func (r *restorer) restore(ctx context.Context,
	req *Request,
	desc *backup.BackupDescriptor,
	store nodeStore,
) (CanCommitResponse, error) {
	expiration := req.Duration
	if expiration > _TimeoutShardCommit {
		expiration = _TimeoutShardCommit
	}
	ret := CanCommitResponse{
		Method:  OpRestore,
		ID:      req.ID,
		Timeout: expiration,
	}

	destPath := store.HomeDir()

	// make sure there is no active restore
	if prevID := r.lastOp.renew(req.ID, destPath); prevID != "" {
		err := fmt.Errorf("restore %s already in progress", prevID)
		return ret, err
	}
	r.waitingForCoordinatorToCommit.Store(true) // is set to false by wait()

	f := func() {
		var err error
		status := Status{
			Path:      destPath,
			StartedAt: time.Now().UTC(),
			Status:    backup.Transferring,
		}
		defer func() {
			status.CompletedAt = time.Now().UTC()
			if err == nil {
				status.Status = backup.Success
			} else {
				status.Err = err.Error()
				status.Status = backup.Failed
			}
			r.restoreStatusMap.Store(basePath(req.Backend, req.ID), status)
			r.lastOp.reset()
		}()

		if err = r.waitForCoordinator(expiration, req.ID); err != nil {
			r.logger.WithField("action", "restore").
				Error(err)
			r.lastAsyncError = err
			return
		}

		err = r.restoreAll(context.Background(), desc, req.CPUPercentage, store, req.NodeMapping)
		logFields := logrus.Fields{"action": "restore", "backup_id": req.ID}
		if err != nil {
			r.logger.WithFields(logFields).Error(err)
		} else {
			r.logger.WithFields(logFields).Info("backup restored successfully")
		}
	}
	enterrors.GoWrapper(f, r.logger)

	return ret, nil
}

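// restoreAll restores all classes contained in the backup descriptor,
// one class at a time.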
func (r *restorer) restoreAll(ctx context.Context,
	desc *backup.BackupDescriptor, cpuPercentage int,
	store nodeStore, nodeMapping map[string]string,
) (err error) {
	compressed := desc.Version > version1
	r.lastOp.set(backup.Transferring)
	for _, cdesc := range desc.Classes {
		if err := r.restoreOne(ctx, &cdesc, desc.ServerVersion, compressed, cpuPercentage, store, nodeMapping); err != nil {
			return fmt.Errorf("restore class %s: %w", cdesc.Name, err)
		}
		r.logger.WithField("action", "restore").
			WithField("backup_id", desc.ID).
			WithField("class", cdesc.Name).Info("successfully restored")
	}
	return nil
}

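// getType returns the type name of myvar, prefixed with "*" if it is a pointer.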
func getType(myvar interface{}) string {
	if t := reflect.TypeOf(myvar); t.Kind() == reflect.Ptr {
		return "*" + t.Elem().Name()
	} else {
		return t.Name()
	}
}

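// restoreOne restores a single class: it writes the class files from the
// backend to disk (migrating pre-v1.23 layouts if needed) and then restores
// the class in the schema, rolling back the written files on failure.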
func (r *restorer) restoreOne(ctx context.Context,
	desc *backup.ClassDescriptor, serverVersion string,
	compressed bool, cpuPercentage int, store nodeStore, nodeMapping map[string]string,
) (err error) {
	classLabel := desc.Name
	if monitoring.GetMetrics().Group {
		classLabel = "n/a"
	}
	metric, err := monitoring.GetMetrics().BackupRestoreDurations.GetMetricWithLabelValues(getType(store.b), classLabel)
	if err == nil {
		timer := prometheus.NewTimer(metric)
		defer timer.ObserveDuration()
	}

	if r.sourcer.ClassExists(desc.Name) {
		return fmt.Errorf("already exists")
	}
	fw := newFileWriter(r.sourcer, store, compressed, r.logger).
		WithPoolPercentage(cpuPercentage)

	// Pre-v1.23 versions store files in a flat format
	if serverVersion < "1.23" {
		f, err := hfsMigrator(desc, r.node, serverVersion)
		if err != nil {
			return fmt.Errorf("migrate from pre-1.23 file structure: %w", err)
		}
		fw.setMigrator(f)
	}

	rollback, err := fw.Write(ctx, desc)
	if err != nil {
		return fmt.Errorf("write files: %w", err)
	}
	if err := r.schema.RestoreClass(ctx, desc, nodeMapping); err != nil {
		if rerr := rollback(); rerr != nil {
			r.logger.WithField("className", desc.Name).WithField("action", "rollback").Error(rerr)
		}
		return fmt.Errorf("restore schema: %w", err)
	}
	return nil
}

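// status returns the status of the restore identified by backend and ID,
// taken from the ongoing operation if it matches, otherwise from the
// in-memory status map.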
func (r *restorer) status(backend, ID string) (Status, error) {
	if st := r.lastOp.get(); st.ID == ID {
		return Status{
			Path:      st.Path,
			StartedAt: st.Starttime,
			Status:    st.Status,
		}, nil
	}
	ref := basePath(backend, ID)
	istatus, ok := r.restoreStatusMap.Load(ref)
	if !ok {
		err := fmt.Errorf("status not found: %s", ref)
		return Status{}, backup.NewErrNotFound(err)
	}
	return istatus.(Status), nil
}

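// validate fetches the backup descriptor from the store and checks that it
// matches the request, completed successfully, and has a compatible version.
// It returns the descriptor and the list of classes it contains.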
func (r *restorer) validate(ctx context.Context, store *nodeStore, req *Request) (*backup.BackupDescriptor, []string, error) {
	destPath := store.HomeDir()
	meta, err := store.Meta(ctx, req.ID, true)
	if err != nil {
		nerr := backup.ErrNotFound{}
		if errors.As(err, &nerr) {
			return nil, nil, fmt.Errorf("restorer cannot validate: %w: %q (%w)", errMetaNotFound, destPath, err)
		}
		return nil, nil, fmt.Errorf("find backup %s: %w", destPath, err)
	}
	if meta.ID != req.ID {
		return nil, nil, fmt.Errorf("wrong backup file: expected %q got %q", req.ID, meta.ID)
	}
	if meta.Status != string(backup.Success) {
		err = fmt.Errorf("invalid backup %s status: %s", destPath, meta.Status)
		return nil, nil, err
	}
	if err := meta.Validate(meta.Version > version1); err != nil {
		return nil, nil, fmt.Errorf("corrupted backup file: %w", err)
	}
	if v := meta.Version; v > Version {
		return nil, nil, fmt.Errorf("%s: %s > %s", errMsgHigherVersion, v, Version)
	}
	cs := meta.List()
	if len(req.Classes) > 0 {
		if first := meta.AllExist(req.Classes); first != "" {
			err = fmt.Errorf("class %s doesn't exist in the backup; available classes: %v", first, cs)
			return nil, cs, err
		}
		meta.Include(req.Classes)
	}
	return meta, cs, nil
}

// oneClassSchema allows for creating a schema with a single class.
// This is required when migrating to the hierarchical file structure from pre-v1.23.
type oneClassSchema struct {
	cls *models.Class
	ss  *sharding.State
}

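// CopyShardingState returns the sharding state of the wrapped class.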
func (s oneClassSchema) CopyShardingState(class string) *sharding.State {
	return s.ss
}

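// GetSchemaSkipAuth returns a schema containing only the wrapped class.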
func (s oneClassSchema) GetSchemaSkipAuth() schema.Schema {
	return schema.Schema{
		Objects: &models.Schema{
			Classes: []*models.Class{s.cls},
		},
	}
}

// hfsMigrator builds and returns a class migrator ready for use
func hfsMigrator(desc *backup.ClassDescriptor, nodeName string, serverVersion string) (func(classDir string) error, error) {
	if serverVersion >= "1.23" {
		return func(string) error { return nil }, nil
	}
	var ss sharding.State
	if desc.ShardingState != nil {
		err := json.Unmarshal(desc.ShardingState, &ss)
		if err != nil {
			return nil, fmt.Errorf("unmarshal sharding state: %w", err)
		}
	}
	ss.SetLocalName(nodeName)

	// get the class schema
	class := &models.Class{}
	if err := json.Unmarshal(desc.Schema, &class); err != nil {
		return nil, fmt.Errorf("unmarshal class schema: %w", err)
	}

	return func(classDir string) error {
		return migratefs.MigrateToHierarchicalFS(classDir, oneClassSchema{class, &ss})
	}, nil
}