github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/backup.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"sync"
    18  	"time"
    19  
    20  	enterrors "github.com/weaviate/weaviate/entities/errors"
    21  
    22  	"github.com/pkg/errors"
    23  	"github.com/sirupsen/logrus"
    24  	"github.com/weaviate/weaviate/entities/backup"
    25  	"github.com/weaviate/weaviate/entities/schema"
    26  )
    27  
// BackupState records which backup an index is currently tied to.
// A pointer to it is kept in Index.lastBackup; initBackup installs a fresh
// value and resetBackupState clears it again.
type BackupState struct {
	// BackupID is the identifier of the backup that claimed the index.
	BackupID string
	// InProgress is set to true by initBackup when the state is created.
	InProgress bool
}
    32  
    33  // Backupable returns whether all given class can be backed up.
    34  func (db *DB) Backupable(ctx context.Context, classes []string) error {
    35  	for _, c := range classes {
    36  		className := schema.ClassName(c)
    37  		idx := db.GetIndex(className)
    38  		if idx == nil || idx.Config.ClassName != className {
    39  			return fmt.Errorf("class %v doesn't exist", c)
    40  		}
    41  	}
    42  	return nil
    43  }
    44  
    45  // ListBackupable returns a list of all classes which can be backed up.
    46  func (db *DB) ListBackupable() []string {
    47  	cs := make([]string, 0, len(db.indices))
    48  	db.indexLock.RLock()
    49  	defer db.indexLock.RUnlock()
    50  	for _, idx := range db.indices {
    51  		cls := string(idx.Config.ClassName)
    52  		cs = append(cs, cls)
    53  	}
    54  	return cs
    55  }
    56  
    57  // BackupDescriptors returns a channel of class descriptors.
    58  // Class descriptor records everything needed to restore a class
    59  // If an error happens a descriptor with an error will be written to the channel just before closing it.
    60  func (db *DB) BackupDescriptors(ctx context.Context, bakid string, classes []string,
    61  ) <-chan backup.ClassDescriptor {
    62  	ds := make(chan backup.ClassDescriptor, len(classes))
    63  	f := func() {
    64  		for _, c := range classes {
    65  			desc := backup.ClassDescriptor{Name: c}
    66  			idx := db.GetIndex(schema.ClassName(c))
    67  			if idx == nil {
    68  				desc.Error = fmt.Errorf("class %v doesn't exist any more", c)
    69  			} else if err := idx.descriptor(ctx, bakid, &desc); err != nil {
    70  				desc.Error = fmt.Errorf("backup class %v descriptor: %w", c, err)
    71  			}
    72  			ds <- desc
    73  			if desc.Error != nil {
    74  				break
    75  			}
    76  		}
    77  		close(ds)
    78  	}
    79  	enterrors.GoWrapper(f, db.logger)
    80  	return ds
    81  }
    82  
    83  func (db *DB) ShardsBackup(
    84  	ctx context.Context, bakID, class string, shards []string,
    85  ) (_ backup.ClassDescriptor, err error) {
    86  	cd := backup.ClassDescriptor{Name: class}
    87  	idx := db.GetIndex(schema.ClassName(class))
    88  	if idx == nil {
    89  		return cd, fmt.Errorf("no index for class %q", class)
    90  	}
    91  
    92  	if err := idx.initBackup(bakID); err != nil {
    93  		return cd, fmt.Errorf("init backup state for class %q: %w", class, err)
    94  	}
    95  
    96  	defer func() {
    97  		if err != nil {
    98  			enterrors.GoWrapper(func() { idx.ReleaseBackup(ctx, bakID) }, db.logger)
    99  		}
   100  	}()
   101  
   102  	sm := make(map[string]ShardLike, len(shards))
   103  	for _, shardName := range shards {
   104  		shard := idx.shards.Load(shardName)
   105  		if shard == nil {
   106  			return cd, fmt.Errorf("no shard %q for class %q", shardName, class)
   107  		}
   108  		sm[shardName] = shard
   109  	}
   110  
   111  	// prevent writing into the index during collection of metadata
   112  	idx.backupMutex.Lock()
   113  	defer idx.backupMutex.Unlock()
   114  	for shardName, shard := range sm {
   115  		if err := shard.BeginBackup(ctx); err != nil {
   116  			return cd, fmt.Errorf("class %q: shard %q: begin backup: %w", class, shardName, err)
   117  		}
   118  
   119  		sd := backup.ShardDescriptor{Name: shardName}
   120  		if err := shard.ListBackupFiles(ctx, &sd); err != nil {
   121  			return cd, fmt.Errorf("class %q: shard %q: list backup files: %w", class, shardName, err)
   122  		}
   123  
   124  		cd.Shards = append(cd.Shards, &sd)
   125  	}
   126  
   127  	return cd, nil
   128  }
   129  
   130  // ReleaseBackup release resources acquired by the index during backup
   131  func (db *DB) ReleaseBackup(ctx context.Context, bakID, class string) (err error) {
   132  	fields := logrus.Fields{
   133  		"op":    "release_backup",
   134  		"class": class,
   135  		"id":    bakID,
   136  	}
   137  	db.logger.WithFields(fields).Debug("starting")
   138  	begin := time.Now()
   139  	defer func() {
   140  		l := db.logger.WithFields(fields).WithField("took", time.Since(begin))
   141  		if err != nil {
   142  			l.Error(err)
   143  			return
   144  		}
   145  		l.Debug("finish")
   146  	}()
   147  
   148  	idx := db.GetIndex(schema.ClassName(class))
   149  	if idx != nil {
   150  		return idx.ReleaseBackup(ctx, bakID)
   151  	}
   152  	return nil
   153  }
   154  
   155  func (db *DB) ClassExists(name string) bool {
   156  	return db.IndexExists(schema.ClassName(name))
   157  }
   158  
   159  // Returns the list of nodes where shards of class are contained.
   160  // If there are no shards for the class, returns an empty list
   161  // If there are shards for the class but no nodes are found, return an error
   162  func (db *DB) Shards(ctx context.Context, class string) ([]string, error) {
   163  	unique := make(map[string]struct{})
   164  
   165  	ss := db.schemaGetter.CopyShardingState(class)
   166  	if len(ss.Physical) == 0 {
   167  		return []string{}, nil
   168  	}
   169  
   170  	for _, shard := range ss.Physical {
   171  		for _, node := range shard.BelongsToNodes {
   172  			unique[node] = struct{}{}
   173  		}
   174  	}
   175  
   176  	var (
   177  		nodes   = make([]string, len(unique))
   178  		counter = 0
   179  	)
   180  
   181  	for node := range unique {
   182  		nodes[counter] = node
   183  		counter++
   184  	}
   185  	if len(nodes) == 0 {
   186  		return nil, fmt.Errorf("found %v shards, but has 0 nodes", len(ss.Physical))
   187  	}
   188  
   189  	return nodes, nil
   190  }
   191  
   192  func (db *DB) ListClasses(ctx context.Context) []string {
   193  	classes := db.schemaGetter.GetSchemaSkipAuth().Objects.Classes
   194  	classNames := make([]string, len(classes))
   195  
   196  	for i, class := range classes {
   197  		classNames[i] = class.Class
   198  	}
   199  
   200  	return classNames
   201  }
   202  
   203  // descriptor record everything needed to restore a class
   204  func (i *Index) descriptor(ctx context.Context, backupID string, desc *backup.ClassDescriptor) (err error) {
   205  	if err := i.initBackup(backupID); err != nil {
   206  		return err
   207  	}
   208  	defer func() {
   209  		if err != nil {
   210  			enterrors.GoWrapper(func() { i.ReleaseBackup(ctx, backupID) }, i.logger)
   211  		}
   212  	}()
   213  	// prevent writing into the index during collection of metadata
   214  	i.backupMutex.Lock()
   215  	defer i.backupMutex.Unlock()
   216  
   217  	if err = i.ForEachShard(func(name string, s ShardLike) error {
   218  		if err = s.BeginBackup(ctx); err != nil {
   219  			return fmt.Errorf("pause compaction and flush: %w", err)
   220  		}
   221  		var sd backup.ShardDescriptor
   222  		if err := s.ListBackupFiles(ctx, &sd); err != nil {
   223  			return fmt.Errorf("list shard %v files: %w", s.Name(), err)
   224  		}
   225  
   226  		desc.Shards = append(desc.Shards, &sd)
   227  		return nil
   228  	}); err != nil {
   229  		return err
   230  	}
   231  
   232  	if desc.ShardingState, err = i.marshalShardingState(); err != nil {
   233  		return fmt.Errorf("marshal sharding state %w", err)
   234  	}
   235  	if desc.Schema, err = i.marshalSchema(); err != nil {
   236  		return fmt.Errorf("marshal schema %w", err)
   237  	}
   238  	return ctx.Err()
   239  }
   240  
   241  // ReleaseBackup marks the specified backup as inactive and restarts all
   242  // async background and maintenance processes. It errors if the backup does not exist
   243  // or is already inactive.
   244  func (i *Index) ReleaseBackup(ctx context.Context, id string) error {
   245  	i.logger.WithField("backup_id", id).WithField("class", i.Config.ClassName).Info("release backup")
   246  	defer i.resetBackupState()
   247  	if err := i.resumeMaintenanceCycles(ctx); err != nil {
   248  		return err
   249  	}
   250  	return nil
   251  }
   252  
   253  func (i *Index) initBackup(id string) error {
   254  	new := &BackupState{
   255  		BackupID:   id,
   256  		InProgress: true,
   257  	}
   258  	if !i.lastBackup.CompareAndSwap(nil, new) {
   259  		bid := ""
   260  		if x := i.lastBackup.Load(); x != nil {
   261  			bid = x.BackupID
   262  		}
   263  		return errors.Errorf(
   264  			"cannot create new backup, backup ‘%s’ is not yet released, this "+
   265  				"means its contents have not yet been fully copied to its destination, "+
   266  				"try again later", bid)
   267  	}
   268  
   269  	return nil
   270  }
   271  
// resetBackupState clears the index's recorded backup by storing nil in
// lastBackup, which lets a later initBackup compare-and-swap succeed again.
func (i *Index) resetBackupState() {
	i.lastBackup.Store(nil)
}
   275  
   276  func (i *Index) resumeMaintenanceCycles(ctx context.Context) (lastErr error) {
   277  	i.ForEachShard(func(name string, shard ShardLike) error {
   278  		if err := shard.resumeMaintenanceCycles(ctx); err != nil {
   279  			lastErr = err
   280  			i.logger.WithField("shard", name).WithField("op", "resume_maintenance").Error(err)
   281  		}
   282  		time.Sleep(time.Millisecond * 10)
   283  		return nil
   284  	})
   285  	return lastErr
   286  }
   287  
   288  func (i *Index) marshalShardingState() ([]byte, error) {
   289  	b, err := i.getSchema.CopyShardingState(i.Config.ClassName.String()).JSON()
   290  	if err != nil {
   291  		return nil, errors.Wrap(err, "marshal sharding state")
   292  	}
   293  
   294  	return b, nil
   295  }
   296  
   297  func (i *Index) marshalSchema() ([]byte, error) {
   298  	schema := i.getSchema.GetSchemaSkipAuth()
   299  
   300  	b, err := schema.GetClass(i.Config.ClassName).MarshalBinary()
   301  	if err != nil {
   302  		return nil, errors.Wrap(err, "marshal schema")
   303  	}
   304  
   305  	return b, err
   306  }
   307  
   308  const (
   309  	mutexRetryDuration  = time.Millisecond * 500
   310  	mutexNotifyDuration = 20 * time.Second
   311  )
   312  
// backupMutex is an adapter built around sync.RWMutex that facilitates
// cooperative blocking between write and read locks: the write side can be
// acquired by polling TryLock under a context (LockWithContext), while
// readers run inside RLockGuard.
type backupMutex struct {
	sync.RWMutex
	// log receives the notice emitted while a lock attempt keeps waiting.
	log logrus.FieldLogger
	// retryDuration is the ticker interval between lock attempts.
	retryDuration time.Duration
	// notifyDuration is the minimum wait between two waiting notices.
	notifyDuration time.Duration
}
   320  
   321  // LockWithContext attempts to acquire a write lock while respecting the provided context.
   322  // It reports whether the lock acquisition was successful or if the context has been cancelled.
   323  func (m *backupMutex) LockWithContext(ctx context.Context) error {
   324  	return m.lock(ctx, m.TryLock)
   325  }
   326  
   327  func (m *backupMutex) lock(ctx context.Context, tryLock func() bool) error {
   328  	if tryLock() {
   329  		return nil
   330  	}
   331  	curTime := time.Now()
   332  	t := time.NewTicker(m.retryDuration)
   333  	defer t.Stop()
   334  	for {
   335  		select {
   336  		case <-ctx.Done():
   337  			return ctx.Err()
   338  		case <-t.C:
   339  			if tryLock() {
   340  				return nil
   341  			}
   342  			if time.Since(curTime) > m.notifyDuration {
   343  				curTime = time.Now()
   344  				m.log.Info("backup process waiting for ongoing writes to finish")
   345  			}
   346  		}
   347  	}
   348  }
   349  
   350  func (s *backupMutex) RLockGuard(reader func() error) error {
   351  	s.RLock()
   352  	defer s.RUnlock()
   353  	return reader()
   354  }