github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/backup_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"os"
    21  	"path"
    22  	"path/filepath"
    23  	"regexp"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/sirupsen/logrus/hooks/test"
    28  	"github.com/stretchr/testify/assert"
    29  	"github.com/stretchr/testify/require"
    30  	"github.com/weaviate/weaviate/entities/models"
    31  	"github.com/weaviate/weaviate/entities/schema"
    32  	"github.com/weaviate/weaviate/entities/storobj"
    33  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    34  )
    35  
    36  func TestBackup_DBLevel(t *testing.T) {
    37  	t.Run("successful backup creation", func(t *testing.T) {
    38  		ctx := testCtx()
    39  		dirName := t.TempDir()
    40  		className := "DBLevelBackupClass"
    41  		backupID := "backup1"
    42  		now := time.Now()
    43  
    44  		db := setupTestDB(t, dirName, makeTestClass(className))
    45  		defer func() {
    46  			require.Nil(t, db.Shutdown(context.Background()))
    47  		}()
    48  
    49  		t.Run("insert data", func(t *testing.T) {
    50  			require.Nil(t, db.PutObject(ctx, &models.Object{
    51  				Class:              className,
    52  				CreationTimeUnix:   now.UnixNano(),
    53  				ID:                 "ff9fcae5-57b8-431c-b8e2-986fd78f5809",
    54  				LastUpdateTimeUnix: now.UnixNano(),
    55  				Vector:             []float32{1, 2, 3},
    56  				VectorWeights:      nil,
    57  			}, []float32{1, 2, 3}, nil, nil))
    58  		})
    59  
    60  		expectedNodeName := "node1"
    61  		expectedShardName := db.schemaGetter.
    62  			CopyShardingState(className).
    63  			AllPhysicalShards()[0]
    64  		testShd := db.GetIndex(schema.ClassName(className)).
    65  			shards.Load(expectedShardName)
    66  		expectedCounterPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.Counter().FileName())
    67  		expectedCounter, err := os.ReadFile(testShd.Counter().FileName())
    68  		require.Nil(t, err)
    69  		expectedPropLengthPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.GetPropertyLengthTracker().FileName())
    70  		expectedShardVersionPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.Versioner().path)
    71  		expectedShardVersion, err := os.ReadFile(testShd.Versioner().path)
    72  		require.Nil(t, err)
    73  		expectedPropLength, err := os.ReadFile(testShd.GetPropertyLengthTracker().FileName())
    74  		require.Nil(t, err)
    75  		expectedShardState, err := testShd.Index().getSchema.CopyShardingState(className).JSON()
    76  		require.Nil(t, err)
    77  		expectedSchema, err := testShd.Index().getSchema.GetSchemaSkipAuth().
    78  			Objects.Classes[0].MarshalBinary()
    79  		require.Nil(t, err)
    80  
    81  		classes := db.ListBackupable()
    82  
    83  		t.Run("doesn't fail on casing permutation of existing class", func(t *testing.T) {
    84  			err := db.Backupable(ctx, []string{"DBLeVELBackupClass"})
    85  			require.NotNil(t, err)
    86  			require.Equal(t, "class DBLeVELBackupClass doesn't exist", err.Error())
    87  		})
    88  
    89  		t.Run("create backup", func(t *testing.T) {
    90  			err := db.Backupable(ctx, classes)
    91  			assert.Nil(t, err)
    92  
    93  			ch := db.BackupDescriptors(ctx, backupID, classes)
    94  
    95  			for d := range ch {
    96  				assert.Equal(t, className, d.Name)
    97  				assert.Len(t, d.Shards, len(classes))
    98  				for _, shd := range d.Shards {
    99  					assert.Equal(t, expectedShardName, shd.Name)
   100  					assert.Equal(t, expectedNodeName, shd.Node)
   101  					assert.NotEmpty(t, shd.Files)
   102  					for _, f := range shd.Files {
   103  						assert.NotEmpty(t, f)
   104  					}
   105  					assert.Equal(t, expectedCounterPath, shd.DocIDCounterPath)
   106  					assert.Equal(t, expectedCounter, shd.DocIDCounter)
   107  					assert.Equal(t, expectedPropLengthPath, shd.PropLengthTrackerPath)
   108  					assert.Equal(t, expectedPropLength, shd.PropLengthTracker)
   109  					assert.Equal(t, expectedShardVersionPath, shd.ShardVersionPath)
   110  					assert.Equal(t, expectedShardVersion, shd.Version)
   111  				}
   112  				assert.Equal(t, expectedShardState, d.ShardingState)
   113  				assert.Equal(t, expectedSchema, d.Schema)
   114  			}
   115  		})
   116  
   117  		t.Run("release backup", func(t *testing.T) {
   118  			for _, class := range classes {
   119  				err := db.ReleaseBackup(ctx, backupID, class)
   120  				assert.Nil(t, err)
   121  			}
   122  		})
   123  
   124  		t.Run("node names from shards", func(t *testing.T) {
   125  			res, err := db.Shards(ctx, className)
   126  			assert.NoError(t, err)
   127  			assert.Len(t, res, 1)
   128  			assert.Equal(t, "node1", res[0])
   129  		})
   130  
   131  		t.Run("get all classes", func(t *testing.T) {
   132  			res := db.ListClasses(ctx)
   133  			assert.Len(t, res, 1)
   134  			assert.Equal(t, className, res[0])
   135  		})
   136  	})
   137  
   138  	t.Run("failed backup creation from expired context", func(t *testing.T) {
   139  		ctx := testCtx()
   140  		dirName := t.TempDir()
   141  		className := "DBLevelBackupClass"
   142  		backupID := "backup1"
   143  		now := time.Now()
   144  
   145  		db := setupTestDB(t, dirName, makeTestClass(className))
   146  		defer func() {
   147  			require.Nil(t, db.Shutdown(context.Background()))
   148  		}()
   149  
   150  		t.Run("insert data", func(t *testing.T) {
   151  			require.Nil(t, db.PutObject(ctx, &models.Object{
   152  				Class:              className,
   153  				CreationTimeUnix:   now.UnixNano(),
   154  				ID:                 "ff9fcae5-57b8-431c-b8e2-986fd78f5809",
   155  				LastUpdateTimeUnix: now.UnixNano(),
   156  				Vector:             []float32{1, 2, 3},
   157  				VectorWeights:      nil,
   158  			}, []float32{1, 2, 3}, nil, nil))
   159  		})
   160  
   161  		t.Run("fail with expired context", func(t *testing.T) {
   162  			classes := db.ListBackupable()
   163  
   164  			err := db.Backupable(ctx, classes)
   165  			assert.Nil(t, err)
   166  
   167  			timeoutCtx, cancel := context.WithTimeout(context.Background(), 0)
   168  			defer cancel()
   169  
   170  			ch := db.BackupDescriptors(timeoutCtx, backupID, classes)
   171  			for d := range ch {
   172  				require.NotNil(t, d.Error)
   173  				assert.Contains(t, d.Error.Error(), "context deadline exceeded")
   174  			}
   175  		})
   176  	})
   177  }
   178  
// TestBackup_BucketLevel exercises the low-level backup primitives on a
// single LSM bucket: pause compaction, flush the memtable, list the
// resulting segment files, then resume compaction and clean up.
func TestBackup_BucketLevel(t *testing.T) {
	ctx := testCtx()
	className := "BucketLevelBackup"
	shard, _ := testShard(t, ctx, className)

	t.Run("insert data", func(t *testing.T) {
		// a single object is enough to populate the memtable so the
		// subsequent flush produces a segment on disk
		err := shard.PutObject(ctx, &storobj.Object{
			MarshallerVersion: 1,
			Object: models.Object{
				ID:    "8c29da7a-600a-43dc-85fb-83ab2b08c294",
				Class: className,
				Properties: map[string]interface{}{
					"stringField": "somevalue",
				},
			},
		},
		)
		require.Nil(t, err)
	})

	t.Run("perform backup sequence", func(t *testing.T) {
		objBucket := shard.Store().Bucket("objects")
		require.NotNil(t, objBucket)

		// compaction must be paused before listing files so the segment
		// set cannot change underneath us
		err := shard.Store().PauseCompaction(ctx)
		require.Nil(t, err)

		err = objBucket.FlushMemtable()
		require.Nil(t, err)

		files, err := objBucket.ListFiles(ctx, shard.Index().Config.RootPath)
		require.Nil(t, err)

		t.Run("check ListFiles, results", func(t *testing.T) {
			assert.Len(t, files, 4)

			// build regex to get very close approximation to the expected
			// contents of the ListFiles result. the only thing we can't
			// know for sure is the actual name of the segment group, hence
			// the `.*`
			re := path.Clean(fmt.Sprintf("%s\\/.*\\.(wal|db|bloom|cna)", shard.Index().Config.RootPath))

			// we expect to see only four files inside the bucket at this point:
			//   1. a *.db file - the segment itself
			//   2. a *.bloom file - the segments' bloom filter (only since v1.17)
			//   3. a *.secondary.0.bloom file - the bloom filter for the secondary index at pos 0 (only since v1.17)
			//   4. a *.cna file - the segment's count net additions (only since v1.17)
			//
			// These files are created when the memtable is flushed, and the new
			// segment is initialized. Both happens as a result of calling
			// FlushMemtable().
			for i := range files {
				isMatch, err := regexp.MatchString(re, files[i])
				assert.Nil(t, err)
				assert.True(t, isMatch, files[i])
			}

			// check that we have one of each: *.db
			exts := make([]string, 4)
			for i, file := range files {
				exts[i] = filepath.Ext(file)
			}
			assert.Contains(t, exts, ".db")    // the main segment
			assert.Contains(t, exts, ".cna")   // the segment's count net additions
			assert.Contains(t, exts, ".bloom") // matches both bloom filters (primary+secondary)
		})

		err = shard.Store().ResumeCompaction(ctx)
		require.Nil(t, err)
	})

	t.Run("cleanup", func(t *testing.T) {
		require.Nil(t, shard.Shutdown(ctx))
		require.Nil(t, os.RemoveAll(shard.Index().Config.RootPath))
	})
}
   255  
   256  func setupTestDB(t *testing.T, rootDir string, classes ...*models.Class) *DB {
   257  	logger, _ := test.NewNullLogger()
   258  
   259  	schemaGetter := &fakeSchemaGetter{
   260  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   261  		shardState: singleShardState(),
   262  	}
   263  	db, err := New(logger, Config{
   264  		MemtablesFlushDirtyAfter:  60,
   265  		RootPath:                  rootDir,
   266  		QueryMaximumResults:       10,
   267  		MaxImportGoroutinesFactor: 1,
   268  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
   269  	require.Nil(t, err)
   270  	db.SetSchemaGetter(schemaGetter)
   271  	require.Nil(t, db.WaitForStartup(testCtx()))
   272  	migrator := NewMigrator(db, logger)
   273  
   274  	for _, class := range classes {
   275  		require.Nil(t,
   276  			migrator.AddClass(context.Background(), class, schemaGetter.shardState))
   277  	}
   278  
   279  	// update schema getter so it's in sync with class
   280  	schemaGetter.schema = schema.Schema{
   281  		Objects: &models.Schema{
   282  			Classes: classes,
   283  		},
   284  	}
   285  
   286  	return db
   287  }
   288  
   289  func makeTestClass(className string) *models.Class {
   290  	return &models.Class{
   291  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   292  		InvertedIndexConfig: invertedConfig(),
   293  		Class:               className,
   294  		Properties: []*models.Property{
   295  			{
   296  				Name:         "stringProp",
   297  				DataType:     schema.DataTypeText.PropString(),
   298  				Tokenization: models.PropertyTokenizationWhitespace,
   299  			},
   300  		},
   301  	}
   302  }