github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/backup_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "path" 22 "path/filepath" 23 "regexp" 24 "testing" 25 "time" 26 27 "github.com/sirupsen/logrus/hooks/test" 28 "github.com/stretchr/testify/assert" 29 "github.com/stretchr/testify/require" 30 "github.com/weaviate/weaviate/entities/models" 31 "github.com/weaviate/weaviate/entities/schema" 32 "github.com/weaviate/weaviate/entities/storobj" 33 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 34 ) 35 36 func TestBackup_DBLevel(t *testing.T) { 37 t.Run("successful backup creation", func(t *testing.T) { 38 ctx := testCtx() 39 dirName := t.TempDir() 40 className := "DBLevelBackupClass" 41 backupID := "backup1" 42 now := time.Now() 43 44 db := setupTestDB(t, dirName, makeTestClass(className)) 45 defer func() { 46 require.Nil(t, db.Shutdown(context.Background())) 47 }() 48 49 t.Run("insert data", func(t *testing.T) { 50 require.Nil(t, db.PutObject(ctx, &models.Object{ 51 Class: className, 52 CreationTimeUnix: now.UnixNano(), 53 ID: "ff9fcae5-57b8-431c-b8e2-986fd78f5809", 54 LastUpdateTimeUnix: now.UnixNano(), 55 Vector: []float32{1, 2, 3}, 56 VectorWeights: nil, 57 }, []float32{1, 2, 3}, nil, nil)) 58 }) 59 60 expectedNodeName := "node1" 61 expectedShardName := db.schemaGetter. 62 CopyShardingState(className). 63 AllPhysicalShards()[0] 64 testShd := db.GetIndex(schema.ClassName(className)). 65 shards.Load(expectedShardName) 66 expectedCounterPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.Counter().FileName()) 67 expectedCounter, err := os.ReadFile(testShd.Counter().FileName()) 68 require.Nil(t, err) 69 expectedPropLengthPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.GetPropertyLengthTracker().FileName()) 70 expectedShardVersionPath, _ := filepath.Rel(testShd.Index().Config.RootPath, testShd.Versioner().path) 71 expectedShardVersion, err := os.ReadFile(testShd.Versioner().path) 72 require.Nil(t, err) 73 expectedPropLength, err := os.ReadFile(testShd.GetPropertyLengthTracker().FileName()) 74 require.Nil(t, err) 75 expectedShardState, err := testShd.Index().getSchema.CopyShardingState(className).JSON() 76 require.Nil(t, err) 77 expectedSchema, err := testShd.Index().getSchema.GetSchemaSkipAuth(). 78 Objects.Classes[0].MarshalBinary() 79 require.Nil(t, err) 80 81 classes := db.ListBackupable() 82 83 t.Run("doesn't fail on casing permutation of existing class", func(t *testing.T) { 84 err := db.Backupable(ctx, []string{"DBLeVELBackupClass"}) 85 require.NotNil(t, err) 86 require.Equal(t, "class DBLeVELBackupClass doesn't exist", err.Error()) 87 }) 88 89 t.Run("create backup", func(t *testing.T) { 90 err := db.Backupable(ctx, classes) 91 assert.Nil(t, err) 92 93 ch := db.BackupDescriptors(ctx, backupID, classes) 94 95 for d := range ch { 96 assert.Equal(t, className, d.Name) 97 assert.Len(t, d.Shards, len(classes)) 98 for _, shd := range d.Shards { 99 assert.Equal(t, expectedShardName, shd.Name) 100 assert.Equal(t, expectedNodeName, shd.Node) 101 assert.NotEmpty(t, shd.Files) 102 for _, f := range shd.Files { 103 assert.NotEmpty(t, f) 104 } 105 assert.Equal(t, expectedCounterPath, shd.DocIDCounterPath) 106 assert.Equal(t, expectedCounter, shd.DocIDCounter) 107 assert.Equal(t, expectedPropLengthPath, shd.PropLengthTrackerPath) 108 assert.Equal(t, expectedPropLength, shd.PropLengthTracker) 109 assert.Equal(t, expectedShardVersionPath, shd.ShardVersionPath) 110 assert.Equal(t, expectedShardVersion, shd.Version) 111 } 112 assert.Equal(t, expectedShardState, d.ShardingState) 113 assert.Equal(t, expectedSchema, d.Schema) 114 } 115 }) 116 117 t.Run("release backup", func(t *testing.T) { 118 for _, class := range classes { 119 err := db.ReleaseBackup(ctx, backupID, class) 120 assert.Nil(t, err) 121 } 122 }) 123 124 t.Run("node names from shards", func(t *testing.T) { 125 res, err := db.Shards(ctx, className) 126 assert.NoError(t, err) 127 assert.Len(t, res, 1) 128 assert.Equal(t, "node1", res[0]) 129 }) 130 131 t.Run("get all classes", func(t *testing.T) { 132 res := db.ListClasses(ctx) 133 assert.Len(t, res, 1) 134 assert.Equal(t, className, res[0]) 135 }) 136 }) 137 138 t.Run("failed backup creation from expired context", func(t *testing.T) { 139 ctx := testCtx() 140 dirName := t.TempDir() 141 className := "DBLevelBackupClass" 142 backupID := "backup1" 143 now := time.Now() 144 145 db := setupTestDB(t, dirName, makeTestClass(className)) 146 defer func() { 147 require.Nil(t, db.Shutdown(context.Background())) 148 }() 149 150 t.Run("insert data", func(t *testing.T) { 151 require.Nil(t, db.PutObject(ctx, &models.Object{ 152 Class: className, 153 CreationTimeUnix: now.UnixNano(), 154 ID: "ff9fcae5-57b8-431c-b8e2-986fd78f5809", 155 LastUpdateTimeUnix: now.UnixNano(), 156 Vector: []float32{1, 2, 3}, 157 VectorWeights: nil, 158 }, []float32{1, 2, 3}, nil, nil)) 159 }) 160 161 t.Run("fail with expired context", func(t *testing.T) { 162 classes := db.ListBackupable() 163 164 err := db.Backupable(ctx, classes) 165 assert.Nil(t, err) 166 167 timeoutCtx, cancel := context.WithTimeout(context.Background(), 0) 168 defer cancel() 169 170 ch := db.BackupDescriptors(timeoutCtx, backupID, classes) 171 for d := range ch { 172 require.NotNil(t, d.Error) 173 assert.Contains(t, d.Error.Error(), "context deadline exceeded") 174 } 175 }) 176 }) 177 } 178 179 func TestBackup_BucketLevel(t *testing.T) { 180 ctx := testCtx() 181 className := "BucketLevelBackup" 182 shard, _ := testShard(t, ctx, className) 183 184 t.Run("insert data", func(t *testing.T) { 185 err := shard.PutObject(ctx, &storobj.Object{ 186 MarshallerVersion: 1, 187 Object: models.Object{ 188 ID: "8c29da7a-600a-43dc-85fb-83ab2b08c294", 189 Class: className, 190 Properties: map[string]interface{}{ 191 "stringField": "somevalue", 192 }, 193 }, 194 }, 195 ) 196 require.Nil(t, err) 197 }) 198 199 t.Run("perform backup sequence", func(t *testing.T) { 200 objBucket := shard.Store().Bucket("objects") 201 require.NotNil(t, objBucket) 202 203 err := shard.Store().PauseCompaction(ctx) 204 require.Nil(t, err) 205 206 err = objBucket.FlushMemtable() 207 require.Nil(t, err) 208 209 files, err := objBucket.ListFiles(ctx, shard.Index().Config.RootPath) 210 require.Nil(t, err) 211 212 t.Run("check ListFiles, results", func(t *testing.T) { 213 assert.Len(t, files, 4) 214 215 // build regex to get very close approximation to the expected 216 // contents of the ListFiles result. the only thing we can't 217 // know for sure is the actual name of the segment group, hence 218 // the `.*` 219 re := path.Clean(fmt.Sprintf("%s\\/.*\\.(wal|db|bloom|cna)", shard.Index().Config.RootPath)) 220 221 // we expect to see only four files inside the bucket at this point: 222 // 1. a *.db file - the segment itself 223 // 2. a *.bloom file - the segments' bloom filter (only since v1.17) 224 // 3. a *.secondary.0.bloom file - the bloom filter for the secondary index at pos 0 (only since v1.17) 225 // 4. a *.cna file - th segment's count net additions (only since v1.17) 226 // 227 // These files are created when the memtable is flushed, and the new 228 // segment is initialized. Both happens as a result of calling 229 // FlushMemtable(). 230 for i := range files { 231 isMatch, err := regexp.MatchString(re, files[i]) 232 assert.Nil(t, err) 233 assert.True(t, isMatch, files[i]) 234 } 235 236 // check that we have one of each: *.db 237 exts := make([]string, 4) 238 for i, file := range files { 239 exts[i] = filepath.Ext(file) 240 } 241 assert.Contains(t, exts, ".db") // the main segment 242 assert.Contains(t, exts, ".cna") // the segment's count net additions 243 assert.Contains(t, exts, ".bloom") // matches both bloom filters (primary+secondary) 244 }) 245 246 err = shard.Store().ResumeCompaction(ctx) 247 require.Nil(t, err) 248 }) 249 250 t.Run("cleanup", func(t *testing.T) { 251 require.Nil(t, shard.Shutdown(ctx)) 252 require.Nil(t, os.RemoveAll(shard.Index().Config.RootPath)) 253 }) 254 } 255 256 func setupTestDB(t *testing.T, rootDir string, classes ...*models.Class) *DB { 257 logger, _ := test.NewNullLogger() 258 259 schemaGetter := &fakeSchemaGetter{ 260 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 261 shardState: singleShardState(), 262 } 263 db, err := New(logger, Config{ 264 MemtablesFlushDirtyAfter: 60, 265 RootPath: rootDir, 266 QueryMaximumResults: 10, 267 MaxImportGoroutinesFactor: 1, 268 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 269 require.Nil(t, err) 270 db.SetSchemaGetter(schemaGetter) 271 require.Nil(t, db.WaitForStartup(testCtx())) 272 migrator := NewMigrator(db, logger) 273 274 for _, class := range classes { 275 require.Nil(t, 276 migrator.AddClass(context.Background(), class, schemaGetter.shardState)) 277 } 278 279 // update schema getter so it's in sync with class 280 schemaGetter.schema = schema.Schema{ 281 Objects: &models.Schema{ 282 Classes: classes, 283 }, 284 } 285 286 return db 287 } 288 289 func makeTestClass(className string) *models.Class { 290 return &models.Class{ 291 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 292 InvertedIndexConfig: invertedConfig(), 293 Class: className, 294 Properties: []*models.Property{ 295 { 296 Name: "stringProp", 297 DataType: schema.DataTypeText.PropString(), 298 Tokenization: models.PropertyTokenizationWhitespace, 299 }, 300 }, 301 } 302 }