github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/backup_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package hnsw 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "path" 22 "testing" 23 "time" 24 25 "github.com/sirupsen/logrus/hooks/test" 26 "github.com/stretchr/testify/assert" 27 "github.com/stretchr/testify/require" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 29 "github.com/weaviate/weaviate/entities/cyclemanager" 30 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 31 ) 32 33 func TestBackup_Integration(t *testing.T) { 34 ctx := context.Background() 35 logger, _ := test.NewNullLogger() 36 37 dirName := t.TempDir() 38 indexID := "backup-integration-test" 39 40 parentCommitLoggerCallbacks := cyclemanager.NewCallbackGroup("parentCommitLogger", logger, 1) 41 parentCommitLoggerCycle := cyclemanager.NewManager( 42 cyclemanager.HnswCommitLoggerCycleTicker(), 43 parentCommitLoggerCallbacks.CycleCallback, logger) 44 parentCommitLoggerCycle.Start() 45 defer parentCommitLoggerCycle.StopAndWait(ctx) 46 commitLoggerCallbacks := cyclemanager.NewCallbackGroup("childCommitLogger", logger, 1) 47 commitLoggerCallbacksCtrl := parentCommitLoggerCallbacks.Register("commitLogger", commitLoggerCallbacks.CycleCallback) 48 49 parentTombstoneCleanupCallbacks := cyclemanager.NewCallbackGroup("parentTombstoneCleanup", logger, 1) 50 parentTombstoneCleanupCycle := cyclemanager.NewManager( 51 cyclemanager.NewFixedTicker(enthnsw.DefaultCleanupIntervalSeconds*time.Second), 52 parentTombstoneCleanupCallbacks.CycleCallback, logger) 53 parentTombstoneCleanupCycle.Start() 54 defer parentTombstoneCleanupCycle.StopAndWait(ctx) 55 tombstoneCleanupCallbacks := cyclemanager.NewCallbackGroup("childTombstoneCleanup", logger, 1) 56 tombstoneCleanupCallbacksCtrl := parentTombstoneCleanupCallbacks.Register("tombstoneCleanup", tombstoneCleanupCallbacks.CycleCallback) 57 58 combinedCtrl := cyclemanager.NewCombinedCallbackCtrl(2, logger, commitLoggerCallbacksCtrl, tombstoneCleanupCallbacksCtrl) 59 60 idx, err := New(Config{ 61 RootPath: dirName, 62 ID: indexID, 63 Logger: logger, 64 DistanceProvider: distancer.NewCosineDistanceProvider(), 65 VectorForIDThunk: testVectorForID, 66 MakeCommitLoggerThunk: func() (CommitLogger, error) { 67 return NewCommitLogger(dirName, indexID, logger, commitLoggerCallbacks) 68 }, 69 }, enthnsw.NewDefaultUserConfig(), tombstoneCleanupCallbacks, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil) 70 require.Nil(t, err) 71 idx.PostStartup() 72 73 t.Run("insert vector into index", func(t *testing.T) { 74 for i := 0; i < 10; i++ { 75 inc := float32(i) 76 err := idx.Add(uint64(i), []float32{inc, inc + 1, inc + 2}) 77 require.Nil(t, err) 78 } 79 }) 80 81 // let the index age for a second so that 82 // the commitlogger filenames, which are 83 // based on current timestamp, can differ 84 time.Sleep(time.Second) 85 86 t.Run("pause maintenance", func(t *testing.T) { 87 err = combinedCtrl.Deactivate(ctx) 88 require.Nil(t, err) 89 }) 90 91 t.Run("switch commit logs", func(t *testing.T) { 92 err = idx.SwitchCommitLogs(ctx) 93 require.Nil(t, err) 94 }) 95 96 t.Run("list files", func(t *testing.T) { 97 files, err := idx.ListFiles(ctx, dirName) 98 require.Nil(t, err) 99 100 // by this point there should be two files in the commitlog directory. 101 // one is the active log file, and the other is the previous active 102 // log which was in use prior to `SwitchCommitLogs`. additionally, 103 // maintenance has been paused, so we shouldn't see any .condensed 104 // files either. 105 // 106 // because `ListFiles` is used within the context of backups, 107 // it excludes any currently active log files, which are not part 108 // of the backup. in this case, the only other file is the prev 109 // commitlog, so we should only have 1 result here. 110 assert.Len(t, files, 1) 111 112 t.Run("verify commitlog dir contents", func(t *testing.T) { 113 // checking to ensure that indeed there are only 2 files in the 114 // commit log directory, and that one of them is the one result 115 // from `ListFiles`, and that the other is not a .condensed file 116 ls, err := os.ReadDir(path.Join(dirName, fmt.Sprintf("%s.hnsw.commitlog.d", indexID))) 117 require.Nil(t, err) 118 assert.Len(t, ls, 2) 119 120 var prevLogFound bool 121 for _, info := range ls { 122 if path.Base(files[0]) == info.Name() { 123 prevLogFound = true 124 } 125 126 assert.Empty(t, path.Ext(info.Name())) 127 } 128 assert.True(t, prevLogFound, "previous commitlog not found in commitlog root dir") 129 }) 130 }) 131 132 t.Run("resume maintenance", func(t *testing.T) { 133 err = combinedCtrl.Activate() 134 require.Nil(t, err) 135 }) 136 137 err = idx.Shutdown(ctx) 138 require.Nil(t, err) 139 140 err = combinedCtrl.Unregister(ctx) 141 require.Nil(t, err) 142 }