github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/file_structure_migration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "fmt" 16 "math/rand" 17 "os" 18 "path" 19 "strings" 20 "testing" 21 22 "github.com/sirupsen/logrus/hooks/test" 23 "github.com/stretchr/testify/assert" 24 "github.com/stretchr/testify/require" 25 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 26 "github.com/weaviate/weaviate/entities/models" 27 "github.com/weaviate/weaviate/entities/schema" 28 "github.com/weaviate/weaviate/usecases/sharding" 29 ) 30 31 const ( 32 numClasses = 100 33 numShards = 10 34 uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 35 lowercase = "abcdefghijklmnopqrstuvwxyz" 36 digits = "0123456789" 37 chars = uppercase + lowercase + digits 38 localNode = "node1" 39 ) 40 41 var ( 42 rootFiles = []string{ 43 "classifications.db", 44 "modules.db", 45 "schema.db", 46 } 47 indexDirExts = []string{ 48 ".hnsw.commitlog.d", 49 "_someGeoProp.hnsw.commitlog.d", 50 "_lsm", 51 } 52 indexFileExts = []string{ 53 ".indexcount", 54 ".proplengths", 55 ".version", 56 } 57 migratedRootFiles = append(rootFiles, 58 "migration1.22.fs.hierarchy") 59 ) 60 61 func TestFileStructureMigration(t *testing.T) { 62 shardsByClass := make(map[string][]string, numClasses) 63 64 t.Run("generate index and shard names", func(t *testing.T) { 65 for i := 0; i < numClasses; i++ { 66 c := randClassName() 67 shardsByClass[c] = make([]string, numShards) 68 for j := 0; j < numShards; j++ { 69 s := randShardName() 70 shardsByClass[c][j] = s 71 } 72 } 73 }) 74 75 root := t.TempDir() 76 77 t.Run("write test db files", func(t *testing.T) { 78 for _, f := range rootFiles { 79 require.Nil(t, os.WriteFile(path.Join(root, f), nil, os.ModePerm)) 80 } 81 82 for class, shards := range shardsByClass { 83 for _, shard := range shards { 84 idx := path.Join(root, fmt.Sprintf("%s_%s", strings.ToLower(class), shard)) 85 for _, ext := range indexDirExts { 86 require.Nil(t, os.MkdirAll(idx+ext, os.ModePerm)) 87 } 88 for _, ext := range indexFileExts { 89 require.Nil(t, os.WriteFile(idx+ext, nil, os.ModePerm)) 90 } 91 92 pqDir := path.Join(root, class, shard, "compressed_objects") 93 require.Nil(t, os.MkdirAll(pqDir, os.ModePerm)) 94 } 95 } 96 }) 97 98 files, err := os.ReadDir(root) 99 require.Nil(t, err) 100 101 t.Run("assert expected flat contents length", func(t *testing.T) { 102 // Flat structure root contains: 103 // - (3 dirs + 3 files) per shard per index 104 // - dirs: main commilog, geo prop commitlog, lsm store 105 // - files: indexcount, proplengths, version 106 // - 1 dir per index; shards dirs are nested 107 // - pq store 108 // - 3 root db files 109 expectedLen := numClasses*(numShards*(len(indexDirExts)+len(indexFileExts))+1) + len(rootFiles) 110 require.Len(t, files, expectedLen) 111 }) 112 113 t.Run("migrate the db", func(t *testing.T) { 114 classes := make([]*models.Class, numClasses) 115 states := make(map[string]*sharding.State, numClasses) 116 117 i := 0 118 for class, shards := range shardsByClass { 119 classes[i] = &models.Class{ 120 Class: class, 121 Properties: []*models.Property{{ 122 Name: "someGeoProp", 123 DataType: schema.DataTypeGeoCoordinates.PropString(), 124 }}, 125 } 126 states[class] = &sharding.State{ 127 Physical: make(map[string]sharding.Physical), 128 } 129 states[class].SetLocalName(localNode) 130 131 for _, shard := range shards { 132 states[class].Physical[shard] = sharding.Physical{ 133 Name: shard, 134 BelongsToNodes: []string{localNode}, 135 } 136 } 137 138 i++ 139 } 140 141 db := testDB(root, classes, states) 142 require.Nil(t, db.migrateFileStructureIfNecessary()) 143 }) 144 145 files, err = os.ReadDir(root) 146 require.Nil(t, err) 147 148 t.Run("assert expected hierarchical contents length", func(t *testing.T) { 149 // After migration, the hierarchical structure root contains: 150 // - one dir per index 151 // - 3 original root db files, and one additional which is the FS migration indicator 152 expectedLen := numClasses + len(migratedRootFiles) 153 require.Len(t, files, expectedLen) 154 }) 155 156 t.Run("assert all db files were migrated", func(t *testing.T) { 157 var foundRootFiles []string 158 for _, f := range files { 159 if f.IsDir() { 160 idx := f 161 shardsRoot, err := os.ReadDir(path.Join(root, idx.Name())) 162 require.Nil(t, err) 163 for _, shard := range shardsRoot { 164 assertShardRootContents(t, shardsByClass, root, idx, shard) 165 } 166 } else { 167 foundRootFiles = append(foundRootFiles, f.Name()) 168 } 169 } 170 171 assert.ElementsMatch(t, migratedRootFiles, foundRootFiles) 172 }) 173 } 174 175 func assertShardRootContents(t *testing.T, shardsByClass map[string][]string, root string, idx, shard os.DirEntry) { 176 assert.True(t, shard.IsDir()) 177 178 // Whatever we find in this shard directory, it should be able to 179 // be mapped back to the original flat structure root contents 180 lowercasedClasses := make(map[string]string, len(shardsByClass)) 181 for class := range shardsByClass { 182 lowercasedClasses[strings.ToLower(class)] = class 183 } 184 require.Contains(t, lowercasedClasses, idx.Name()) 185 assert.Contains(t, shardsByClass[lowercasedClasses[idx.Name()]], shard.Name()) 186 187 // Now we will get a set of all expected files within the shard dir. 188 // Check to see if all of these files are found. 189 expected := expectedShardContents() 190 shardFiles, err := os.ReadDir(path.Join(root, idx.Name(), shard.Name())) 191 require.Nil(t, err) 192 for _, sf := range shardFiles { 193 expected[sf.Name()] = true 194 } 195 expected.assert(t) 196 197 // Check if pq store was migrated to main store as "vectors_compressed" subdir 198 pqDir := path.Join(root, idx.Name(), shard.Name(), "lsm", helpers.VectorsCompressedBucketLSM) 199 info, err := os.Stat(pqDir) 200 require.NoError(t, err) 201 assert.True(t, info.IsDir()) 202 } 203 204 func testDB(root string, classes []*models.Class, states map[string]*sharding.State) *DB { 205 logger, _ := test.NewNullLogger() 206 return &DB{ 207 config: Config{RootPath: root}, 208 logger: logger, 209 schemaGetter: &fakeMigrationSchemaGetter{ 210 sch: schema.Schema{Objects: &models.Schema{Classes: classes}}, 211 states: states, 212 }, 213 } 214 } 215 216 func randClassName() string { 217 return randStringBytes(16) 218 } 219 220 func randShardName() string { 221 return randStringBytes(8) 222 } 223 224 func randStringBytes(n int) string { 225 b := make([]byte, n) 226 for i := range b { 227 switch { 228 case i == 0: 229 b[i] = randChar(uppercase) 230 case i == n/2: 231 b[i] = []byte("_")[0] 232 default: 233 b[i] = randChar(chars) 234 } 235 } 236 return string(b) 237 } 238 239 func randChar(str string) byte { 240 return str[rand.Intn(len(str))] 241 } 242 243 type shardContents map[string]bool 244 245 func expectedShardContents() shardContents { 246 return shardContents{ 247 "main.hnsw.commitlog.d": false, 248 "geo.someGeoProp.hnsw.commitlog.d": false, 249 "lsm": false, 250 "indexcount": false, 251 "proplengths": false, 252 "version": false, 253 } 254 } 255 256 func (c shardContents) assert(t *testing.T) { 257 for name, found := range c { 258 assert.True(t, found, "didn't find %q in shard contents", name) 259 } 260 } 261 262 type fakeMigrationSchemaGetter struct { 263 sch schema.Schema 264 states map[string]*sharding.State 265 } 266 267 func (sg *fakeMigrationSchemaGetter) GetSchemaSkipAuth() schema.Schema { 268 return sg.sch 269 } 270 271 func (sg *fakeMigrationSchemaGetter) Nodes() []string { 272 return nil 273 } 274 275 func (sg *fakeMigrationSchemaGetter) NodeName() string { 276 return "" 277 } 278 279 func (sg *fakeMigrationSchemaGetter) ClusterHealthScore() int { 280 return 0 281 } 282 283 func (sg *fakeMigrationSchemaGetter) ResolveParentNodes(string, string) (map[string]string, error) { 284 return nil, nil 285 } 286 287 func (sg *fakeMigrationSchemaGetter) CopyShardingState(class string) *sharding.State { 288 return sg.states[class] 289 } 290 291 func (sg *fakeMigrationSchemaGetter) ShardOwner(class, shard string) (string, error) { 292 return "", nil 293 } 294 295 func (sg *fakeMigrationSchemaGetter) TenantShard(class, tenant string) (string, string) { 296 return "", "" 297 } 298 299 func (sg *fakeMigrationSchemaGetter) ShardFromUUID(class string, uuid []byte) string { 300 return "" 301 } 302 303 func (sg *fakeMigrationSchemaGetter) ShardReplicas(class, shard string) ([]string, error) { 304 return nil, nil 305 }