github.com/weaviate/weaviate@v1.24.6/usecases/schema/migrate/fs/file_structure_migration.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package fs 13 14 import ( 15 "fmt" 16 "os" 17 "path" 18 "path/filepath" 19 "strings" 20 21 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 22 entschema "github.com/weaviate/weaviate/entities/schema" 23 "github.com/weaviate/weaviate/usecases/sharding" 24 ) 25 26 const vectorIndexCommitLog = `hnsw.commitlog.d` 27 28 func MigrateToHierarchicalFS(rootPath string, s schemaGetter) error { 29 root, err := os.ReadDir(rootPath) 30 if err != nil { 31 return fmt.Errorf("read source path %q: %w", rootPath, err) 32 } 33 fm := newFileMatcher(s, rootPath) 34 plan, err := assembleFSMigrationPlan(root, rootPath, fm) 35 if err != nil { 36 return err 37 } 38 39 for newRoot, parts := range plan.partsByShard { 40 for _, part := range parts { 41 newPath := path.Join(newRoot, part.newRelPath) 42 absDir, _ := filepath.Split(newPath) 43 if err := os.MkdirAll(absDir, os.ModePerm); err != nil { 44 return fmt.Errorf("mkdir %q: %w", absDir, err) 45 } 46 if err = os.Rename(part.oldAbsPath, newPath); err != nil { 47 return fmt.Errorf("mv %s %s: %w", part.oldAbsPath, newPath, err) 48 } 49 } 50 } 51 52 return nil 53 } 54 55 type migrationPart struct { 56 oldAbsPath string 57 newRelPath string 58 } 59 60 type shardRoot = string 61 62 type migrationPlan struct { 63 rootPath string 64 partsByShard map[shardRoot][]migrationPart 65 } 66 67 func newMigrationPlan(rootPath string) *migrationPlan { 68 return &migrationPlan{rootPath: rootPath, partsByShard: make(map[string][]migrationPart)} 69 } 70 71 func (p *migrationPlan) append(class, shard, oldRootRelPath, newShardRelPath string) { 72 shardRoot := path.Join(p.rootPath, strings.ToLower(class), shard) 73 p.partsByShard[shardRoot] = append(p.partsByShard[shardRoot], migrationPart{ 74 oldAbsPath: path.Join(p.rootPath, oldRootRelPath), 75 newRelPath: newShardRelPath, 76 }) 77 } 78 79 func (p *migrationPlan) prepend(class, shard, oldRootRelPath, newShardRelPath string) { 80 shardRoot := path.Join(p.rootPath, strings.ToLower(class), shard) 81 p.partsByShard[shardRoot] = append([]migrationPart{{ 82 oldAbsPath: path.Join(p.rootPath, oldRootRelPath), 83 newRelPath: newShardRelPath, 84 }}, p.partsByShard[shardRoot]...) 85 } 86 87 func assembleFSMigrationPlan(entries []os.DirEntry, rootPath string, fm *fileMatcher) (*migrationPlan, error) { 88 plan := newMigrationPlan(rootPath) 89 90 for _, entry := range entries { 91 if ok, cs := fm.isShardLsmDir(entry); ok { 92 // make sure lsm dir is moved first, otherwise os.Rename may fail 93 // if directory already exists (created by other files/dirs moved before) 94 plan.prepend(cs.class, cs.shard, 95 entry.Name(), 96 "lsm") 97 } else if ok, cs, suffix := fm.isShardFile(entry); ok { 98 plan.append(cs.class, cs.shard, 99 entry.Name(), 100 suffix) 101 } else if ok, cs := fm.isShardCommitLogDir(entry); ok { 102 plan.append(cs.class, cs.shard, 103 entry.Name(), 104 fmt.Sprintf("main.%s", vectorIndexCommitLog)) 105 } else if ok, csp := fm.isShardGeoCommitLogDir(entry); ok { 106 plan.append(csp.class, csp.shard, 107 entry.Name(), 108 fmt.Sprintf("geo.%s.%s", csp.geoProp, vectorIndexCommitLog)) 109 } else if ok, css := fm.isPqDir(entry); ok { 110 for _, cs := range css { 111 plan.append(cs.class, cs.shard, 112 path.Join(strings.ToLower(entry.Name()), cs.shard, "compressed_objects"), 113 path.Join("lsm", helpers.VectorsCompressedBucketLSM)) 114 } 115 116 // explicitly rename Class directory starting with uppercase to lowercase 117 // as MkdirAll will not create lowercased dir if uppercased one exists 118 oldClassRoot := path.Join(rootPath, entry.Name()) 119 newClassRoot := path.Join(rootPath, strings.ToLower(entry.Name())) 120 if err := os.Rename(oldClassRoot, newClassRoot); err != nil { 121 return nil, fmt.Errorf( 122 "rename pq index dir to avoid collision, old: %q, new: %q, err: %w", 123 oldClassRoot, newClassRoot, err) 124 } 125 } 126 } 127 return plan, nil 128 } 129 130 type classShard struct { 131 class string 132 shard string 133 } 134 135 type classShardGeoProp struct { 136 class string 137 shard string 138 geoProp string 139 } 140 141 type fileMatcher struct { 142 rootPath string 143 shardLsmDirs map[string]*classShard 144 shardFilePrefixes map[string]*classShard 145 shardGeoDirPrefixes map[string]*classShardGeoProp 146 classes map[string][]*classShard 147 } 148 149 type schemaGetter interface { 150 CopyShardingState(class string) *sharding.State 151 GetSchemaSkipAuth() entschema.Schema 152 } 153 154 func newFileMatcher(schemaGetter schemaGetter, rootPath string) *fileMatcher { 155 shardLsmDirs := make(map[string]*classShard) 156 shardFilePrefixes := make(map[string]*classShard) 157 shardGeoDirPrefixes := make(map[string]*classShardGeoProp) 158 classes := make(map[string][]*classShard) 159 160 sch := schemaGetter.GetSchemaSkipAuth() 161 for _, class := range sch.Objects.Classes { 162 shards := schemaGetter.CopyShardingState(class.Class).AllLocalPhysicalShards() 163 lowercasedClass := strings.ToLower(class.Class) 164 165 var geoProps []string 166 for _, prop := range class.Properties { 167 if dt, ok := entschema.AsPrimitive(prop.DataType); ok && dt == entschema.DataTypeGeoCoordinates { 168 geoProps = append(geoProps, prop.Name) 169 } 170 } 171 172 classes[class.Class] = make([]*classShard, 0, len(shards)) 173 for _, shard := range shards { 174 cs := &classShard{class: class.Class, shard: shard} 175 shardLsmDirs[fmt.Sprintf("%s_%s_lsm", lowercasedClass, shard)] = cs 176 shardFilePrefixes[fmt.Sprintf("%s_%s", lowercasedClass, shard)] = cs 177 classes[class.Class] = append(classes[class.Class], cs) 178 179 for _, geoProp := range geoProps { 180 csp := &classShardGeoProp{class: class.Class, shard: shard, geoProp: geoProp} 181 shardGeoDirPrefixes[fmt.Sprintf("%s_%s_%s", lowercasedClass, shard, geoProp)] = csp 182 } 183 } 184 } 185 186 return &fileMatcher{ 187 rootPath: rootPath, 188 shardLsmDirs: shardLsmDirs, 189 shardFilePrefixes: shardFilePrefixes, 190 shardGeoDirPrefixes: shardGeoDirPrefixes, 191 classes: classes, 192 } 193 } 194 195 // Checks if entry is directory with name (class is lowercased): 196 // class_shard_lsm 197 func (fm *fileMatcher) isShardLsmDir(entry os.DirEntry) (bool, *classShard) { 198 if !entry.IsDir() { 199 return false, nil 200 } 201 if cs, ok := fm.shardLsmDirs[entry.Name()]; ok { 202 return true, cs 203 } 204 return false, nil 205 } 206 207 // Checks if entry is file with name (class is lowercased): 208 // class_shard.* 209 // (e.g. class_shard.version, class_shard.indexcount) 210 func (fm *fileMatcher) isShardFile(entry os.DirEntry) (bool, *classShard, string) { 211 if !entry.Type().IsRegular() { 212 return false, nil, "" 213 } 214 parts := strings.SplitN(entry.Name(), ".", 2) 215 if len(parts) != 2 { 216 return false, nil, "" 217 } 218 if cs, ok := fm.shardFilePrefixes[parts[0]]; ok { 219 return true, cs, parts[1] 220 } 221 return false, nil, "" 222 } 223 224 // Checks if entry is directory with name (class is lowercased): 225 // class_shard.hnsw.commitlog.d 226 func (fm *fileMatcher) isShardCommitLogDir(entry os.DirEntry) (bool, *classShard) { 227 if !entry.IsDir() { 228 return false, nil 229 } 230 parts := strings.SplitN(entry.Name(), ".", 2) 231 if len(parts) != 2 { 232 return false, nil 233 } 234 if parts[1] != vectorIndexCommitLog { 235 return false, nil 236 } 237 if cs, ok := fm.shardFilePrefixes[parts[0]]; ok { 238 return true, cs 239 } 240 return false, nil 241 } 242 243 // Checks if entry is directory with name (class is lowercased): 244 // class_shard_prop.hnsw.commitlog.d 245 func (fm *fileMatcher) isShardGeoCommitLogDir(entry os.DirEntry) (bool, *classShardGeoProp) { 246 if !entry.IsDir() { 247 return false, nil 248 } 249 parts := strings.SplitN(entry.Name(), ".", 2) 250 if len(parts) != 2 { 251 return false, nil 252 } 253 if parts[1] != vectorIndexCommitLog { 254 return false, nil 255 } 256 if csp, ok := fm.shardGeoDirPrefixes[parts[0]]; ok { 257 return true, csp 258 } 259 return false, nil 260 } 261 262 // Checks if entry is directory containing PQ index: 263 // Class/shard/compressed_object 264 func (fm *fileMatcher) isPqDir(entry os.DirEntry) (bool, []*classShard) { 265 if !entry.IsDir() { 266 return false, nil 267 } 268 269 resultcss := []*classShard{} 270 if css, ok := fm.classes[entry.Name()]; ok { 271 for _, cs := range css { 272 pqDir := path.Join(fm.rootPath, cs.class, cs.shard, "compressed_objects") 273 if info, err := os.Stat(pqDir); err == nil && info.IsDir() { 274 resultcss = append(resultcss, cs) 275 } 276 } 277 return true, resultcss 278 } 279 return false, nil 280 }